From 79358c74026c5ec6c72fb210017fa34f3e3d1960 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 20 Nov 2024 10:04:36 +0100 Subject: [PATCH 1/6] Add simple nbody seq and omp The blocksize is hardcoded to 2048 for now. --- src/CMakeLists.txt | 1 + src/nbody/CMakeLists.txt | 22 ++ src/nbody/README.md | 23 ++ src/nbody/scripts/granularity-tests.sh | 89 ++++++ src/nbody/scripts/run-tests.sh | 141 ++++++++++ src/nbody/src/blocking/common/common_utils.c | 53 ++++ src/nbody/src/blocking/common/nbody.h | 59 ++++ src/nbody/src/blocking/mpi/main.c | 66 +++++ src/nbody/src/blocking/mpi/nbody.h | 32 +++ src/nbody/src/blocking/mpi/solver_mpi.c | 202 ++++++++++++++ src/nbody/src/blocking/mpi/solver_mpi_ompss.c | 221 +++++++++++++++ .../mpi/solver_mpi_ompss_interop_async.c | 221 +++++++++++++++ src/nbody/src/blocking/mpi/utils.c | 203 ++++++++++++++ src/nbody/src/blocking/smp/main.c | 40 +++ src/nbody/src/blocking/smp/nbody.h | 28 ++ src/nbody/src/blocking/smp/solver_ompss.c | 157 +++++++++++ src/nbody/src/blocking/smp/solver_seq.c | 147 ++++++++++ src/nbody/src/blocking/smp/utils.c | 181 ++++++++++++ src/nbody/src/common/common.c | 132 +++++++++ src/nbody/src/common/common.h | 74 +++++ src/nbody/src/plain/main.c | 38 +++ src/nbody/src/plain/nbody.h | 73 +++++ src/nbody/src/plain/solver_omp.c | 114 ++++++++ src/nbody/src/plain/solver_seq.c | 108 ++++++++ src/nbody/src/plain/utils.c | 261 ++++++++++++++++++ 25 files changed, 2686 insertions(+) create mode 100644 src/nbody/CMakeLists.txt create mode 100644 src/nbody/README.md create mode 100755 src/nbody/scripts/granularity-tests.sh create mode 100755 src/nbody/scripts/run-tests.sh create mode 100644 src/nbody/src/blocking/common/common_utils.c create mode 100644 src/nbody/src/blocking/common/nbody.h create mode 100644 src/nbody/src/blocking/mpi/main.c create mode 100644 src/nbody/src/blocking/mpi/nbody.h create mode 100644 src/nbody/src/blocking/mpi/solver_mpi.c create mode 100644 src/nbody/src/blocking/mpi/solver_mpi_ompss.c create mode 100644 src/nbody/src/blocking/mpi/solver_mpi_ompss_interop_async.c create mode 100644 src/nbody/src/blocking/mpi/utils.c create mode 100644 src/nbody/src/blocking/smp/main.c create mode 100644 src/nbody/src/blocking/smp/nbody.h create mode 100644 src/nbody/src/blocking/smp/solver_ompss.c create mode 100644 src/nbody/src/blocking/smp/solver_seq.c create mode 100644 src/nbody/src/blocking/smp/utils.c create mode 100644 src/nbody/src/common/common.c create mode 100644 src/nbody/src/common/common.h create mode 100644 src/nbody/src/plain/main.c create mode 100644 src/nbody/src/plain/nbody.h create mode 100644 src/nbody/src/plain/solver_omp.c create mode 100644 src/nbody/src/plain/solver_seq.c create mode 100644 src/nbody/src/plain/utils.c diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2bef695..b11cb53 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(bench6) add_subdirectory(ompss2) add_subdirectory(heat) +add_subdirectory(nbody) diff --git a/src/nbody/CMakeLists.txt b/src/nbody/CMakeLists.txt new file mode 100644 index 0000000..8f2be92 --- /dev/null +++ b/src/nbody/CMakeLists.txt @@ -0,0 +1,22 @@ +include_directories(src) + +set(PLAIN_SOURCES + src/common/common.c + src/plain/utils.c + src/plain/main.c) + +set(SMP_SOURCES + src/common/common.c + src/blocking/common/common_utils.c + src/blocking/smp/utils.c + src/blocking/smp/main.c) + +mk_bench(b6_nbody_seq_plain) +target_sources(b6_nbody_seq_plain PRIVATE ${PLAIN_SOURCES} src/plain/solver_seq.c) + +find_package(OpenMP) +if (OPENMP_FOUND) + mk_bench(b6_nbody_omp_plain) + target_sources(b6_nbody_omp_plain PRIVATE ${PLAIN_SOURCES} src/plain/solver_omp.c) + target_link_libraries(b6_nbody_omp_plain PRIVATE OpenMP::OpenMP_C) +endif() diff --git a/src/nbody/README.md b/src/nbody/README.md new file mode 100644 index 0000000..ed1b574 --- /dev/null +++ b/src/nbody/README.md @@ -0,0 +1,23 @@ +# N-body + +## Introduction +An N-body simulation numerically approximates the evolution of a system of +bodies in which each body continuously interacts with every other body. A +familiar example is an astrophysical simulation in which each body represents a +galaxy or an individual star, and the bodies attract each other through the +gravitational force. + +N-body simulation arises in many other computational science problems as well. +For example, protein folding is studied using N-body simulation to calculate +electrostatic and van der Waals forces. Turbulent fluid flow simulation and +global illumination computation in computer graphics are other examples of +problems that use N-body simulation. + +## Available versions and building instructions + +TODO + +## Execution instructions + +TODO + diff --git a/src/nbody/scripts/granularity-tests.sh b/src/nbody/scripts/granularity-tests.sh new file mode 100755 index 0000000..7e2a068 --- /dev/null +++ b/src/nbody/scripts/granularity-tests.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Check if Makefile is in the default folder +cd ${0%/*} # Never assume the directory and always change it to where the script is located + +makefile_path=".." +if [ ! -f "${makefile_path}/Makefile" ]; then + read -p 'Makefile not found by default, please specify in which folder it is found (relative path): ' makefile_path +fi + +function usage { + echo "Usage: ./granularity-tests.sh [-bs blocksize] [-bo BIGO] [OPTIONAL_PARAMETERS]\n" + echo "Parameters:\n" + echo " -p PARTICLES: use PARTICLES as the total number of particles\n" + echo " -t TIMESTEPS: use TIMESTEPS as the number of timesteps\n\n" + echo "Optional parameters:\n" + echo " -bo BO - use BO as the BIGO (defaults to N2)\n" + echo " -bs BS - use BS as blocksize (defaults to 2048)\n" + echo " -f, - forcefully generate particles by avoiding creating files\n" + echo " -c, - check the correctness of the result (disabled by default)\n" + echo " -C, - do not check the correctness of the result\n" + echo " -o, - save the computed particles to the default output file (disabled by default)\n" + echo " -O, - do not save the computed particles to the default output file\n" + echo " -h, - display this help and exit\n\n" + exit 1 +} + + +##################### +# COLLECT ARGUMENTS # +##################### + +argc=$# +argv=($@) + +for (( j=0; j $logfile + error=$? + elif [[ $progname == *"_mpi_ompss"* ]]; then + mpiexec.hydra -n $nprocs -bind-to hwthread:$nthreadsxproc $prog -p $size -t $timesteps -c &> $logfile + error=$? + else + $prog -p $size -t $timesteps -c &> $logfile + error=$? + fi + + # Check the return value of the program + if [ "$error" -ne 0 ]; then + failed=$(($failed + 1)) + echo $progname FAILED + rm -f $logfile + continue; + fi + + # Result checking + grep -Fxq 'Result validation: OK' $logfile + error=$? + if [ "$error" -eq 0 ]; then + echo $progname PASSED + else + failed=$(($failed + 1)) + echo $progname FAILED + fi + rm -f $logfile +done + +echo --------------------------------- +if [ $failed -eq 0 ]; then + echo SUMMARY: ALL TESTS PASSED +else + echo SUMMARY: ${failed}/${total} TESTS FAILED +fi +echo --------------------------------- + +rm -f $logfile diff --git a/src/nbody/src/blocking/common/common_utils.c b/src/nbody/src/blocking/common/common_utils.c new file mode 100644 index 0000000..9253796 --- /dev/null +++ b/src/nbody/src/blocking/common/common_utils.c @@ -0,0 +1,53 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "nbody.h" + +#include +#include +#include +#include +#include +#include +#include + + +void nbody_particle_init(const nbody_conf_t *conf, particles_block_t *part) +{ + for (int i = 0; i < BLOCK_SIZE; i++){ + part->position_x[i] = conf->domain_size_x * ((float)random() / ((float)RAND_MAX + 1.0)); + part->position_y[i] = conf->domain_size_y * ((float)random() / ((float)RAND_MAX + 1.0)); + part->position_z[i] = conf->domain_size_z * ((float)random() / ((float)RAND_MAX + 1.0)); + part->mass[i] = conf->mass_maximum * ((float)random() / ((float)RAND_MAX + 1.0)); + part->weight[i] = gravitational_constant * part->mass[i]; + } +} + +int nbody_compare_particles(const particles_block_t *local, const particles_block_t *reference, int num_blocks) +{ + double error = 0.0; + int count = 0; + for (int i = 0; i < num_blocks; i++) { + for (int e = 0; e < BLOCK_SIZE; e++) { + if ((local[i].position_x[e] != reference[i].position_x[e]) || + (local[i].position_y[e] != reference[i].position_y[e]) || + (local[i].position_z[e] != reference[i].position_z[e])) { + error += fabs(((local[i].position_x[e] - reference[i].position_x[e])*100.0) / reference[i].position_x[e]) + + fabs(((local[i].position_y[e] - reference[i].position_y[e])*100.0) / reference[i].position_y[e]) + + fabs(((local[i].position_z[e] - reference[i].position_z[e])*100.0) / reference[i].position_z[e]); + count++; + } + } + } + + double relative_error = (count != 0) ? error / (3.0 * count) : 0.0; + if ((count * 100.0) / (num_blocks * BLOCK_SIZE) > 0.6 || relative_error > TOLERATED_ERROR) { + return 0; + } + return 1; +} + diff --git a/src/nbody/src/blocking/common/nbody.h b/src/nbody/src/blocking/common/nbody.h new file mode 100644 index 0000000..b6c8d13 --- /dev/null +++ b/src/nbody/src/blocking/common/nbody.h @@ -0,0 +1,59 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef NBODY_H +#define NBODY_H + +#include "common/common.h" + +#include +#include +#include + +// Block size definition +#ifndef BLOCK_SIZE +#define BLOCK_SIZE 2048 +#endif + +#define MIN_PARTICLES (4096 * BLOCK_SIZE / sizeof(particles_block_t)) + +// Solver structures +typedef struct { + float position_x[BLOCK_SIZE]; /* m */ + float position_y[BLOCK_SIZE]; /* m */ + float position_z[BLOCK_SIZE]; /* m */ + float velocity_x[BLOCK_SIZE]; /* m/s */ + float velocity_y[BLOCK_SIZE]; /* m/s */ + float velocity_z[BLOCK_SIZE]; /* m/s */ + float mass[BLOCK_SIZE]; /* kg */ + float weight[BLOCK_SIZE]; +} particles_block_t; + +typedef struct { + float x[BLOCK_SIZE]; /* x */ + float y[BLOCK_SIZE]; /* y */ + float z[BLOCK_SIZE]; /* z */ +} forces_block_t; + +// Forward declaration +typedef struct nbody_file_t nbody_file_t; +typedef struct nbody_t nbody_t; + +// Solver function +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval); + +// Auxiliary functions +nbody_t nbody_setup(const nbody_conf_t *conf); +void nbody_particle_init(const nbody_conf_t *conf, particles_block_t *part); +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time); +void nbody_save_particles(const nbody_t *nbody); +void nbody_free(nbody_t *nbody); +void nbody_check(const nbody_t *nbody); +int nbody_compare_particles(const particles_block_t *local, const particles_block_t *reference, int num_blocks); + +#endif // NBODY_H + diff --git a/src/nbody/src/blocking/mpi/main.c b/src/nbody/src/blocking/mpi/main.c new file mode 100644 index 0000000..59c9094 --- /dev/null +++ b/src/nbody/src/blocking/mpi/main.c @@ -0,0 +1,66 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include + +#include + +#ifdef INTEROPERABILITY +#include +#endif + +#ifdef INTEROPERABILITY +#define DESIRED_THREAD_LEVEL MPI_TASK_MULTIPLE +#else +#define DESIRED_THREAD_LEVEL MPI_THREAD_MULTIPLE +#endif + +int main(int argc, char** argv) +{ + int provided; + MPI_Init_thread(&argc, &argv, DESIRED_THREAD_LEVEL, &provided); + assert(provided == DESIRED_THREAD_LEVEL); + + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + assert(rank_size > 0); + + nbody_conf_t conf = nbody_get_conf(argc, argv); + assert(conf.num_particles > 0); + assert(conf.timesteps > 0); + + int total_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); + int my_particles = total_particles / rank_size; + assert(my_particles >= BLOCK_SIZE); + conf.num_particles = my_particles; + + conf.num_blocks = my_particles / BLOCK_SIZE; + assert(conf.num_blocks > 0); + + nbody_t nbody = nbody_setup(&conf); + MPI_Barrier(MPI_COMM_WORLD); + + double start = get_time(); + nbody_solve(&nbody, conf.num_blocks, conf.timesteps, conf.time_interval); + double end = get_time(); + + nbody_stats(&nbody, &conf, end - start); + + if (conf.save_result) nbody_save_particles(&nbody); + if (conf.check_result) nbody_check(&nbody); + nbody_free(&nbody); + + MPI_Finalize(); + return 0; +} diff --git a/src/nbody/src/blocking/mpi/nbody.h b/src/nbody/src/blocking/mpi/nbody.h new file mode 100644 index 0000000..68ad324 --- /dev/null +++ b/src/nbody/src/blocking/mpi/nbody.h @@ -0,0 +1,32 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef NBODY_MPI_H +#define NBODY_MPI_H + +#include "blocking/common/nbody.h" + +// Application structures +struct nbody_file_t { + size_t total_size; + size_t size; + size_t offset; + char name[1000]; +}; + +struct nbody_t { + particles_block_t *local; + particles_block_t *remote1; + particles_block_t *remote2; + forces_block_t *forces; + int num_blocks; + int timesteps; + nbody_file_t file; +}; + +#endif // NBODY_MPI_H + diff --git a/src/nbody/src/blocking/mpi/solver_mpi.c b/src/nbody/src/blocking/mpi/solver_mpi.c new file mode 100644 index 0000000..02690f8 --- /dev/null +++ b/src/nbody/src/blocking/mpi/solver_mpi.c @@ -0,0 +1,202 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include + +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); +static void exchange_particles_block(const particles_block_t *sendbuf, particles_block_t *recvbuf, int block_id, int rank, int rank_size); + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + assert(timesteps > 0); + + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + particles_block_t *local = nbody->local; + particles_block_t *remote1 = nbody->remote1; + particles_block_t *remote2 = nbody->remote2; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + particles_block_t *sendbuf = local; + particles_block_t *recvbuf = remote1; + + for (int r = 0; r < rank_size; r++) { + calculate_forces(forces, local, sendbuf, num_blocks); + if (r < rank_size - 1) { + exchange_particles(sendbuf, recvbuf, num_blocks, rank, rank_size); + } + + particles_block_t *aux = recvbuf; + recvbuf = (r != 0) ? sendbuf : remote2; + sendbuf = aux; + } + + update_particles(local, forces, num_blocks, time_interval); + } + + MPI_Barrier(MPI_COMM_WORLD); +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, block1+i, block2+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size) +{ + for (int i = 0; i < num_blocks; i++) { + exchange_particles_block(sendbuf+i, recvbuf+i, i, rank, rank_size); + } +} + +void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +void exchange_particles_block(const particles_block_t *sendbuf, particles_block_t *recvbuf, int block_id, int rank, int rank_size) +{ + int src = MOD(rank - 1, rank_size); + int dst = MOD(rank + 1, rank_size); + int size = sizeof(particles_block_t); + + if (rank % 2) { + MPI_Send(sendbuf, size, MPI_BYTE, dst, block_id+10, MPI_COMM_WORLD); + MPI_Recv(recvbuf, size, MPI_BYTE, src, block_id+10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } else { + MPI_Recv(recvbuf, size, MPI_BYTE, src, block_id+10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(sendbuf, size, MPI_BYTE, dst, block_id+10, MPI_COMM_WORLD); + } +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + if (!rank) { + int particles = nbody->num_blocks * BLOCK_SIZE; + int total_particles = particles * rank_size; + + printf("bigo, %s, processes, %d, threads, %d, timesteps, %d, total_particles, %d, particles_per_proc, %d, block_size, %d, blocks_per_proc, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), rank_size, 1, nbody->timesteps, total_particles, particles, BLOCK_SIZE, + nbody->num_blocks, time, nbody_compute_throughput(total_particles, nbody->timesteps, time) + ); + } +} diff --git a/src/nbody/src/blocking/mpi/solver_mpi_ompss.c b/src/nbody/src/blocking/mpi/solver_mpi_ompss.c new file mode 100644 index 0000000..0a67f4d --- /dev/null +++ b/src/nbody/src/blocking/mpi/solver_mpi_ompss.c @@ -0,0 +1,221 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include + +#include +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); +static void exchange_particles_block(const particles_block_t *sendbuf, particles_block_t *recvbuf, int block_id, int rank, int rank_size); + +#ifdef INTEROPERABILITY +int *serial = NULL; +#else +int *serial = (int *)1; +#endif + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + assert(timesteps > 0); + + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + particles_block_t *local = nbody->local; + particles_block_t *remote1 = nbody->remote1; + particles_block_t *remote2 = nbody->remote2; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + particles_block_t *sendbuf = local; + particles_block_t *recvbuf = remote1; + + for (int r = 0; r < rank_size; r++) { + calculate_forces(forces, local, sendbuf, num_blocks); + if (r < rank_size - 1) { + exchange_particles(sendbuf, recvbuf, num_blocks, rank, rank_size); + } + + particles_block_t *aux = recvbuf; + recvbuf = (r != 0) ? sendbuf : remote2; + sendbuf = aux; + } + + update_particles(local, forces, num_blocks, time_interval); + } + + #pragma oss taskwait + MPI_Barrier(MPI_COMM_WORLD); +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, block1+i, block2+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size) +{ + int src = MOD(rank - 1, rank_size); + int dst = MOD(rank + 1, rank_size); + int size = sizeof(particles_block_t); + + if (rank % 2) { + for (int i = 0; i < num_blocks; i++) { + send_particles_block(sendbuf+i, i, dst); + recv_particles_block(recvbuf+i, i, src); + } + } else { + for (int i = 0; i < num_blocks; i++) { + recv_particles_block(recvbuf+i, i, src); + send_particles_block(sendbuf+i, i, dst); + } + } +} + +#pragma oss task in(*block1, *block2) inout(*forces) label("calculate_forces_block") +void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +#pragma oss task inout(*particles, *forces) label("update_particles_block") +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +#pragma oss task in(*sendbuf) inout(*serial) label("send_particles_block") +void send_particles_block(const particles_block_t *sendbuf, int block_id, int dst) +{ + MPI_Send(sendbuf, sizeof(particles_block_t), MPI_BYTE, dst, block_id+10, MPI_COMM_WORLD); +} + +#pragma oss task out(*recvbuf) inout(*serial) label("recv_particles_block") +void recv_particles_block(particles_block_t *recvbuf, int block_id, int src) +{ + MPI_Recv(recvbuf, sizeof(particles_block_t), MPI_BYTE, src, block_id+10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + if (!rank) { + int particles = nbody->num_blocks * BLOCK_SIZE; + int total_particles = particles * rank_size; + + printf("bigo, %s, processes, %d, threads, %d, timesteps, %d, total_particles, %d, particles_per_proc, %d, block_size, %d, blocks_per_proc, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), rank_size, nanos6_get_num_cpus(), nbody->timesteps, total_particles, particles, BLOCK_SIZE, + nbody->num_blocks, time, nbody_compute_throughput(total_particles, nbody->timesteps, time) + ); + } +} diff --git a/src/nbody/src/blocking/mpi/solver_mpi_ompss_interop_async.c b/src/nbody/src/blocking/mpi/solver_mpi_ompss_interop_async.c new file mode 100644 index 0000000..d830240 --- /dev/null +++ b/src/nbody/src/blocking/mpi/solver_mpi_ompss_interop_async.c @@ -0,0 +1,221 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); +static void exchange_particles_block(const particles_block_t *sendbuf, particles_block_t *recvbuf, int block_id, int rank, int rank_size); + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + assert(timesteps > 0); + + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + particles_block_t *local = nbody->local; + particles_block_t *remote1 = nbody->remote1; + particles_block_t *remote2 = nbody->remote2; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + particles_block_t *sendbuf = local; + particles_block_t *recvbuf = remote1; + + for (int r = 0; r < rank_size; r++) { + calculate_forces(forces, local, sendbuf, num_blocks); + if (r < rank_size - 1) { + exchange_particles(sendbuf, recvbuf, num_blocks, rank, rank_size); + } + + particles_block_t *aux = recvbuf; + recvbuf = (r != 0) ? sendbuf : remote2; + sendbuf = aux; + } + + update_particles(local, forces, num_blocks, time_interval); + } + + #pragma oss taskwait + MPI_Barrier(MPI_COMM_WORLD); +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, block1+i, block2+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size) +{ + int src = MOD(rank - 1, rank_size); + int dst = MOD(rank + 1, rank_size); + int size = sizeof(particles_block_t); + + if (rank % 2) { + for (int i = 0; i < num_blocks; i++) { + send_particles_block(sendbuf+i, i, dst); + recv_particles_block(recvbuf+i, i, src); + } + } else { + for (int i = 0; i < num_blocks; i++) { + recv_particles_block(recvbuf+i, i, src); + send_particles_block(sendbuf+i, i, dst); + } + } +} + +#pragma oss task in(*block1, *block2) inout(*forces) label("calculate_forces_block") +void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +#pragma oss task inout(*particles, *forces) label("update_particles_block") +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +#pragma oss task in(*sendbuf) label("send_particles_block") +void send_particles_block(const particles_block_t *sendbuf, int block_id, int dst) +{ + MPI_Request request; + MPI_Isend(sendbuf, sizeof(particles_block_t), MPI_BYTE, dst, block_id+10, MPI_COMM_WORLD, &request); + TAMPI_Iwait(&request, MPI_STATUS_IGNORE); +} + +#pragma oss task out(*recvbuf) label("recv_particles_block") +void recv_particles_block(particles_block_t *recvbuf, int block_id, int src) +{ + MPI_Request request; + MPI_Irecv(recvbuf, sizeof(particles_block_t), MPI_BYTE, src, block_id+10, MPI_COMM_WORLD, &request); + TAMPI_Iwait(&request, MPI_STATUS_IGNORE); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + if (!rank) { + int particles = nbody->num_blocks * BLOCK_SIZE; + int total_particles = particles * rank_size; + + printf("bigo, %s, processes, %d, threads, %d, timesteps, %d, total_particles, %d, particles_per_proc, %d, block_size, %d, blocks_per_proc, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), rank_size, nanos6_get_num_cpus(), nbody->timesteps, total_particles, particles, BLOCK_SIZE, + nbody->num_blocks, time, nbody_compute_throughput(total_particles, nbody->timesteps, time) + ); + } +} diff --git a/src/nbody/src/blocking/mpi/utils.c b/src/nbody/src/blocking/mpi/utils.c new file mode 100644 index 0000000..d244271 --- /dev/null +++ b/src/nbody/src/blocking/mpi/utils.c @@ -0,0 +1,203 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +void nbody_generate_particles(const nbody_conf_t *conf, const nbody_file_t *file) +{ + int rank_size; + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + if (!access(fname, F_OK)) { + return; + } + + struct stat st = {0}; + if (stat("data", &st) == -1) { + mkdir("data", 0755); + } + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int total_size = file->total_size; + assert(total_size % PAGE_SIZE == 0); + + int err = ftruncate(fd, total_size); + assert(!err); + + particles_block_t * const particles = mmap(NULL, total_size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + + for(int i = 0; i < conf->num_blocks * rank_size; i++) { + nbody_particle_init(conf, particles+i); + } + + err = munmap(particles, total_size); + assert(!err); + + err = close(fd); + assert(!err); +} + +void nbody_check(const nbody_t *nbody) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + char fname[1024]; + sprintf(fname, "%s.ref", nbody->file.name); + if (access(fname, F_OK) != 0) { + if (!rank) fprintf(stderr, "Warning: %s file does not exist. Skipping the check...\n", fname); + return; + } + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + particles_block_t *reference = mmap(NULL, nbody->file.size, PROT_READ, MAP_SHARED, fd, nbody->file.offset); + assert(reference != MAP_FAILED); + + int correct, correct_chunk; + correct_chunk = nbody_compare_particles(nbody->local, reference, nbody->num_blocks); + + MPI_Reduce(&correct_chunk, &correct, 1, MPI_INT, MPI_LAND, 0, MPI_COMM_WORLD); + + if (!rank) { + if (correct) { + printf("Result validation: OK\n"); + } else { + printf("Result validation: ERROR\n"); + } + } + + int err = munmap(reference, nbody->file.size); + assert(!err); + + err = close(fd); + assert(!err); +} + +nbody_file_t nbody_setup_file(const nbody_conf_t *conf) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + nbody_file_t file; + file.size = conf->num_blocks * sizeof(particles_block_t); + file.total_size = rank_size * file.size; + file.offset = rank * file.size; + + sprintf(file.name, "%s-%s-%d-%d-%d", conf->name, TOSTRING(BIGO), rank_size * conf->num_blocks * BLOCK_SIZE, BLOCK_SIZE, conf->timesteps); + return file; +} + +particles_block_t * nbody_load_particles(const nbody_conf_t *conf, const nbody_file_t *file) +{ + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + void * const ptr = mmap(NULL, file->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, file->offset); + assert(ptr != MAP_FAILED); + + int err = close(fd); + assert(!err); + + return ptr; +} + +nbody_t nbody_setup(const nbody_conf_t *conf) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + nbody_file_t file = nbody_setup_file(conf); + + if (!rank) nbody_generate_particles(conf, &file); + MPI_Barrier(MPI_COMM_WORLD); + + nbody_t nbody; + nbody.local = nbody_load_particles(conf, &file); + assert(nbody.local != NULL); + + nbody.remote1 = nbody_alloc(conf->num_blocks * sizeof(particles_block_t)); + nbody.remote2 = nbody_alloc(conf->num_blocks * sizeof(particles_block_t)); + assert(nbody.remote1 != NULL); + assert(nbody.remote2 != NULL); + + nbody.forces = nbody_alloc(conf->num_blocks * sizeof(forces_block_t)); + assert(nbody.forces != NULL); + + nbody.num_blocks = conf->num_blocks; + nbody.timesteps = conf->timesteps; + nbody.file = file; + + return nbody; +} + +void nbody_save_particles(const nbody_t *nbody) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + char fname[1024]; + sprintf(fname, "%s.out", nbody->file.name); + const int mode = MPI_MODE_CREATE | MPI_MODE_WRONLY; + + MPI_File outfile; + int err = MPI_File_open(MPI_COMM_WORLD, fname, mode, MPI_INFO_NULL, &outfile); + assert(err == MPI_SUCCESS); + + MPI_File_set_view(outfile, nbody->file.offset, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); + assert(err == MPI_SUCCESS); + + MPI_File_write(outfile, nbody->local, nbody->file.size, MPI_BYTE, MPI_STATUS_IGNORE); + assert(err == MPI_SUCCESS); + + MPI_File_close(&outfile); + assert(err == MPI_SUCCESS); + + MPI_Barrier(MPI_COMM_WORLD); +} + +void nbody_free(nbody_t *nbody) +{ + const int particles_size = nbody->num_blocks * sizeof(particles_block_t); + const int forces_size = nbody->num_blocks * sizeof(forces_block_t); + + int err = munmap(nbody->local, particles_size); + err |= munmap(nbody->remote1, particles_size); + err |= munmap(nbody->remote2, particles_size); + err |= munmap(nbody->forces, forces_size); + assert(!err); +} + diff --git a/src/nbody/src/blocking/smp/main.c b/src/nbody/src/blocking/smp/main.c new file mode 100644 index 0000000..55c0ab4 --- /dev/null +++ b/src/nbody/src/blocking/smp/main.c @@ -0,0 +1,40 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include + + +int main(int argc, char** argv) +{ + nbody_conf_t conf = nbody_get_conf(argc, argv); + + conf.num_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); + assert(conf.num_particles >= BLOCK_SIZE); + assert(conf.timesteps > 0); + + conf.num_blocks = conf.num_particles / BLOCK_SIZE; + assert(conf.num_blocks > 0); + + nbody_t nbody = nbody_setup(&conf); + + double start = get_time(); + nbody_solve(&nbody, conf.num_blocks, conf.timesteps, conf.time_interval); + double end = get_time(); + + nbody_stats(&nbody, &conf, end - start); + + if (conf.save_result && !conf.force_generation) nbody_save_particles(&nbody); + if (conf.check_result) nbody_check(&nbody); + nbody_free(&nbody); + return 0; +} diff --git a/src/nbody/src/blocking/smp/nbody.h b/src/nbody/src/blocking/smp/nbody.h new file mode 100644 index 0000000..6b34fd1 --- /dev/null +++ b/src/nbody/src/blocking/smp/nbody.h @@ -0,0 +1,28 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef NBODY_SMP_H +#define NBODY_SMP_H + +#include "blocking/common/nbody.h" + +// Application structures +struct nbody_file_t { + size_t size; + char name[1000]; +}; + +struct nbody_t { + particles_block_t *particles; + forces_block_t *forces; + int num_blocks; + int timesteps; + nbody_file_t file; +}; + +#endif // NBODY_SMP_H + diff --git a/src/nbody/src/blocking/smp/solver_ompss.c b/src/nbody/src/blocking/smp/solver_ompss.c new file mode 100644 index 0000000..80628c5 --- /dev/null +++ b/src/nbody/src/blocking/smp/solver_ompss.c @@ -0,0 +1,157 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include + +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *particles, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + particles_block_t *particles = nbody->particles; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + #pragma oss task weakin(particles[0;num_blocks]) weakinout(forces[0;num_blocks]) label("calculate_forces") + calculate_forces(forces, particles, num_blocks); + + #pragma oss task weakinout(particles[0;num_blocks], forces[0;num_blocks]) label("update_particles") + update_particles(particles, forces, num_blocks, time_interval); + } + + #pragma oss taskwait +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, particles+i, particles+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, particles+i, particles+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, particles+i, particles+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +#pragma oss task in(*block1, *block2) inout(*forces) wait label("calculate_forces_block") +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + #pragma oss task for + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +#pragma oss task inout(*particles, *forces) label("update_particles_block") +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int particles = nbody->num_blocks * BLOCK_SIZE; + printf("bigo, %s, threads, %d, timesteps, %d, total_particles, %d, block_size, %d, blocks, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), nanos6_get_num_cpus(), nbody->timesteps, particles, BLOCK_SIZE, + nbody->num_blocks, time, nbody_compute_throughput(particles, nbody->timesteps, time) + ); +} + diff --git a/src/nbody/src/blocking/smp/solver_seq.c b/src/nbody/src/blocking/smp/solver_seq.c new file mode 100644 index 0000000..6b767f9 --- /dev/null +++ b/src/nbody/src/blocking/smp/solver_seq.c @@ -0,0 +1,147 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *particles, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + particles_block_t *particles = nbody->particles; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + calculate_forces(forces, particles, num_blocks); + update_particles(particles, forces, num_blocks, time_interval); + } +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, particles+i, particles+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, particles+i, particles+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, particles+i, particles+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int particles = nbody->num_blocks * BLOCK_SIZE; + printf("bigo, %s, timesteps, %d, total_particles, %d, block_size, %d, blocks, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), nbody->timesteps, particles, BLOCK_SIZE, nbody->num_blocks, + time, nbody_compute_throughput(particles, nbody->timesteps, time) + ); +} + diff --git a/src/nbody/src/blocking/smp/utils.c b/src/nbody/src/blocking/smp/utils.c new file mode 100644 index 0000000..3e70e9d --- /dev/null +++ b/src/nbody/src/blocking/smp/utils.c @@ -0,0 +1,181 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +void nbody_generate_particles(const nbody_conf_t *conf, const nbody_file_t *file) +{ + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + if (!access(fname, F_OK)) { + return; + } + + struct stat st = {0}; + if (stat("data", &st) == -1) { + mkdir("data", 0755); + } + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int size = file->size; + assert(size % PAGE_SIZE == 0); + + int err = ftruncate(fd, size); + assert(!err); + + particles_block_t * const particles = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + + for(int i = 0; i < conf->num_blocks; i++) { + nbody_particle_init(conf, particles+i); + } + + err = munmap(particles, size); + assert(!err); + + err = close(fd); + assert(!err); +} + +void nbody_check(const nbody_t *nbody) +{ + char fname[1024]; + sprintf(fname, "%s.ref", nbody->file.name); + if (access(fname, F_OK) != 0) { + fprintf(stderr, "Warning: %s file does not exist. Skipping the check...\n", fname); + return; + } + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + particles_block_t *reference = mmap(NULL, nbody->file.size, PROT_READ, MAP_SHARED, fd, 0); + assert(reference != MAP_FAILED); + + if (nbody_compare_particles(nbody->particles, reference, nbody->num_blocks)) { + printf("Result validation: OK\n"); + } else { + printf("Result validation: ERROR\n"); + } + + int err = munmap(reference, nbody->file.size); + assert(!err); + + err = close(fd); + assert(!err); +} + +nbody_file_t nbody_setup_file(const nbody_conf_t *conf) +{ + nbody_file_t file; + file.size = conf->num_blocks * sizeof(particles_block_t); + + sprintf(file.name, "%s-%s-%d-%d-%d", conf->name, TOSTRING(BIGO), conf->num_blocks * BLOCK_SIZE, BLOCK_SIZE, conf->timesteps); + return file; +} + +particles_block_t * nbody_load_particles(const nbody_conf_t *conf, const nbody_file_t *file) +{ + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + void * const ptr = mmap(NULL, file->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + assert(ptr != MAP_FAILED); + + int err = close(fd); + assert(!err); + + return ptr; +} + +nbody_t nbody_setup(const nbody_conf_t *conf) +{ + nbody_t nbody; + nbody.timesteps = conf->timesteps; + nbody.num_blocks = conf->num_blocks; + + nbody_file_t file = nbody_setup_file(conf); + nbody.file = file; + + if (conf->force_generation) { + nbody.particles = nbody_alloc(conf->num_blocks * sizeof(particles_block_t)); + assert(nbody.particles != NULL); + + for (int i = 0; i < conf->num_blocks; i++) { + nbody_particle_init(conf, nbody.particles+i); + } + + nbody.forces = nbody_alloc(conf->num_blocks * sizeof(forces_block_t)); + assert(nbody.forces != NULL); + } + else { + nbody_generate_particles(conf, &file); + + nbody.particles = nbody_load_particles(conf, &file); + assert(nbody.particles != NULL); + + nbody.forces = nbody_alloc(conf->num_blocks * sizeof(forces_block_t)); + assert(nbody.forces != NULL); + } + + return nbody; +} + +void nbody_save_particles(const nbody_t *nbody) +{ + char fname[1024]; + sprintf(fname, "%s.out", nbody->file.name); + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int size = nbody->file.size; + assert(size % PAGE_SIZE == 0); + + int err = ftruncate(fd, size); + assert(!err); + + particles_block_t * const particles = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + assert(particles != MAP_FAILED); + + memcpy(particles, nbody->particles, size); + + err = munmap(particles, size); + assert(!err); + + err = close(fd); + assert(!err); +} + +void nbody_free(nbody_t *nbody) +{ + int err = munmap(nbody->particles, nbody->num_blocks * sizeof(particles_block_t)); + err |= munmap(nbody->forces, nbody->num_blocks * sizeof(forces_block_t)); + assert(!err); +} + diff --git a/src/nbody/src/common/common.c b/src/nbody/src/common/common.c new file mode 100644 index 0000000..e44feee --- /dev/null +++ b/src/nbody/src/common/common.c @@ -0,0 +1,132 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +void * nbody_alloc(size_t size) +{ + void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + assert(addr != MAP_FAILED); + return addr; +} + +static void nbody_print_usage(int argc, char **argv) +{ + (void) argc; + fprintf(stderr, "Usage: %s <-p particles> <-t timesteps> [OPTION]...\n", argv[0]); + fprintf(stderr, "Parameters:\n"); + fprintf(stderr, " -p, --particles=PARTICLES\t\tuse PARTICLES as the total number of particles (default: 16384)\n"); + fprintf(stderr, " -t, --timesteps=TIMESTEPS\t\tuse TIMESTEPS as the number of timesteps (default: 10)\n\n"); + fprintf(stderr, "Optional parameters:\n"); + fprintf(stderr, " -f, --force-generation\t\t\t\talways force the generation of particles without the usage of files(disabled by default)\n"); + fprintf(stderr, " -c, --check\t\t\t\tcheck the correctness of the result (disabled by default)\n"); + fprintf(stderr, " -C, --no-check\t\t\tdo not check the correctness of the result\n"); + fprintf(stderr, " -o, --output\t\t\t\tsave the computed particles to the default output file (disabled by default)\n"); + fprintf(stderr, " -O, --no-output\t\t\tdo not save the computed particles to the default output file\n"); + fprintf(stderr, " -h, --help\t\t\t\tdisplay this help and exit\n\n"); +} + +nbody_conf_t nbody_get_conf(int argc, char **argv) +{ + nbody_conf_t conf; + conf.domain_size_x = default_domain_size_x; + conf.domain_size_y = default_domain_size_y; + conf.domain_size_z = default_domain_size_z; + conf.mass_maximum = default_mass_maximum; + conf.time_interval = default_time_interval; + conf.seed = default_seed; + conf.name = default_name; + conf.num_particles = default_num_particles; + conf.num_blocks = conf.num_particles / BLOCK_SIZE; + conf.timesteps = default_timesteps; + conf.save_result = default_save_result; + conf.check_result = default_check_result; + conf.force_generation = default_force_generation; + + static struct option long_options[] = { + {"particles", required_argument, 0, 'p'}, + {"timesteps", required_argument, 0, 't'}, + {"force-generation", no_argument, 0, 'f'}, + {"check", no_argument, 0, 'c'}, + {"no-check", no_argument, 0, 'C'}, + {"output", no_argument, 0, 'o'}, + {"no-output", no_argument, 0, 'O'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; + + int c; + int index; + while ((c = getopt_long(argc, argv, "hfoOcCp:t:", long_options, &index)) != -1) { + switch (c) { + case 'h': + nbody_print_usage(argc, argv); + exit(0); + case 'f': + conf.force_generation = 1; + break; + case 'o': + conf.save_result = 1; + break; + case 'O': + conf.save_result = 0; + break; + case 'c': + conf.check_result = 1; + break; + case 'C': + conf.check_result = 0; + break; + case 'p': + conf.num_particles = atoi(optarg); + break; + case 't': + conf.timesteps = atoi(optarg); + break; + case '?': + exit(1); + default: + abort(); + } + } + + if (!conf.num_particles || !conf.timesteps) { + nbody_print_usage(argc, argv); + exit(1); + } + + return conf; +} + +double nbody_compute_throughput(int num_particles, int timesteps, double elapsed_time) +{ + double interactions_per_timestep = 0; +#if defined(_BIGO_N2) + interactions_per_timestep = (double)(num_particles)*(double)(num_particles); +#elif defined(_BIGO_NlogN) + interactions_per_timestep = (double)(num_particles)*(double)(LOG2(num_particles)); +#elif defined(_BIGO_N) + interactions_per_timestep = (double)(num_particles); +#endif + return (((interactions_per_timestep * (double)timesteps) / elapsed_time) / 1000000.0); +} + +double get_time(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)(ts.tv_sec) + (double)ts.tv_nsec * 1.0e-9; +} diff --git a/src/nbody/src/common/common.h b/src/nbody/src/common/common.h new file mode 100644 index 0000000..2deef81 --- /dev/null +++ b/src/nbody/src/common/common.h @@ -0,0 +1,74 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef COMMON_H +#define COMMON_H + +#include + +#ifndef BLOCK_SIZE +#define BLOCK_SIZE 2048 +#endif + +// BIGO definition +#ifndef BIGO +#define BIGO N2 +#define _BIGO_N2 +#endif + +#define PART 1024 +#define PAGE_SIZE 4096 + +#define TOLERATED_ERROR 0.0008 + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define LOG2(a) (31-__builtin_clz((a))) +#define MOD(a, b) ((a) < 0 ? ((((a) % (b)) + (b)) % (b)) : ((a) % (b))) +#define ROUNDUP(x, y) ((((x) + (y - 1)) / y) * y) +#define STRINGIFY(s) #s +#define TOSTRING(s) STRINGIFY(s) +#define CALCULATE_FORCES(s) calculate_forces_##s +#define XCALCULATE_FORCES(s) CALCULATE_FORCES(s) +#define calculate_forces XCALCULATE_FORCES(BIGO) + +static const float gravitational_constant = 6.6726e-11f; /* N(m/kg)2 */ +static const float default_domain_size_x = 1.0e+10; /* m */ +static const float default_domain_size_y = 1.0e+10; /* m */ +static const float default_domain_size_z = 1.0e+10; /* m */ +static const float default_mass_maximum = 1.0e+28; /* kg */ +static const float default_time_interval = 1.0e+0; /* s */ +static const int default_seed = 12345; +static const char* default_name = "data/nbody"; +static const int default_num_particles = 0; +static const int default_timesteps = 0; +static const int default_save_result = 0; +static const int default_check_result = 0; +static const int default_force_generation = 0; + +typedef struct { + float domain_size_x; + float domain_size_y; + float domain_size_z; + float mass_maximum; + float time_interval; + int seed; + const char* name; + int num_particles; + int num_blocks; + int timesteps; + int save_result; + int check_result; + int force_generation; +} nbody_conf_t; + +nbody_conf_t nbody_get_conf(int argc, char **argv); +double nbody_compute_throughput(int num_particles, int timesteps, double elapsed_time); +void * nbody_alloc(size_t size); +double get_time(void); + +#endif // COMMON_H diff --git a/src/nbody/src/plain/main.c b/src/nbody/src/plain/main.c new file mode 100644 index 0000000..62f7e26 --- /dev/null +++ b/src/nbody/src/plain/main.c @@ -0,0 +1,38 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "plain/nbody.h" + +#include +#include +#include +#include +#include + + +int main(int argc, char** argv) +{ + nbody_conf_t conf = nbody_get_conf(argc, argv); + + conf.num_blocks = 1; + conf.num_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); + assert(conf.num_particles >= MIN_PARTICLES); + assert(conf.timesteps > 0); + + nbody_t nbody = nbody_setup(&conf); + + double start = get_time(); + nbody_solve(&nbody, conf.timesteps, conf.time_interval); + double end = get_time(); + + nbody_stats(&nbody, &conf, end - start); + + if (conf.save_result && !conf.force_generation) nbody_save_particles(&nbody); + if (conf.check_result) nbody_check(&nbody); + nbody_free(&nbody); + return 0; +} diff --git a/src/nbody/src/plain/nbody.h b/src/nbody/src/plain/nbody.h new file mode 100644 index 0000000..d044022 --- /dev/null +++ b/src/nbody/src/plain/nbody.h @@ -0,0 +1,73 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef NBODY_H +#define NBODY_H + +#include "common/common.h" + +#include +#include +#include + +#define MIN_PARTICLES 4096 +#define PARTICLE_MEMBERS 8 +#define FORCE_MEMBERS 3 + +// Solver structures +typedef struct { + float *position_x; /* m */ + float *position_y; /* m */ + float *position_z; /* m */ + float *velocity_x; /* m/s */ + float *velocity_y; /* m/s */ + float *velocity_z; /* m/s */ + float *mass; /* kg */ + float *weight; + void *_ptr; + size_t _size; +} particles_t; + +typedef struct { + float *x; /* x */ + float *y; /* y */ + float *z; /* z */ + void *_ptr; + size_t _size; +} forces_t; + +// Application structures +typedef struct { + size_t size; + char name[1000]; +} nbody_file_t; + +typedef struct { + particles_t particles; + forces_t forces; + int num_particles; + int timesteps; + nbody_file_t file; +} nbody_t; + +// Solver function +void nbody_solve(nbody_t *nbody, const int timesteps, const float time_interval); + +// Auxiliary functions +nbody_t nbody_setup(const nbody_conf_t *conf); +void nbody_setup_particles(nbody_t *nbody, const nbody_conf_t *conf); +void nbody_setup_forces(nbody_t *nbody, const nbody_conf_t *conf); +void nbody_link_particles(particles_t *reference, int num_particles, float *addr); +void nbody_particle_init(const nbody_conf_t *conf, particles_t *part); +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time); +void nbody_save_particles(const nbody_t *nbody); +void nbody_free(nbody_t *nbody); +void nbody_check(const nbody_t *nbody); +int nbody_compare_particles(const particles_t *local, const particles_t* reference, int num_particles); + +#endif // NBODY_H + diff --git a/src/nbody/src/plain/solver_omp.c b/src/nbody/src/plain/solver_omp.c new file mode 100644 index 0000000..55c9182 --- /dev/null +++ b/src/nbody/src/plain/solver_omp.c @@ -0,0 +1,114 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "plain/nbody.h" + +#include +#include +#include +#include +#include + +#include + +static void calculate_forces_N2(forces_t *forces, const particles_t *particles, const int num_particles); +static void update_particles(particles_t *particles, forces_t *forces, const int num_particles, const float time_interval); + +void nbody_solve(nbody_t *nbody, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + int num_particles = nbody->num_particles; + particles_t *particles = &nbody->particles; + forces_t *forces = &nbody->forces; + + #pragma omp parallel + for (int t = 0; t < timesteps; t++) { + calculate_forces(forces, particles, num_particles); + update_particles(particles, forces, num_particles, time_interval); + } +} + +void calculate_forces_N2(forces_t *forces, const particles_t *particles, const int num_particles) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const float *pos_x = particles->position_x; + const float *pos_y = particles->position_y; + const float *pos_z = particles->position_z; + const float *mass = particles->mass; + + #pragma omp for schedule(static) + for (int i = 0; i < num_particles; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < num_particles; j++) { + const float diff_x = pos_x[j] - pos_x[i]; + const float diff_y = pos_y[j] - pos_y[i]; + const float diff_z = pos_z[j] - pos_z[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (distance_squared != 0.0f) { + force = (mass[i] / (distance_squared * distance)) * (mass[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles(particles_t *particles, forces_t *forces, const int num_particles, const float time_interval) +{ + #pragma omp for schedule(static) + for (int e = 0; e < num_particles; e++) { + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + #pragma omp single + memset(forces->_ptr, 0, forces->_size); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + (void) conf; + + printf("bigo, %s, threads, %d, timesteps, %d, total_particles, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), omp_get_max_threads(), nbody->timesteps, nbody->num_particles, + time, nbody_compute_throughput(nbody->num_particles, nbody->timesteps, time) + ); +} diff --git a/src/nbody/src/plain/solver_seq.c b/src/nbody/src/plain/solver_seq.c new file mode 100644 index 0000000..864745a --- /dev/null +++ b/src/nbody/src/plain/solver_seq.c @@ -0,0 +1,108 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "plain/nbody.h" + +#include +#include +#include +#include +#include + + +static void calculate_forces_N2(forces_t *forces, const particles_t *particles, const int num_particles); +static void update_particles(particles_t *particles, forces_t *forces, const int num_particles, const float time_interval); + +void nbody_solve(nbody_t *nbody, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + int num_particles = nbody->num_particles; + particles_t *particles = &nbody->particles; + forces_t *forces = &nbody->forces; + + for (int t = 0; t < timesteps; t++) { + calculate_forces(forces, particles, num_particles); + update_particles(particles, forces, num_particles, time_interval); + } +} + +void calculate_forces_N2(forces_t *forces, const particles_t *particles, const int num_particles) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const float *pos_x = particles->position_x; + const float *pos_y = particles->position_y; + const float *pos_z = particles->position_z; + const float *mass = particles->mass; + + for (int i = 0; i < num_particles; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < num_particles; j++) { + const float diff_x = pos_x[j] - pos_x[i]; + const float diff_y = pos_y[j] - pos_y[i]; + const float diff_z = pos_z[j] - pos_z[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (distance_squared != 0.0f) { + force = (mass[i] / (distance_squared * distance)) * (mass[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles(particles_t *particles, forces_t *forces, const int num_particles, const float time_interval) +{ + for (int e = 0; e < num_particles; e++) { + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces->_ptr, 0, forces->_size); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + (void) conf; + printf("bigo, %s, timesteps, %d, total_particles, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), nbody->timesteps, nbody->num_particles, time, + nbody_compute_throughput(nbody->num_particles, nbody->timesteps, time) + ); +} diff --git a/src/nbody/src/plain/utils.c b/src/nbody/src/plain/utils.c new file mode 100644 index 0000000..451c79b --- /dev/null +++ b/src/nbody/src/plain/utils.c @@ -0,0 +1,261 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "plain/nbody.h" + +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void nbody_particle_init(const nbody_conf_t *conf, particles_t *part) +{ + const int num_particles = conf->num_particles; + + for (int i = 0; i < num_particles; i++) { + part->position_x[i] = conf->domain_size_x * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->position_y[i] = conf->domain_size_y * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->position_z[i] = conf->domain_size_z * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->mass[i] = conf->mass_maximum * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->weight[i] = gravitational_constant * part->mass[i]; + } +} + +int nbody_compare_particles(const particles_t *local, const particles_t *reference, int num_particles) +{ + double error = 0.0; + int count = 0; + for (int i = 0; i < num_particles; i++) { + if ((local->position_x[i] != reference->position_x[i]) || + (local->position_y[i] != reference->position_y[i]) || + (local->position_z[i] != reference->position_z[i])) { + error += fabs(((local->position_x[i] - reference->position_x[i])*100.0) / reference->position_x[i]) + + fabs(((local->position_y[i] - reference->position_y[i])*100.0) / reference->position_y[i]) + + fabs(((local->position_z[i] - reference->position_z[i])*100.0) / reference->position_z[i]); + count++; + } + } + + double relative_error = (count != 0) ? error / (3.0 * count) : 0.0; + if ((count * 100.0) / (num_particles) > 0.6 || relative_error > TOLERATED_ERROR) { + return 0; + } + return 1; +} + +void nbody_check(const nbody_t *nbody) +{ + char fname[1024]; + sprintf(fname, "%s.ref", nbody->file.name); + if (access(fname, F_OK) != 0) { + fprintf(stderr, "Warning: %s file does not exist. Skipping the check...\n", fname); + return; + } + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + void *reference_addr = mmap(NULL, nbody->file.size, PROT_READ, MAP_SHARED, fd, 0); + assert(reference_addr != MAP_FAILED); + + particles_t reference; + nbody_link_particles(&reference, nbody->num_particles, reference_addr); + + if (nbody_compare_particles(&nbody->particles, &reference, nbody->num_particles)) { + printf("Result validation: OK\n"); + } else { + printf("Result validation: ERROR\n"); + } + + int err = munmap(reference_addr, nbody->file.size); + assert(!err); + + err = close(fd); + assert(!err); +} + +static void nbody_generate_particles(const nbody_conf_t *conf, nbody_file_t *file) +{ + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + if (!access(fname, F_OK)) { + return; + } + + struct stat st = {0}; + if (stat("data", &st) == -1) { + mkdir("data", 0755); + } + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int size = file->size; + assert(size % PAGE_SIZE == 0); + + int err = ftruncate(fd, size); + assert(!err); + + void *addr = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + assert(addr != MAP_FAILED); + + particles_t particles; + nbody_link_particles(&particles, conf->num_particles, addr); + + // Initialize the particles + nbody_particle_init(conf, &particles); + + err = munmap(addr, size); + assert(!err); + + err = close(fd); + assert(!err); +} + +static void nbody_load_particles(nbody_t *nbody, const nbody_conf_t *conf, nbody_file_t *file) +{ + (void) conf; + + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + void * addr = mmap(NULL, file->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + assert(addr != MAP_FAILED); + + memcpy(nbody->particles._ptr, addr, nbody->particles._size); + + int err = munmap(addr, file->size); + assert(!err); + + err = close(fd); + assert(!err); +} + +static void nbody_setup_file(nbody_t *nbody, const nbody_conf_t *conf) +{ + nbody_file_t file; + file.size = nbody->particles._size; + sprintf(file.name, "%s-%s-%d-%d-%d", conf->name, TOSTRING(BIGO), conf->num_particles, conf->num_particles, conf->timesteps); + nbody->file = file; +} + +void nbody_link_particles(particles_t *particles, int num_particles, float *addr) +{ + const size_t size = num_particles * sizeof(float); + particles->position_x = addr + 0; + particles->position_y = addr + 1; + particles->position_z = addr + 2; + particles->velocity_x = addr + 3; + particles->velocity_y = addr + 4; + particles->velocity_z = addr + 5; + particles->mass = addr + 6; + particles->weight = addr + 7; + particles->_ptr = addr; + particles->_size = size * PARTICLE_MEMBERS; +} + +void nbody_setup_particles(nbody_t *nbody, const nbody_conf_t *conf) +{ + void *addr = nbody_alloc(nbody->particles._size); + nbody_link_particles(&nbody->particles, nbody->num_particles, addr); + + nbody_generate_particles(conf, &nbody->file); + nbody_load_particles(nbody, conf, &nbody->file); +} + +void nbody_setup_forces(nbody_t *nbody, const nbody_conf_t *conf) +{ + char *base_addr = nbody_alloc(nbody->forces._size); + size_t array_size = conf->num_particles * sizeof(float); + + nbody->forces._ptr = base_addr; + nbody->forces.x = (float *) (base_addr + (0 * array_size)); + nbody->forces.y = (float *) (base_addr + (1 * array_size)); + nbody->forces.z = (float *) (base_addr + (2 * array_size)); +} + +nbody_t nbody_setup(const nbody_conf_t *conf) +{ + nbody_t nbody; + nbody.num_particles = conf->num_particles; + nbody.timesteps = conf->timesteps; + + // Compute the size of the structures + nbody.particles._size = nbody.num_particles * PARTICLE_MEMBERS * sizeof(float); + nbody.forces._size = nbody.num_particles * FORCE_MEMBERS * sizeof(float); + + // Set up the file + nbody_setup_file(&nbody, conf); + + if (conf->force_generation) { + void *addr = nbody_alloc(nbody.particles._size); + nbody_link_particles(&nbody.particles, nbody.num_particles, addr); + + // Initialize the particles + nbody_particle_init(conf, &nbody.particles); + + // Initialize the forces + nbody_setup_forces(&nbody, conf); + } + else { + // Initialize the particles + nbody_setup_particles(&nbody, conf); + + // Initialize the forces + nbody_setup_forces(&nbody, conf); + } + + return nbody; +} + +void nbody_save_particles(const nbody_t *nbody) +{ + char fname[1024]; + sprintf(fname, "%s.out", nbody->file.name); + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int size = nbody->file.size; + assert(size % PAGE_SIZE == 0); + + int err = ftruncate(fd, size); + assert(!err); + + void *particles = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + assert(particles != MAP_FAILED); + + memcpy(particles, nbody->particles._ptr, size); + + err = munmap(particles, size); + assert(!err); + + err = close(fd); + assert(!err); +} + +void nbody_free(nbody_t *nbody) +{ + int err = munmap(nbody->particles._ptr, nbody->particles._size); + err |= munmap(nbody->forces._ptr, nbody->forces._size); + assert(!err); +} + -- GitLab From b93bbefefbf41de9c13784d1d4af00337872fa35 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 20 Nov 2024 10:39:48 +0100 Subject: [PATCH 2/6] Make blocksize configurable at run time --- src/nbody/CMakeLists.txt | 3 ++ src/nbody/src/blocking/common/common_utils.c | 8 +-- src/nbody/src/blocking/common/nbody.h | 2 +- src/nbody/src/blocking/smp/main.c | 2 +- src/nbody/src/blocking/smp/solver_seq.c | 44 +++++++++-------- src/nbody/src/blocking/smp/utils.c | 9 ++-- src/nbody/src/common/common.c | 52 ++++++++++---------- src/nbody/src/common/common.h | 10 ++-- 8 files changed, 69 insertions(+), 61 deletions(-) diff --git a/src/nbody/CMakeLists.txt b/src/nbody/CMakeLists.txt index 8f2be92..7f4b4e2 100644 --- a/src/nbody/CMakeLists.txt +++ b/src/nbody/CMakeLists.txt @@ -14,6 +14,9 @@ set(SMP_SOURCES mk_bench(b6_nbody_seq_plain) target_sources(b6_nbody_seq_plain PRIVATE ${PLAIN_SOURCES} src/plain/solver_seq.c) +mk_bench(b6_nbody_seq) +target_sources(b6_nbody_seq PRIVATE ${SMP_SOURCES} src/blocking/smp/solver_seq.c) + find_package(OpenMP) if (OPENMP_FOUND) mk_bench(b6_nbody_omp_plain) diff --git a/src/nbody/src/blocking/common/common_utils.c b/src/nbody/src/blocking/common/common_utils.c index 9253796..9600293 100644 --- a/src/nbody/src/blocking/common/common_utils.c +++ b/src/nbody/src/blocking/common/common_utils.c @@ -19,10 +19,10 @@ void nbody_particle_init(const nbody_conf_t *conf, particles_block_t *part) { for (int i = 0; i < BLOCK_SIZE; i++){ - part->position_x[i] = conf->domain_size_x * ((float)random() / ((float)RAND_MAX + 1.0)); - part->position_y[i] = conf->domain_size_y * ((float)random() / ((float)RAND_MAX + 1.0)); - part->position_z[i] = conf->domain_size_z * ((float)random() / ((float)RAND_MAX + 1.0)); - part->mass[i] = conf->mass_maximum * ((float)random() / ((float)RAND_MAX + 1.0)); + part->position_x[i] = conf->domain_size_x * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->position_y[i] = conf->domain_size_y * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->position_z[i] = conf->domain_size_z * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->mass[i] = conf->mass_maximum * ((float)rand() / ((float)RAND_MAX + 1.0)); part->weight[i] = gravitational_constant * part->mass[i]; } } diff --git a/src/nbody/src/blocking/common/nbody.h b/src/nbody/src/blocking/common/nbody.h index b6c8d13..51b6fe6 100644 --- a/src/nbody/src/blocking/common/nbody.h +++ b/src/nbody/src/blocking/common/nbody.h @@ -44,7 +44,7 @@ typedef struct nbody_file_t nbody_file_t; typedef struct nbody_t nbody_t; // Solver function -void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval); +void nbody_solve(nbody_t *nbody, const int blocksize, const int num_blocks, const int timesteps, const float time_interval); // Auxiliary functions nbody_t nbody_setup(const nbody_conf_t *conf); diff --git a/src/nbody/src/blocking/smp/main.c b/src/nbody/src/blocking/smp/main.c index 55c0ab4..d631bd8 100644 --- a/src/nbody/src/blocking/smp/main.c +++ b/src/nbody/src/blocking/smp/main.c @@ -28,7 +28,7 @@ int main(int argc, char** argv) nbody_t nbody = nbody_setup(&conf); double start = get_time(); - nbody_solve(&nbody, conf.num_blocks, conf.timesteps, conf.time_interval); + nbody_solve(&nbody, conf.blocksize, conf.num_blocks, conf.timesteps, conf.time_interval); double end = get_time(); nbody_stats(&nbody, &conf, end - start); diff --git a/src/nbody/src/blocking/smp/solver_seq.c b/src/nbody/src/blocking/smp/solver_seq.c index 6b767f9..a3ae2c9 100644 --- a/src/nbody/src/blocking/smp/solver_seq.c +++ b/src/nbody/src/blocking/smp/solver_seq.c @@ -13,12 +13,12 @@ #include #include -static void calculate_forces(forces_block_t *forces, const particles_block_t *particles, const int num_blocks); -static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); -static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); -static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); +static void calculate_forces(forces_block_t *forces, const particles_block_t *particles, const int blocksize, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval); +static void calculate_forces_block(forces_block_t *forces, const int blocksize, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval, const int blocksize); -void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +void nbody_solve(nbody_t *nbody, const int blocksize, const int num_blocks, const int timesteps, const float time_interval) { assert(nbody != NULL); @@ -26,21 +26,22 @@ void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, cons forces_block_t *forces = nbody->forces; for (int t = 0; t < timesteps; t++) { - calculate_forces(forces, particles, num_blocks); - update_particles(particles, forces, num_blocks, time_interval); + calculate_forces(forces, particles, blocksize, num_blocks); + update_particles(particles, forces, blocksize, num_blocks, time_interval); } } -void calculate_forces_N2(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *particles, const int blocksize, const int num_blocks) { for (int i = 0; i < num_blocks; i++) { for (int j = 0; j < num_blocks; j++) { - calculate_forces_block(forces+i, particles+i, particles+j); + calculate_forces_block(forces+i, blocksize, particles+i, particles+j); } } } -void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +#if 0 +static void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) { for (int i = 0; i < num_blocks; i++) { for (int j = 0; j < LOG2(num_blocks); j++) { @@ -49,21 +50,22 @@ void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *par } } -void calculate_forces_N(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +static void calculate_forces_N(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) { for (int i = 0; i < num_blocks - 1; i++) { calculate_forces_block(forces+i, particles+i, particles+i+1); } } +#endif -void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval) { for (int i = 0; i < num_blocks; i++) { - update_particles_block(particles+i, forces+i, time_interval); + update_particles_block(particles+i, forces+i, time_interval, blocksize); } } -static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +static void calculate_forces_block(forces_block_t *forces, const int blocksize, const particles_block_t *block1, const particles_block_t *block2) { float *x = forces->x; float *y = forces->y; @@ -80,9 +82,9 @@ static void calculate_forces_block(forces_block_t *forces, const particles_block const float *pos_z2 = block2->position_z; const float *mass2 = block2->mass; - for (int i = 0; i < BLOCK_SIZE; i++) { + for (int i = 0; i < blocksize; i++) { float fx = x[i], fy = y[i], fz = z[i]; - for (int j = 0; j < BLOCK_SIZE; j++) { + for (int j = 0; j < blocksize; j++) { const float diff_x = pos_x2[j] - pos_x1[i]; const float diff_y = pos_y2[j] - pos_y1[i]; const float diff_z = pos_z2[j] - pos_z1[i]; @@ -104,9 +106,9 @@ static void calculate_forces_block(forces_block_t *forces, const particles_block } } -void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval, const int blocksize) { - for (int e = 0; e < BLOCK_SIZE; e++){ + for (int e = 0; e < blocksize; e++){ const float mass = particles->mass[e]; const float velocity_x = particles->velocity_x[e]; const float velocity_y = particles->velocity_y[e]; @@ -138,9 +140,11 @@ void update_particles_block(particles_block_t *particles, forces_block_t *forces void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) { - int particles = nbody->num_blocks * BLOCK_SIZE; + (void) conf; + + int particles = nbody->num_blocks * conf->blocksize; printf("bigo, %s, timesteps, %d, total_particles, %d, block_size, %d, blocks, %d, time, %.2f, performance, %.2f\n", - TOSTRING(BIGO), nbody->timesteps, particles, BLOCK_SIZE, nbody->num_blocks, + TOSTRING(BIGO), nbody->timesteps, particles, conf->blocksize, nbody->num_blocks, time, nbody_compute_throughput(particles, nbody->timesteps, time) ); } diff --git a/src/nbody/src/blocking/smp/utils.c b/src/nbody/src/blocking/smp/utils.c index 3e70e9d..0aa5dd0 100644 --- a/src/nbody/src/blocking/smp/utils.c +++ b/src/nbody/src/blocking/smp/utils.c @@ -7,6 +7,7 @@ #include "blocking/smp/nbody.h" +#undef NDEBUG #include #include #include @@ -23,7 +24,7 @@ #include -void nbody_generate_particles(const nbody_conf_t *conf, const nbody_file_t *file) +static void nbody_generate_particles(const nbody_conf_t *conf, const nbody_file_t *file) { char fname[1024]; sprintf(fname, "%s.in", file->name); @@ -87,7 +88,7 @@ void nbody_check(const nbody_t *nbody) assert(!err); } -nbody_file_t nbody_setup_file(const nbody_conf_t *conf) +static nbody_file_t nbody_setup_file(const nbody_conf_t *conf) { nbody_file_t file; file.size = conf->num_blocks * sizeof(particles_block_t); @@ -96,8 +97,10 @@ nbody_file_t nbody_setup_file(const nbody_conf_t *conf) return file; } -particles_block_t * nbody_load_particles(const nbody_conf_t *conf, const nbody_file_t *file) +static particles_block_t *nbody_load_particles(const nbody_conf_t *conf, const nbody_file_t *file) { + (void) conf; + char fname[1024]; sprintf(fname, "%s.in", file->name); diff --git a/src/nbody/src/common/common.c b/src/nbody/src/common/common.c index e44feee..4d941db 100644 --- a/src/nbody/src/common/common.c +++ b/src/nbody/src/common/common.c @@ -26,17 +26,17 @@ void * nbody_alloc(size_t size) static void nbody_print_usage(int argc, char **argv) { (void) argc; - fprintf(stderr, "Usage: %s <-p particles> <-t timesteps> [OPTION]...\n", argv[0]); - fprintf(stderr, "Parameters:\n"); - fprintf(stderr, " -p, --particles=PARTICLES\t\tuse PARTICLES as the total number of particles (default: 16384)\n"); - fprintf(stderr, " -t, --timesteps=TIMESTEPS\t\tuse TIMESTEPS as the number of timesteps (default: 10)\n\n"); - fprintf(stderr, "Optional parameters:\n"); - fprintf(stderr, " -f, --force-generation\t\t\t\talways force the generation of particles without the usage of files(disabled by default)\n"); - fprintf(stderr, " -c, --check\t\t\t\tcheck the correctness of the result (disabled by default)\n"); - fprintf(stderr, " -C, --no-check\t\t\tdo not check the correctness of the result\n"); - fprintf(stderr, " -o, --output\t\t\t\tsave the computed particles to the default output file (disabled by default)\n"); - fprintf(stderr, " -O, --no-output\t\t\tdo not save the computed particles to the default output file\n"); - fprintf(stderr, " -h, --help\t\t\t\tdisplay this help and exit\n\n"); + fprintf(stderr, "Usage: %s [OPTIONS]...\n", argv[0]); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -p, --particles=PARTICLES use PARTICLES as the total number of particles (default: %d)\n", default_num_particles); + fprintf(stderr, " -t, --timesteps=TIMESTEPS use TIMESTEPS as the number of timesteps (default: %d)\n\n", default_timesteps); + fprintf(stderr, " -b, --blocksize=BLOCKSIZE use BLOCKSIZE as the block size (default: %d)\n", default_blocksize); + fprintf(stderr, " -f, --force-generation always force the generation of particles without the usage of files (disabled by default)\n"); + fprintf(stderr, " -c, --check check the correctness of the result (disabled by default)\n"); + fprintf(stderr, " -C, --no-check do not check the correctness of the result\n"); + fprintf(stderr, " -o, --output save the computed particles to the default output file (disabled by default)\n"); + fprintf(stderr, " -O, --no-output do not save the computed particles to the default output file\n"); + fprintf(stderr, " -h, --help display this help and exit\n\n"); } nbody_conf_t nbody_get_conf(int argc, char **argv) @@ -50,27 +50,29 @@ nbody_conf_t nbody_get_conf(int argc, char **argv) conf.seed = default_seed; conf.name = default_name; conf.num_particles = default_num_particles; - conf.num_blocks = conf.num_particles / BLOCK_SIZE; + conf.blocksize = default_blocksize; + conf.num_blocks = conf.num_particles / conf.blocksize; conf.timesteps = default_timesteps; conf.save_result = default_save_result; conf.check_result = default_check_result; conf.force_generation = default_force_generation; static struct option long_options[] = { - {"particles", required_argument, 0, 'p'}, - {"timesteps", required_argument, 0, 't'}, - {"force-generation", no_argument, 0, 'f'}, - {"check", no_argument, 0, 'c'}, - {"no-check", no_argument, 0, 'C'}, - {"output", no_argument, 0, 'o'}, - {"no-output", no_argument, 0, 'O'}, - {"help", no_argument, 0, 'h'}, + {"particles", required_argument, 0, 'p'}, + {"timesteps", required_argument, 0, 't'}, + {"blocksize", required_argument, 0, 'b'}, + {"force-generation", no_argument, 0, 'f'}, + {"check", no_argument, 0, 'c'}, + {"no-check", no_argument, 0, 'C'}, + {"output", no_argument, 0, 'o'}, + {"no-output", no_argument, 0, 'O'}, + {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; int c; int index; - while ((c = getopt_long(argc, argv, "hfoOcCp:t:", long_options, &index)) != -1) { + while ((c = getopt_long(argc, argv, "hfoOcCp:t:b:", long_options, &index)) != -1) { switch (c) { case 'h': nbody_print_usage(argc, argv); @@ -93,6 +95,9 @@ nbody_conf_t nbody_get_conf(int argc, char **argv) case 'p': conf.num_particles = atoi(optarg); break; + case 'b': + conf.blocksize = atoi(optarg); + break; case 't': conf.timesteps = atoi(optarg); break; @@ -103,11 +108,6 @@ nbody_conf_t nbody_get_conf(int argc, char **argv) } } - if (!conf.num_particles || !conf.timesteps) { - nbody_print_usage(argc, argv); - exit(1); - } - return conf; } diff --git a/src/nbody/src/common/common.h b/src/nbody/src/common/common.h index 2deef81..2919a5f 100644 --- a/src/nbody/src/common/common.h +++ b/src/nbody/src/common/common.h @@ -10,10 +10,6 @@ #include -#ifndef BLOCK_SIZE -#define BLOCK_SIZE 2048 -#endif - // BIGO definition #ifndef BIGO #define BIGO N2 @@ -44,8 +40,9 @@ static const float default_mass_maximum = 1.0e+28; /* kg */ static const float default_time_interval = 1.0e+0; /* s */ static const int default_seed = 12345; static const char* default_name = "data/nbody"; -static const int default_num_particles = 0; -static const int default_timesteps = 0; +static const int default_blocksize = 2048; +static const int default_num_particles = 16384; +static const int default_timesteps = 10; static const int default_save_result = 0; static const int default_check_result = 0; static const int default_force_generation = 0; @@ -59,6 +56,7 @@ typedef struct { int seed; const char* name; int num_particles; + int blocksize; int num_blocks; int timesteps; int save_result; -- GitLab From 595fe97a821e74c8cf9cba9af3bd8653fcc7b2f7 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 20 Nov 2024 15:06:23 +0100 Subject: [PATCH 3/6] Make blocks dynamic for nbody --- src/nbody/src/blocking/common/common_utils.c | 8 +- src/nbody/src/blocking/common/nbody.h | 31 +++--- src/nbody/src/blocking/smp/main.c | 7 +- src/nbody/src/blocking/smp/nbody.h | 3 + src/nbody/src/blocking/smp/utils.c | 106 ++++++++++++++++--- src/nbody/src/common/common.c | 3 +- 6 files changed, 114 insertions(+), 44 deletions(-) diff --git a/src/nbody/src/blocking/common/common_utils.c b/src/nbody/src/blocking/common/common_utils.c index 9600293..98273ef 100644 --- a/src/nbody/src/blocking/common/common_utils.c +++ b/src/nbody/src/blocking/common/common_utils.c @@ -18,7 +18,7 @@ void nbody_particle_init(const nbody_conf_t *conf, particles_block_t *part) { - for (int i = 0; i < BLOCK_SIZE; i++){ + for (int i = 0; i < conf->blocksize; i++) { part->position_x[i] = conf->domain_size_x * ((float)rand() / ((float)RAND_MAX + 1.0)); part->position_y[i] = conf->domain_size_y * ((float)rand() / ((float)RAND_MAX + 1.0)); part->position_z[i] = conf->domain_size_z * ((float)rand() / ((float)RAND_MAX + 1.0)); @@ -27,12 +27,12 @@ void nbody_particle_init(const nbody_conf_t *conf, particles_block_t *part) } } -int nbody_compare_particles(const particles_block_t *local, const particles_block_t *reference, int num_blocks) +int nbody_compare_particles(const particles_block_t *local, const particles_block_t *reference, int blocksize, int num_blocks) { double error = 0.0; int count = 0; for (int i = 0; i < num_blocks; i++) { - for (int e = 0; e < BLOCK_SIZE; e++) { + for (int e = 0; e < blocksize; e++) { if ((local[i].position_x[e] != reference[i].position_x[e]) || (local[i].position_y[e] != reference[i].position_y[e]) || (local[i].position_z[e] != reference[i].position_z[e])) { @@ -45,7 +45,7 @@ int nbody_compare_particles(const particles_block_t *local, const particles_bloc } double relative_error = (count != 0) ? error / (3.0 * count) : 0.0; - if ((count * 100.0) / (num_blocks * BLOCK_SIZE) > 0.6 || relative_error > TOLERATED_ERROR) { + if ((count * 100.0) / (num_blocks * blocksize) > 0.6 || relative_error > TOLERATED_ERROR) { return 0; } return 1; diff --git a/src/nbody/src/blocking/common/nbody.h b/src/nbody/src/blocking/common/nbody.h index 51b6fe6..4d713ed 100644 --- a/src/nbody/src/blocking/common/nbody.h +++ b/src/nbody/src/blocking/common/nbody.h @@ -14,29 +14,24 @@ #include #include -// Block size definition -#ifndef BLOCK_SIZE -#define BLOCK_SIZE 2048 -#endif - -#define MIN_PARTICLES (4096 * BLOCK_SIZE / sizeof(particles_block_t)) +//#define MIN_PARTICLES (4096 * BLOCK_SIZE / sizeof(particles_block_t)) // Solver structures typedef struct { - float position_x[BLOCK_SIZE]; /* m */ - float position_y[BLOCK_SIZE]; /* m */ - float position_z[BLOCK_SIZE]; /* m */ - float velocity_x[BLOCK_SIZE]; /* m/s */ - float velocity_y[BLOCK_SIZE]; /* m/s */ - float velocity_z[BLOCK_SIZE]; /* m/s */ - float mass[BLOCK_SIZE]; /* kg */ - float weight[BLOCK_SIZE]; + float *position_x; /* m */ + float *position_y; /* m */ + float *position_z; /* m */ + float *velocity_x; /* m/s */ + float *velocity_y; /* m/s */ + float *velocity_z; /* m/s */ + float *mass; /* kg */ + float *weight; } particles_block_t; typedef struct { - float x[BLOCK_SIZE]; /* x */ - float y[BLOCK_SIZE]; /* y */ - float z[BLOCK_SIZE]; /* z */ + float *x; /* x */ + float *y; /* y */ + float *z; /* z */ } forces_block_t; // Forward declaration @@ -53,7 +48,7 @@ void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time); void nbody_save_particles(const nbody_t *nbody); void nbody_free(nbody_t *nbody); void nbody_check(const nbody_t *nbody); -int nbody_compare_particles(const particles_block_t *local, const particles_block_t *reference, int num_blocks); +int nbody_compare_particles(const particles_block_t *local, const particles_block_t *reference, int blocksize, int num_blocks); #endif // NBODY_H diff --git a/src/nbody/src/blocking/smp/main.c b/src/nbody/src/blocking/smp/main.c index d631bd8..333cb15 100644 --- a/src/nbody/src/blocking/smp/main.c +++ b/src/nbody/src/blocking/smp/main.c @@ -13,16 +13,15 @@ #include #include - int main(int argc, char** argv) { nbody_conf_t conf = nbody_get_conf(argc, argv); - - conf.num_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); + + //conf.num_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); assert(conf.num_particles >= BLOCK_SIZE); assert(conf.timesteps > 0); - conf.num_blocks = conf.num_particles / BLOCK_SIZE; + conf.num_blocks = conf.num_particles / conf.blocksize; assert(conf.num_blocks > 0); nbody_t nbody = nbody_setup(&conf); diff --git a/src/nbody/src/blocking/smp/nbody.h b/src/nbody/src/blocking/smp/nbody.h index 6b34fd1..6611ee5 100644 --- a/src/nbody/src/blocking/smp/nbody.h +++ b/src/nbody/src/blocking/smp/nbody.h @@ -17,8 +17,11 @@ struct nbody_file_t { }; struct nbody_t { + float *particles_map; particles_block_t *particles; + float *forces_map; forces_block_t *forces; + int blocksize; int num_blocks; int timesteps; nbody_file_t file; diff --git a/src/nbody/src/blocking/smp/utils.c b/src/nbody/src/blocking/smp/utils.c index 0aa5dd0..da8c299 100644 --- a/src/nbody/src/blocking/smp/utils.c +++ b/src/nbody/src/blocking/smp/utils.c @@ -23,7 +23,7 @@ #include #include - +/* static void nbody_generate_particles(const nbody_conf_t *conf, const nbody_file_t *file) { char fname[1024]; @@ -59,6 +59,7 @@ static void nbody_generate_particles(const nbody_conf_t *conf, const nbody_file_ err = close(fd); assert(!err); } +*/ void nbody_check(const nbody_t *nbody) { @@ -75,7 +76,7 @@ void nbody_check(const nbody_t *nbody) particles_block_t *reference = mmap(NULL, nbody->file.size, PROT_READ, MAP_SHARED, fd, 0); assert(reference != MAP_FAILED); - if (nbody_compare_particles(nbody->particles, reference, nbody->num_blocks)) { + if (nbody_compare_particles(nbody->particles, reference, nbody->blocksize, nbody->num_blocks)) { printf("Result validation: OK\n"); } else { printf("Result validation: ERROR\n"); @@ -93,10 +94,10 @@ static nbody_file_t nbody_setup_file(const nbody_conf_t *conf) nbody_file_t file; file.size = conf->num_blocks * sizeof(particles_block_t); - sprintf(file.name, "%s-%s-%d-%d-%d", conf->name, TOSTRING(BIGO), conf->num_blocks * BLOCK_SIZE, BLOCK_SIZE, conf->timesteps); + sprintf(file.name, "%s-%s-%d-%d-%d", conf->name, TOSTRING(BIGO), conf->num_blocks * conf->blocksize, conf->blocksize, conf->timesteps); return file; } - +/* static particles_block_t *nbody_load_particles(const nbody_conf_t *conf, const nbody_file_t *file) { (void) conf; @@ -115,36 +116,107 @@ static particles_block_t *nbody_load_particles(const nbody_conf_t *conf, const n return ptr; } +*/ + +static void alloc_particles(const nbody_conf_t *conf, nbody_t *nbody) +{ + size_t bs = (size_t) conf->blocksize; + size_t n = (size_t) conf->num_blocks; + size_t block_floats = bs * (3 + 3 + 2); + size_t block_bytes = block_floats * sizeof(float); + size_t alloc_bytes = n * block_bytes; + + nbody->particles_map = nbody_alloc(alloc_bytes); + nbody->particles = nbody_alloc(n * sizeof(particles_block_t)); + + float *m = nbody->particles_map; + + /* Setup the particles_block_t pointers, so they point to the big chunk + * in particle_map: + * + * [ particles[0] ] [ particles[1] ] ... + * [ px ][ py ][ pz ] [... ] + * | \_________________ + * | \ + * v v + * [ px0 ] [ px1 ] [ ... ] [ py0 ] [ ... ] ... + * [ particles_map ] + */ + + for (int i = 0; i < conf->num_blocks; i++) { + particles_block_t *p = &nbody->particles[i]; + float *b = &m[block_floats * i]; + p->position_x = &b[bs * 0]; + p->position_y = &b[bs * 1]; + p->position_z = &b[bs * 2]; + p->velocity_x = &b[bs * 3]; + p->velocity_y = &b[bs * 4]; + p->velocity_z = &b[bs * 5]; + p->mass = &b[bs * 6]; + p->weight = &b[bs * 7]; + } +} + +static void alloc_forces(const nbody_conf_t *conf, nbody_t *nbody) +{ + size_t bs = (size_t) conf->blocksize; + size_t n = (size_t) conf->num_blocks; + size_t block_floats = bs * 3; + size_t block_bytes = block_floats * sizeof(float); + size_t alloc_bytes = n * block_bytes; + + nbody->forces_map = nbody_alloc(alloc_bytes); + nbody->forces = nbody_alloc(n * sizeof(forces_block_t)); + + float *m = nbody->forces_map; + + /* Setup the forces_block_t pointers, so they point to the big chunk + * in forces_map: + * + * [ forces[0] ] [ forces[1] ] ... + * [ x ][ y ][ z ] + * | \________________ + * | \ + * v v + * [ x0 ] [ x1 ] [ ... ] [ y0 ] [ y1 ] [ ... ] + * [ forces_map ] + */ + + for (int i = 0; i < conf->num_blocks; i++) { + forces_block_t *f = &nbody->forces[i]; + float *b = &m[block_floats * i]; + f->x = &b[bs * 0]; + f->y = &b[bs * 1]; + f->z = &b[bs * 2]; + } +} nbody_t nbody_setup(const nbody_conf_t *conf) { nbody_t nbody; nbody.timesteps = conf->timesteps; + nbody.blocksize = conf->blocksize; nbody.num_blocks = conf->num_blocks; nbody_file_t file = nbody_setup_file(conf); nbody.file = file; if (conf->force_generation) { - nbody.particles = nbody_alloc(conf->num_blocks * sizeof(particles_block_t)); - assert(nbody.particles != NULL); + alloc_particles(conf, &nbody); - for (int i = 0; i < conf->num_blocks; i++) { + for (int i = 0; i < conf->num_blocks; i++) nbody_particle_init(conf, nbody.particles+i); - } - - nbody.forces = nbody_alloc(conf->num_blocks * sizeof(forces_block_t)); - assert(nbody.forces != NULL); - } - else { + } else { + fprintf(stderr, "not implemented\n"); + exit(1); + /* nbody_generate_particles(conf, &file); - nbody.particles = nbody_load_particles(conf, &file); assert(nbody.particles != NULL); - - nbody.forces = nbody_alloc(conf->num_blocks * sizeof(forces_block_t)); - assert(nbody.forces != NULL); + */ } + + alloc_forces(conf, &nbody); return nbody; } diff --git a/src/nbody/src/common/common.c b/src/nbody/src/common/common.c index 4d941db..7608100 100644 --- a/src/nbody/src/common/common.c +++ b/src/nbody/src/common/common.c @@ -19,7 +19,8 @@ void * nbody_alloc(size_t size) { void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - assert(addr != MAP_FAILED); + if (addr == MAP_FAILED) + abort(); return addr; } -- GitLab From 3cc7dd3c63d0a3e32fd8c243d13f71c40f1d2084 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Wed, 20 Nov 2024 15:40:11 +0100 Subject: [PATCH 4/6] Add OpenMP block-based nbody --- src/nbody/CMakeLists.txt | 8 +- src/nbody/src/blocking/smp/main.c | 4 +- src/nbody/src/blocking/smp/solver_omp.c | 154 ++++++++++++++++++++++++ src/nbody/src/blocking/smp/solver_seq.c | 9 +- src/nbody/src/blocking/smp/utils.c | 1 + 5 files changed, 172 insertions(+), 4 deletions(-) create mode 100644 src/nbody/src/blocking/smp/solver_omp.c diff --git a/src/nbody/CMakeLists.txt b/src/nbody/CMakeLists.txt index 7f4b4e2..8715385 100644 --- a/src/nbody/CMakeLists.txt +++ b/src/nbody/CMakeLists.txt @@ -13,13 +13,19 @@ set(SMP_SOURCES mk_bench(b6_nbody_seq_plain) target_sources(b6_nbody_seq_plain PRIVATE ${PLAIN_SOURCES} src/plain/solver_seq.c) +target_link_libraries(b6_nbody_seq_plain PRIVATE m) mk_bench(b6_nbody_seq) target_sources(b6_nbody_seq PRIVATE ${SMP_SOURCES} src/blocking/smp/solver_seq.c) +target_link_libraries(b6_nbody_seq PRIVATE m) find_package(OpenMP) if (OPENMP_FOUND) mk_bench(b6_nbody_omp_plain) target_sources(b6_nbody_omp_plain PRIVATE ${PLAIN_SOURCES} src/plain/solver_omp.c) - target_link_libraries(b6_nbody_omp_plain PRIVATE OpenMP::OpenMP_C) + target_link_libraries(b6_nbody_omp_plain PRIVATE m OpenMP::OpenMP_C) + + mk_bench(b6_nbody_omp) + target_sources(b6_nbody_omp PRIVATE ${SMP_SOURCES} src/blocking/smp/solver_omp.c) + target_link_libraries(b6_nbody_omp PRIVATE m OpenMP::OpenMP_C) endif() diff --git a/src/nbody/src/blocking/smp/main.c b/src/nbody/src/blocking/smp/main.c index 333cb15..700fcb4 100644 --- a/src/nbody/src/blocking/smp/main.c +++ b/src/nbody/src/blocking/smp/main.c @@ -18,8 +18,8 @@ int main(int argc, char** argv) nbody_conf_t conf = nbody_get_conf(argc, argv); //conf.num_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); - assert(conf.num_particles >= BLOCK_SIZE); - assert(conf.timesteps > 0); + //assert(conf.num_particles >= BLOCK_SIZE); + //assert(conf.timesteps > 0); conf.num_blocks = conf.num_particles / conf.blocksize; assert(conf.num_blocks > 0); diff --git a/src/nbody/src/blocking/smp/solver_omp.c b/src/nbody/src/blocking/smp/solver_omp.c new file mode 100644 index 0000000..2a3307f --- /dev/null +++ b/src/nbody/src/blocking/smp/solver_omp.c @@ -0,0 +1,154 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include + +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *particles, const int blocksize, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int blocksize); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval, const int blocksize); + +void nbody_solve(nbody_t *nbody, const int blocksize, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + particles_block_t *particles = nbody->particles; + forces_block_t *forces = nbody->forces; + + #pragma omp parallel + #pragma omp single + for (int t = 0; t < timesteps; t++) { + calculate_forces(forces, particles, blocksize, num_blocks); + + update_particles(particles, forces, blocksize, num_blocks, time_interval); + } + + #pragma omp taskwait +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *particles, const int blocksize, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + #pragma omp task depend(in: particles[i], particles[j]) depend(inout: forces[i]) + calculate_forces_block(forces+i, particles+i, particles+j, blocksize); + } + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + #pragma omp task depend(inout: particles[i], forces[i]) + update_particles_block(particles+i, forces+i, time_interval, blocksize); + } +} + +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int blocksize) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < blocksize; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < blocksize; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval, const int blocksize) +{ + for (int e = 0; e < blocksize; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + for (int e = 0; e < blocksize; e++) + forces->x[e] = 0; + + for (int e = 0; e < blocksize; e++) + forces->y[e] = 0; + + for (int e = 0; e < blocksize; e++) + forces->z[e] = 0; +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + (void) conf; + + int particles = nbody->num_blocks * nbody->blocksize; + + int nthreads; + #pragma omp parallel + nthreads = omp_get_num_threads(); + + printf("bigo, %s, threads, %d, timesteps, %d, total_particles, %d, block_size, %d, blocks, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), nthreads, nbody->timesteps, particles, nbody->blocksize, + nbody->num_blocks, time, nbody_compute_throughput(particles, nbody->timesteps, time) + ); +} + diff --git a/src/nbody/src/blocking/smp/solver_seq.c b/src/nbody/src/blocking/smp/solver_seq.c index a3ae2c9..9d788a6 100644 --- a/src/nbody/src/blocking/smp/solver_seq.c +++ b/src/nbody/src/blocking/smp/solver_seq.c @@ -135,7 +135,14 @@ void update_particles_block(particles_block_t *particles, forces_block_t *forces particles->position_z[e] = position_z + position_change_z; } - memset(forces, 0, sizeof(forces_block_t)); + for (int e = 0; e < blocksize; e++) + forces->x[e] = 0; + + for (int e = 0; e < blocksize; e++) + forces->y[e] = 0; + + for (int e = 0; e < blocksize; e++) + forces->z[e] = 0; } void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) diff --git a/src/nbody/src/blocking/smp/utils.c b/src/nbody/src/blocking/smp/utils.c index da8c299..5826510 100644 --- a/src/nbody/src/blocking/smp/utils.c +++ b/src/nbody/src/blocking/smp/utils.c @@ -201,6 +201,7 @@ nbody_t nbody_setup(const nbody_conf_t *conf) nbody_file_t file = nbody_setup_file(conf); nbody.file = file; + printf("force gen=%d\n", conf->force_generation); if (conf->force_generation) { alloc_particles(conf, &nbody); -- GitLab From 8312e7265de532dab4edc770322fc1f4641f154c Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Thu, 21 Nov 2024 10:30:07 +0100 Subject: [PATCH 5/6] Add OpenMP-V variant for nbody --- cmake/CheckOpenmpv.cmake | 7 +++++++ src/nbody/CMakeLists.txt | 9 +++++++++ src/nbody/src/blocking/smp/solver_omp.c | 4 ++-- 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 cmake/CheckOpenmpv.cmake diff --git a/cmake/CheckOpenmpv.cmake b/cmake/CheckOpenmpv.cmake new file mode 100644 index 0000000..f1de0cd --- /dev/null +++ b/cmake/CheckOpenmpv.cmake @@ -0,0 +1,7 @@ +# Copyright (c) 2022-2023 Barcelona Supercomputing Center (BSC) +# SPDX-License-Identifier: GPL-3.0-or-later + +include(CheckCCompilerFlag) + +#set(CMAKE_REQUIRED_LINK_OPTIONS "-fompss-2") +check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND) diff --git a/src/nbody/CMakeLists.txt b/src/nbody/CMakeLists.txt index 8715385..b969a86 100644 --- a/src/nbody/CMakeLists.txt +++ b/src/nbody/CMakeLists.txt @@ -29,3 +29,12 @@ if (OPENMP_FOUND) target_sources(b6_nbody_omp PRIVATE ${SMP_SOURCES} src/blocking/smp/solver_omp.c) target_link_libraries(b6_nbody_omp PRIVATE m OpenMP::OpenMP_C) endif() + +include(CheckOpenmpv) +if (OPENMPV_COMPILER_FOUND) + mk_bench(b6_nbody_ompv) + target_sources(b6_nbody_ompv PRIVATE ${SMP_SOURCES} src/blocking/smp/solver_omp.c) + target_link_libraries(b6_nbody_ompv PRIVATE m) + target_compile_options(b6_nbody_ompv PRIVATE "-fopenmp=libompv") + target_link_options(b6_nbody_ompv PRIVATE "-fopenmp=libompv") +endif() diff --git a/src/nbody/src/blocking/smp/solver_omp.c b/src/nbody/src/blocking/smp/solver_omp.c index 2a3307f..2996390 100644 --- a/src/nbody/src/blocking/smp/solver_omp.c +++ b/src/nbody/src/blocking/smp/solver_omp.c @@ -42,7 +42,7 @@ void calculate_forces_N2(forces_block_t *forces, const particles_block_t *partic { for (int i = 0; i < num_blocks; i++) { for (int j = 0; j < num_blocks; j++) { - #pragma omp task depend(in: particles[i], particles[j]) depend(inout: forces[i]) + #pragma omp task depend(in: particles[i], particles[j]) depend(inout: forces[i]) label("calculate_forces_N2") calculate_forces_block(forces+i, particles+i, particles+j, blocksize); } } @@ -51,7 +51,7 @@ void calculate_forces_N2(forces_block_t *forces, const particles_block_t *partic void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval) { for (int i = 0; i < num_blocks; i++) { - #pragma omp task depend(inout: particles[i], forces[i]) + #pragma omp task depend(inout: particles[i], forces[i]) label("update_particles") update_particles_block(particles+i, forces+i, time_interval, blocksize); } } -- GitLab From 19477dfab6efe64fb4bc54215f0caccddcb4a002 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Date: Thu, 21 Nov 2024 14:38:48 +0100 Subject: [PATCH 6/6] Add -fopenmp=libompv to link for checking --- cmake/CheckOpenmpv.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CheckOpenmpv.cmake b/cmake/CheckOpenmpv.cmake index f1de0cd..a62c1c4 100644 --- a/cmake/CheckOpenmpv.cmake +++ b/cmake/CheckOpenmpv.cmake @@ -3,5 +3,5 @@ include(CheckCCompilerFlag) -#set(CMAKE_REQUIRED_LINK_OPTIONS "-fompss-2") +set(CMAKE_REQUIRED_LINK_OPTIONS "-fopenmp=libompv") check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND) -- GitLab