diff --git a/cmake/CheckOpenmpv.cmake b/cmake/CheckOpenmpv.cmake new file mode 100644 index 0000000000000000000000000000000000000000..a62c1c40d350b8965ec5c05ed976d8bbb0608d3e --- /dev/null +++ b/cmake/CheckOpenmpv.cmake @@ -0,0 +1,7 @@ +# Copyright (c) 2022-2023 Barcelona Supercomputing Center (BSC) +# SPDX-License-Identifier: GPL-3.0-or-later + +include(CheckCCompilerFlag) + +set(CMAKE_REQUIRED_LINK_OPTIONS "-fopenmp=libompv") +check_c_compiler_flag("-fopenmp=libompv" OPENMPV_COMPILER_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2bef6951580d45f63264d5b6ecce5b47c8b37d88..b11cb530648205eb12de011c74dd587692f8d496 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(bench6) add_subdirectory(ompss2) add_subdirectory(heat) +add_subdirectory(nbody) diff --git a/src/nbody/CMakeLists.txt b/src/nbody/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b969a8626c406bcab87f787a371172956ecc96e9 --- /dev/null +++ b/src/nbody/CMakeLists.txt @@ -0,0 +1,40 @@ +include_directories(src) + +set(PLAIN_SOURCES + src/common/common.c + src/plain/utils.c + src/plain/main.c) + +set(SMP_SOURCES + src/common/common.c + src/blocking/common/common_utils.c + src/blocking/smp/utils.c + src/blocking/smp/main.c) + +mk_bench(b6_nbody_seq_plain) +target_sources(b6_nbody_seq_plain PRIVATE ${PLAIN_SOURCES} src/plain/solver_seq.c) +target_link_libraries(b6_nbody_seq_plain PRIVATE m) + +mk_bench(b6_nbody_seq) +target_sources(b6_nbody_seq PRIVATE ${SMP_SOURCES} src/blocking/smp/solver_seq.c) +target_link_libraries(b6_nbody_seq PRIVATE m) + +find_package(OpenMP) +if (OPENMP_FOUND) + mk_bench(b6_nbody_omp_plain) + target_sources(b6_nbody_omp_plain PRIVATE ${PLAIN_SOURCES} src/plain/solver_omp.c) + target_link_libraries(b6_nbody_omp_plain PRIVATE m OpenMP::OpenMP_C) + + mk_bench(b6_nbody_omp) + target_sources(b6_nbody_omp PRIVATE ${SMP_SOURCES} src/blocking/smp/solver_omp.c) + target_link_libraries(b6_nbody_omp PRIVATE m OpenMP::OpenMP_C) +endif() + +include(CheckOpenmpv) +if (OPENMPV_COMPILER_FOUND) + mk_bench(b6_nbody_ompv) + target_sources(b6_nbody_ompv PRIVATE ${SMP_SOURCES} src/blocking/smp/solver_omp.c) + target_link_libraries(b6_nbody_ompv PRIVATE m) + target_compile_options(b6_nbody_ompv PRIVATE "-fopenmp=libompv") + target_link_options(b6_nbody_ompv PRIVATE "-fopenmp=libompv") +endif() diff --git a/src/nbody/README.md b/src/nbody/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ed1b574fcc58133bdd5c980b9c779c2f0df504e1 --- /dev/null +++ b/src/nbody/README.md @@ -0,0 +1,23 @@ +# N-body + +## Introduction +An N-body simulation numerically approximates the evolution of a system of +bodies in which each body continuously interacts with every other body. A +familiar example is an astrophysical simulation in which each body represents a +galaxy or an individual star, and the bodies attract each other through the +gravitational force. + +N-body simulation arises in many other computational science problems as well. +For example, protein folding is studied using N-body simulation to calculate +electrostatic and van der Waals forces. Turbulent fluid flow simulation and +global illumination computation in computer graphics are other examples of +problems that use N-body simulation. + +## Available versions and building instructions + +TODO + +## Execution instructions + +TODO + diff --git a/src/nbody/scripts/granularity-tests.sh b/src/nbody/scripts/granularity-tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..7e2a068911de6d2caf5bd8d0d1637a7a89b75d43 --- /dev/null +++ b/src/nbody/scripts/granularity-tests.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Check if Makefile is in the default folder +cd ${0%/*} # Never assume the directory and always change it to where the script is located + +makefile_path=".." +if [ ! -f "${makefile_path}/Makefile" ]; then + read -p 'Makefile not found by default, please specify in which folder it is found (relative path): ' makefile_path +fi + +function usage { + echo "Usage: ./granularity-tests.sh [-bs blocksize] [-bo BIGO] [OPTIONAL_PARAMETERS]\n" + echo "Parameters:\n" + echo " -p PARTICLES: use PARTICLES as the total number of particles\n" + echo " -t TIMESTEPS: use TIMESTEPS as the number of timesteps\n\n" + echo "Optional parameters:\n" + echo " -bo BO - use BO as the BIGO (defaults to N2)\n" + echo " -bs BS - use BS as blocksize (defaults to 2048)\n" + echo " -f, - forcefully generate particles by avoiding creating files\n" + echo " -c, - check the correctness of the result (disabled by default)\n" + echo " -C, - do not check the correctness of the result\n" + echo " -o, - save the computed particles to the default output file (disabled by default)\n" + echo " -O, - do not save the computed particles to the default output file\n" + echo " -h, - display this help and exit\n\n" + exit 1 +} + + +##################### +# COLLECT ARGUMENTS # +##################### + +argc=$# +argv=($@) + +for (( j=0; j $logfile + error=$? + elif [[ $progname == *"_mpi_ompss"* ]]; then + mpiexec.hydra -n $nprocs -bind-to hwthread:$nthreadsxproc $prog -p $size -t $timesteps -c &> $logfile + error=$? + else + $prog -p $size -t $timesteps -c &> $logfile + error=$? + fi + + # Check the return value of the program + if [ "$error" -ne 0 ]; then + failed=$(($failed + 1)) + echo $progname FAILED + rm -f $logfile + continue; + fi + + # Result checking + grep -Fxq 'Result validation: OK' $logfile + error=$? + if [ "$error" -eq 0 ]; then + echo $progname PASSED + else + failed=$(($failed + 1)) + echo $progname FAILED + fi + rm -f $logfile +done + +echo --------------------------------- +if [ $failed -eq 0 ]; then + echo SUMMARY: ALL TESTS PASSED +else + echo SUMMARY: ${failed}/${total} TESTS FAILED +fi +echo --------------------------------- + +rm -f $logfile diff --git a/src/nbody/src/blocking/common/common_utils.c b/src/nbody/src/blocking/common/common_utils.c new file mode 100644 index 0000000000000000000000000000000000000000..98273ef5a2bc44d5ffa8e91ead852cdb03ba6d24 --- /dev/null +++ b/src/nbody/src/blocking/common/common_utils.c @@ -0,0 +1,53 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "nbody.h" + +#include +#include +#include +#include +#include +#include +#include + + +void nbody_particle_init(const nbody_conf_t *conf, particles_block_t *part) +{ + for (int i = 0; i < conf->blocksize; i++) { + part->position_x[i] = conf->domain_size_x * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->position_y[i] = conf->domain_size_y * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->position_z[i] = conf->domain_size_z * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->mass[i] = conf->mass_maximum * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->weight[i] = gravitational_constant * part->mass[i]; + } +} + +int nbody_compare_particles(const particles_block_t *local, const particles_block_t *reference, int blocksize, int num_blocks) +{ + double error = 0.0; + int count = 0; + for (int i = 0; i < num_blocks; i++) { + for (int e = 0; e < blocksize; e++) { + if ((local[i].position_x[e] != reference[i].position_x[e]) || + (local[i].position_y[e] != reference[i].position_y[e]) || + (local[i].position_z[e] != reference[i].position_z[e])) { + error += fabs(((local[i].position_x[e] - reference[i].position_x[e])*100.0) / reference[i].position_x[e]) + + fabs(((local[i].position_y[e] - reference[i].position_y[e])*100.0) / reference[i].position_y[e]) + + fabs(((local[i].position_z[e] - reference[i].position_z[e])*100.0) / reference[i].position_z[e]); + count++; + } + } + } + + double relative_error = (count != 0) ? error / (3.0 * count) : 0.0; + if ((count * 100.0) / (num_blocks * blocksize) > 0.6 || relative_error > TOLERATED_ERROR) { + return 0; + } + return 1; +} + diff --git a/src/nbody/src/blocking/common/nbody.h b/src/nbody/src/blocking/common/nbody.h new file mode 100644 index 0000000000000000000000000000000000000000..4d713ed64f055ccf0a9e2a79aad7459b072ddbd0 --- /dev/null +++ b/src/nbody/src/blocking/common/nbody.h @@ -0,0 +1,54 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef NBODY_H +#define NBODY_H + +#include "common/common.h" + +#include +#include +#include + +//#define MIN_PARTICLES (4096 * BLOCK_SIZE / sizeof(particles_block_t)) + +// Solver structures +typedef struct { + float *position_x; /* m */ + float *position_y; /* m */ + float *position_z; /* m */ + float *velocity_x; /* m/s */ + float *velocity_y; /* m/s */ + float *velocity_z; /* m/s */ + float *mass; /* kg */ + float *weight; +} particles_block_t; + +typedef struct { + float *x; /* x */ + float *y; /* y */ + float *z; /* z */ +} forces_block_t; + +// Forward declaration +typedef struct nbody_file_t nbody_file_t; +typedef struct nbody_t nbody_t; + +// Solver function +void nbody_solve(nbody_t *nbody, const int blocksize, const int num_blocks, const int timesteps, const float time_interval); + +// Auxiliary functions +nbody_t nbody_setup(const nbody_conf_t *conf); +void nbody_particle_init(const nbody_conf_t *conf, particles_block_t *part); +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time); +void nbody_save_particles(const nbody_t *nbody); +void nbody_free(nbody_t *nbody); +void nbody_check(const nbody_t *nbody); +int nbody_compare_particles(const particles_block_t *local, const particles_block_t *reference, int blocksize, int num_blocks); + +#endif // NBODY_H + diff --git a/src/nbody/src/blocking/mpi/main.c b/src/nbody/src/blocking/mpi/main.c new file mode 100644 index 0000000000000000000000000000000000000000..59c9094ffb944acc2c48dca53e0eb910e5cc1273 --- /dev/null +++ b/src/nbody/src/blocking/mpi/main.c @@ -0,0 +1,66 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include + +#include + +#ifdef INTEROPERABILITY +#include +#endif + +#ifdef INTEROPERABILITY +#define DESIRED_THREAD_LEVEL MPI_TASK_MULTIPLE +#else +#define DESIRED_THREAD_LEVEL MPI_THREAD_MULTIPLE +#endif + +int main(int argc, char** argv) +{ + int provided; + MPI_Init_thread(&argc, &argv, DESIRED_THREAD_LEVEL, &provided); + assert(provided == DESIRED_THREAD_LEVEL); + + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + assert(rank_size > 0); + + nbody_conf_t conf = nbody_get_conf(argc, argv); + assert(conf.num_particles > 0); + assert(conf.timesteps > 0); + + int total_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); + int my_particles = total_particles / rank_size; + assert(my_particles >= BLOCK_SIZE); + conf.num_particles = my_particles; + + conf.num_blocks = my_particles / BLOCK_SIZE; + assert(conf.num_blocks > 0); + + nbody_t nbody = nbody_setup(&conf); + MPI_Barrier(MPI_COMM_WORLD); + + double start = get_time(); + nbody_solve(&nbody, conf.num_blocks, conf.timesteps, conf.time_interval); + double end = get_time(); + + nbody_stats(&nbody, &conf, end - start); + + if (conf.save_result) nbody_save_particles(&nbody); + if (conf.check_result) nbody_check(&nbody); + nbody_free(&nbody); + + MPI_Finalize(); + return 0; +} diff --git a/src/nbody/src/blocking/mpi/nbody.h b/src/nbody/src/blocking/mpi/nbody.h new file mode 100644 index 0000000000000000000000000000000000000000..68ad324cc43bee0b6f0226eba29dc6b7595c16f8 --- /dev/null +++ b/src/nbody/src/blocking/mpi/nbody.h @@ -0,0 +1,32 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef NBODY_MPI_H +#define NBODY_MPI_H + +#include "blocking/common/nbody.h" + +// Application structures +struct nbody_file_t { + size_t total_size; + size_t size; + size_t offset; + char name[1000]; +}; + +struct nbody_t { + particles_block_t *local; + particles_block_t *remote1; + particles_block_t *remote2; + forces_block_t *forces; + int num_blocks; + int timesteps; + nbody_file_t file; +}; + +#endif // NBODY_MPI_H + diff --git a/src/nbody/src/blocking/mpi/solver_mpi.c b/src/nbody/src/blocking/mpi/solver_mpi.c new file mode 100644 index 0000000000000000000000000000000000000000..02690f8aff68202859b74079c69228dc0aa009f3 --- /dev/null +++ b/src/nbody/src/blocking/mpi/solver_mpi.c @@ -0,0 +1,202 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include + +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); +static void exchange_particles_block(const particles_block_t *sendbuf, particles_block_t *recvbuf, int block_id, int rank, int rank_size); + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + assert(timesteps > 0); + + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + particles_block_t *local = nbody->local; + particles_block_t *remote1 = nbody->remote1; + particles_block_t *remote2 = nbody->remote2; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + particles_block_t *sendbuf = local; + particles_block_t *recvbuf = remote1; + + for (int r = 0; r < rank_size; r++) { + calculate_forces(forces, local, sendbuf, num_blocks); + if (r < rank_size - 1) { + exchange_particles(sendbuf, recvbuf, num_blocks, rank, rank_size); + } + + particles_block_t *aux = recvbuf; + recvbuf = (r != 0) ? sendbuf : remote2; + sendbuf = aux; + } + + update_particles(local, forces, num_blocks, time_interval); + } + + MPI_Barrier(MPI_COMM_WORLD); +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, block1+i, block2+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size) +{ + for (int i = 0; i < num_blocks; i++) { + exchange_particles_block(sendbuf+i, recvbuf+i, i, rank, rank_size); + } +} + +void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +void exchange_particles_block(const particles_block_t *sendbuf, particles_block_t *recvbuf, int block_id, int rank, int rank_size) +{ + int src = MOD(rank - 1, rank_size); + int dst = MOD(rank + 1, rank_size); + int size = sizeof(particles_block_t); + + if (rank % 2) { + MPI_Send(sendbuf, size, MPI_BYTE, dst, block_id+10, MPI_COMM_WORLD); + MPI_Recv(recvbuf, size, MPI_BYTE, src, block_id+10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } else { + MPI_Recv(recvbuf, size, MPI_BYTE, src, block_id+10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(sendbuf, size, MPI_BYTE, dst, block_id+10, MPI_COMM_WORLD); + } +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + if (!rank) { + int particles = nbody->num_blocks * BLOCK_SIZE; + int total_particles = particles * rank_size; + + printf("bigo, %s, processes, %d, threads, %d, timesteps, %d, total_particles, %d, particles_per_proc, %d, block_size, %d, blocks_per_proc, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), rank_size, 1, nbody->timesteps, total_particles, particles, BLOCK_SIZE, + nbody->num_blocks, time, nbody_compute_throughput(total_particles, nbody->timesteps, time) + ); + } +} diff --git a/src/nbody/src/blocking/mpi/solver_mpi_ompss.c b/src/nbody/src/blocking/mpi/solver_mpi_ompss.c new file mode 100644 index 0000000000000000000000000000000000000000..0a67f4d5819243d874892893b9a1f86ef574332b --- /dev/null +++ b/src/nbody/src/blocking/mpi/solver_mpi_ompss.c @@ -0,0 +1,221 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include + +#include +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); +static void exchange_particles_block(const particles_block_t *sendbuf, particles_block_t *recvbuf, int block_id, int rank, int rank_size); + +#ifdef INTEROPERABILITY +int *serial = NULL; +#else +int *serial = (int *)1; +#endif + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + assert(timesteps > 0); + + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + particles_block_t *local = nbody->local; + particles_block_t *remote1 = nbody->remote1; + particles_block_t *remote2 = nbody->remote2; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + particles_block_t *sendbuf = local; + particles_block_t *recvbuf = remote1; + + for (int r = 0; r < rank_size; r++) { + calculate_forces(forces, local, sendbuf, num_blocks); + if (r < rank_size - 1) { + exchange_particles(sendbuf, recvbuf, num_blocks, rank, rank_size); + } + + particles_block_t *aux = recvbuf; + recvbuf = (r != 0) ? sendbuf : remote2; + sendbuf = aux; + } + + update_particles(local, forces, num_blocks, time_interval); + } + + #pragma oss taskwait + MPI_Barrier(MPI_COMM_WORLD); +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, block1+i, block2+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size) +{ + int src = MOD(rank - 1, rank_size); + int dst = MOD(rank + 1, rank_size); + int size = sizeof(particles_block_t); + + if (rank % 2) { + for (int i = 0; i < num_blocks; i++) { + send_particles_block(sendbuf+i, i, dst); + recv_particles_block(recvbuf+i, i, src); + } + } else { + for (int i = 0; i < num_blocks; i++) { + recv_particles_block(recvbuf+i, i, src); + send_particles_block(sendbuf+i, i, dst); + } + } +} + +#pragma oss task in(*block1, *block2) inout(*forces) label("calculate_forces_block") +void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +#pragma oss task inout(*particles, *forces) label("update_particles_block") +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +#pragma oss task in(*sendbuf) inout(*serial) label("send_particles_block") +void send_particles_block(const particles_block_t *sendbuf, int block_id, int dst) +{ + MPI_Send(sendbuf, sizeof(particles_block_t), MPI_BYTE, dst, block_id+10, MPI_COMM_WORLD); +} + +#pragma oss task out(*recvbuf) inout(*serial) label("recv_particles_block") +void recv_particles_block(particles_block_t *recvbuf, int block_id, int src) +{ + MPI_Recv(recvbuf, sizeof(particles_block_t), MPI_BYTE, src, block_id+10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + if (!rank) { + int particles = nbody->num_blocks * BLOCK_SIZE; + int total_particles = particles * rank_size; + + printf("bigo, %s, processes, %d, threads, %d, timesteps, %d, total_particles, %d, particles_per_proc, %d, block_size, %d, blocks_per_proc, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), rank_size, nanos6_get_num_cpus(), nbody->timesteps, total_particles, particles, BLOCK_SIZE, + nbody->num_blocks, time, nbody_compute_throughput(total_particles, nbody->timesteps, time) + ); + } +} diff --git a/src/nbody/src/blocking/mpi/solver_mpi_ompss_interop_async.c b/src/nbody/src/blocking/mpi/solver_mpi_ompss_interop_async.c new file mode 100644 index 0000000000000000000000000000000000000000..d83024055ac34c54ddc4f1739de32f846cde36a7 --- /dev/null +++ b/src/nbody/src/blocking/mpi/solver_mpi_ompss_interop_async.c @@ -0,0 +1,221 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); +static void exchange_particles_block(const particles_block_t *sendbuf, particles_block_t *recvbuf, int block_id, int rank, int rank_size); + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + assert(timesteps > 0); + + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + particles_block_t *local = nbody->local; + particles_block_t *remote1 = nbody->remote1; + particles_block_t *remote2 = nbody->remote2; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + particles_block_t *sendbuf = local; + particles_block_t *recvbuf = remote1; + + for (int r = 0; r < rank_size; r++) { + calculate_forces(forces, local, sendbuf, num_blocks); + if (r < rank_size - 1) { + exchange_particles(sendbuf, recvbuf, num_blocks, rank, rank_size); + } + + particles_block_t *aux = recvbuf; + recvbuf = (r != 0) ? sendbuf : remote2; + sendbuf = aux; + } + + update_particles(local, forces, num_blocks, time_interval); + } + + #pragma oss taskwait + MPI_Barrier(MPI_COMM_WORLD); +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, block1+i, block2+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, block1+i, block2+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +void exchange_particles(const particles_block_t *sendbuf, particles_block_t *recvbuf, const int num_blocks, const int rank, const int rank_size) +{ + int src = MOD(rank - 1, rank_size); + int dst = MOD(rank + 1, rank_size); + int size = sizeof(particles_block_t); + + if (rank % 2) { + for (int i = 0; i < num_blocks; i++) { + send_particles_block(sendbuf+i, i, dst); + recv_particles_block(recvbuf+i, i, src); + } + } else { + for (int i = 0; i < num_blocks; i++) { + recv_particles_block(recvbuf+i, i, src); + send_particles_block(sendbuf+i, i, dst); + } + } +} + +#pragma oss task in(*block1, *block2) inout(*forces) label("calculate_forces_block") +void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +#pragma oss task inout(*particles, *forces) label("update_particles_block") +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +#pragma oss task in(*sendbuf) label("send_particles_block") +void send_particles_block(const particles_block_t *sendbuf, int block_id, int dst) +{ + MPI_Request request; + MPI_Isend(sendbuf, sizeof(particles_block_t), MPI_BYTE, dst, block_id+10, MPI_COMM_WORLD, &request); + TAMPI_Iwait(&request, MPI_STATUS_IGNORE); +} + +#pragma oss task out(*recvbuf) label("recv_particles_block") +void recv_particles_block(particles_block_t *recvbuf, int block_id, int src) +{ + MPI_Request request; + MPI_Irecv(recvbuf, sizeof(particles_block_t), MPI_BYTE, src, block_id+10, MPI_COMM_WORLD, &request); + TAMPI_Iwait(&request, MPI_STATUS_IGNORE); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + if (!rank) { + int particles = nbody->num_blocks * BLOCK_SIZE; + int total_particles = particles * rank_size; + + printf("bigo, %s, processes, %d, threads, %d, timesteps, %d, total_particles, %d, particles_per_proc, %d, block_size, %d, blocks_per_proc, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), rank_size, nanos6_get_num_cpus(), nbody->timesteps, total_particles, particles, BLOCK_SIZE, + nbody->num_blocks, time, nbody_compute_throughput(total_particles, nbody->timesteps, time) + ); + } +} diff --git a/src/nbody/src/blocking/mpi/utils.c b/src/nbody/src/blocking/mpi/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..d244271bae978ab8ce7730cbb317f4b19d24bc47 --- /dev/null +++ b/src/nbody/src/blocking/mpi/utils.c @@ -0,0 +1,203 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/mpi/nbody.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +void nbody_generate_particles(const nbody_conf_t *conf, const nbody_file_t *file) +{ + int rank_size; + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + if (!access(fname, F_OK)) { + return; + } + + struct stat st = {0}; + if (stat("data", &st) == -1) { + mkdir("data", 0755); + } + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int total_size = file->total_size; + assert(total_size % PAGE_SIZE == 0); + + int err = ftruncate(fd, total_size); + assert(!err); + + particles_block_t * const particles = mmap(NULL, total_size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + + for(int i = 0; i < conf->num_blocks * rank_size; i++) { + nbody_particle_init(conf, particles+i); + } + + err = munmap(particles, total_size); + assert(!err); + + err = close(fd); + assert(!err); +} + +void nbody_check(const nbody_t *nbody) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + char fname[1024]; + sprintf(fname, "%s.ref", nbody->file.name); + if (access(fname, F_OK) != 0) { + if (!rank) fprintf(stderr, "Warning: %s file does not exist. Skipping the check...\n", fname); + return; + } + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + particles_block_t *reference = mmap(NULL, nbody->file.size, PROT_READ, MAP_SHARED, fd, nbody->file.offset); + assert(reference != MAP_FAILED); + + int correct, correct_chunk; + correct_chunk = nbody_compare_particles(nbody->local, reference, nbody->num_blocks); + + MPI_Reduce(&correct_chunk, &correct, 1, MPI_INT, MPI_LAND, 0, MPI_COMM_WORLD); + + if (!rank) { + if (correct) { + printf("Result validation: OK\n"); + } else { + printf("Result validation: ERROR\n"); + } + } + + int err = munmap(reference, nbody->file.size); + assert(!err); + + err = close(fd); + assert(!err); +} + +nbody_file_t nbody_setup_file(const nbody_conf_t *conf) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + nbody_file_t file; + file.size = conf->num_blocks * sizeof(particles_block_t); + file.total_size = rank_size * file.size; + file.offset = rank * file.size; + + sprintf(file.name, "%s-%s-%d-%d-%d", conf->name, TOSTRING(BIGO), rank_size * conf->num_blocks * BLOCK_SIZE, BLOCK_SIZE, conf->timesteps); + return file; +} + +particles_block_t * nbody_load_particles(const nbody_conf_t *conf, const nbody_file_t *file) +{ + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + void * const ptr = mmap(NULL, file->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, file->offset); + assert(ptr != MAP_FAILED); + + int err = close(fd); + assert(!err); + + return ptr; +} + +nbody_t nbody_setup(const nbody_conf_t *conf) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + nbody_file_t file = nbody_setup_file(conf); + + if (!rank) nbody_generate_particles(conf, &file); + MPI_Barrier(MPI_COMM_WORLD); + + nbody_t nbody; + nbody.local = nbody_load_particles(conf, &file); + assert(nbody.local != NULL); + + nbody.remote1 = nbody_alloc(conf->num_blocks * sizeof(particles_block_t)); + nbody.remote2 = nbody_alloc(conf->num_blocks * sizeof(particles_block_t)); + assert(nbody.remote1 != NULL); + assert(nbody.remote2 != NULL); + + nbody.forces = nbody_alloc(conf->num_blocks * sizeof(forces_block_t)); + assert(nbody.forces != NULL); + + nbody.num_blocks = conf->num_blocks; + nbody.timesteps = conf->timesteps; + nbody.file = file; + + return nbody; +} + +void nbody_save_particles(const nbody_t *nbody) +{ + int rank, rank_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &rank_size); + + char fname[1024]; + sprintf(fname, "%s.out", nbody->file.name); + const int mode = MPI_MODE_CREATE | MPI_MODE_WRONLY; + + MPI_File outfile; + int err = MPI_File_open(MPI_COMM_WORLD, fname, mode, MPI_INFO_NULL, &outfile); + assert(err == MPI_SUCCESS); + + MPI_File_set_view(outfile, nbody->file.offset, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); + assert(err == MPI_SUCCESS); + + MPI_File_write(outfile, nbody->local, nbody->file.size, MPI_BYTE, MPI_STATUS_IGNORE); + assert(err == MPI_SUCCESS); + + MPI_File_close(&outfile); + assert(err == MPI_SUCCESS); + + MPI_Barrier(MPI_COMM_WORLD); +} + +void nbody_free(nbody_t *nbody) +{ + const int particles_size = nbody->num_blocks * sizeof(particles_block_t); + const int forces_size = nbody->num_blocks * sizeof(forces_block_t); + + int err = munmap(nbody->local, particles_size); + err |= munmap(nbody->remote1, particles_size); + err |= munmap(nbody->remote2, particles_size); + err |= munmap(nbody->forces, forces_size); + assert(!err); +} + diff --git a/src/nbody/src/blocking/smp/main.c b/src/nbody/src/blocking/smp/main.c new file mode 100644 index 0000000000000000000000000000000000000000..700fcb4592db927c05b502e6da967c03de9e0fe9 --- /dev/null +++ b/src/nbody/src/blocking/smp/main.c @@ -0,0 +1,39 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include + +int main(int argc, char** argv) +{ + nbody_conf_t conf = nbody_get_conf(argc, argv); + + //conf.num_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); + //assert(conf.num_particles >= BLOCK_SIZE); + //assert(conf.timesteps > 0); + + conf.num_blocks = conf.num_particles / conf.blocksize; + assert(conf.num_blocks > 0); + + nbody_t nbody = nbody_setup(&conf); + + double start = get_time(); + nbody_solve(&nbody, conf.blocksize, conf.num_blocks, conf.timesteps, conf.time_interval); + double end = get_time(); + + nbody_stats(&nbody, &conf, end - start); + + if (conf.save_result && !conf.force_generation) nbody_save_particles(&nbody); + if (conf.check_result) nbody_check(&nbody); + nbody_free(&nbody); + return 0; +} diff --git a/src/nbody/src/blocking/smp/nbody.h b/src/nbody/src/blocking/smp/nbody.h new file mode 100644 index 0000000000000000000000000000000000000000..6611ee5082d2164f489a413a2c074adaffadd879 --- /dev/null +++ b/src/nbody/src/blocking/smp/nbody.h @@ -0,0 +1,31 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef NBODY_SMP_H +#define NBODY_SMP_H + +#include "blocking/common/nbody.h" + +// Application structures +struct nbody_file_t { + size_t size; + char name[1000]; +}; + +struct nbody_t { + float *particles_map; + particles_block_t *particles; + float *forces_map; + forces_block_t *forces; + int blocksize; + int num_blocks; + int timesteps; + nbody_file_t file; +}; + +#endif // NBODY_SMP_H + diff --git a/src/nbody/src/blocking/smp/solver_omp.c b/src/nbody/src/blocking/smp/solver_omp.c new file mode 100644 index 0000000000000000000000000000000000000000..299639088a49d77bfcb5d09a2a9237ae881337ab --- /dev/null +++ b/src/nbody/src/blocking/smp/solver_omp.c @@ -0,0 +1,154 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include + +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *particles, const int blocksize, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int blocksize); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval, const int blocksize); + +void nbody_solve(nbody_t *nbody, const int blocksize, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + particles_block_t *particles = nbody->particles; + forces_block_t *forces = nbody->forces; + + #pragma omp parallel + #pragma omp single + for (int t = 0; t < timesteps; t++) { + calculate_forces(forces, particles, blocksize, num_blocks); + + update_particles(particles, forces, blocksize, num_blocks, time_interval); + } + + #pragma omp taskwait +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *particles, const int blocksize, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + #pragma omp task depend(in: particles[i], particles[j]) depend(inout: forces[i]) label("calculate_forces_N2") + calculate_forces_block(forces+i, particles+i, particles+j, blocksize); + } + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + #pragma omp task depend(inout: particles[i], forces[i]) label("update_particles") + update_particles_block(particles+i, forces+i, time_interval, blocksize); + } +} + +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2, const int blocksize) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < blocksize; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < blocksize; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval, const int blocksize) +{ + for (int e = 0; e < blocksize; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + for (int e = 0; e < blocksize; e++) + forces->x[e] = 0; + + for (int e = 0; e < blocksize; e++) + forces->y[e] = 0; + + for (int e = 0; e < blocksize; e++) + forces->z[e] = 0; +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + (void) conf; + + int particles = nbody->num_blocks * nbody->blocksize; + + int nthreads; + #pragma omp parallel + nthreads = omp_get_num_threads(); + + printf("bigo, %s, threads, %d, timesteps, %d, total_particles, %d, block_size, %d, blocks, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), nthreads, nbody->timesteps, particles, nbody->blocksize, + nbody->num_blocks, time, nbody_compute_throughput(particles, nbody->timesteps, time) + ); +} + diff --git a/src/nbody/src/blocking/smp/solver_ompss.c b/src/nbody/src/blocking/smp/solver_ompss.c new file mode 100644 index 0000000000000000000000000000000000000000..80628c5129f66958d7bb7101ebacc56b6022b11c --- /dev/null +++ b/src/nbody/src/blocking/smp/solver_ompss.c @@ -0,0 +1,157 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include + +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *particles, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval); +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval); + +void nbody_solve(nbody_t *nbody, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + particles_block_t *particles = nbody->particles; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + #pragma oss task weakin(particles[0;num_blocks]) weakinout(forces[0;num_blocks]) label("calculate_forces") + calculate_forces(forces, particles, num_blocks); + + #pragma oss task weakinout(particles[0;num_blocks], forces[0;num_blocks]) label("update_particles") + update_particles(particles, forces, num_blocks, time_interval); + } + + #pragma oss taskwait +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, particles+i, particles+j); + } + } +} + +void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, particles+i, particles+j); + } + } +} + +void calculate_forces_N(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, particles+i, particles+i+1); + } +} + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval); + } +} + +#pragma oss task in(*block1, *block2) inout(*forces) wait label("calculate_forces_block") +static void calculate_forces_block(forces_block_t *forces, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + #pragma oss task for + for (int i = 0; i < BLOCK_SIZE; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < BLOCK_SIZE; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +#pragma oss task inout(*particles, *forces) label("update_particles_block") +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval) +{ + for (int e = 0; e < BLOCK_SIZE; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces, 0, sizeof(forces_block_t)); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + int particles = nbody->num_blocks * BLOCK_SIZE; + printf("bigo, %s, threads, %d, timesteps, %d, total_particles, %d, block_size, %d, blocks, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), nanos6_get_num_cpus(), nbody->timesteps, particles, BLOCK_SIZE, + nbody->num_blocks, time, nbody_compute_throughput(particles, nbody->timesteps, time) + ); +} + diff --git a/src/nbody/src/blocking/smp/solver_seq.c b/src/nbody/src/blocking/smp/solver_seq.c new file mode 100644 index 0000000000000000000000000000000000000000..9d788a63b3d00fc4a4aa504bea8e1c504151921d --- /dev/null +++ b/src/nbody/src/blocking/smp/solver_seq.c @@ -0,0 +1,158 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#include +#include +#include +#include +#include + +static void calculate_forces(forces_block_t *forces, const particles_block_t *particles, const int blocksize, const int num_blocks); +static void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval); +static void calculate_forces_block(forces_block_t *forces, const int blocksize, const particles_block_t *block1, const particles_block_t *block2); +static void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval, const int blocksize); + +void nbody_solve(nbody_t *nbody, const int blocksize, const int num_blocks, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + particles_block_t *particles = nbody->particles; + forces_block_t *forces = nbody->forces; + + for (int t = 0; t < timesteps; t++) { + calculate_forces(forces, particles, blocksize, num_blocks); + update_particles(particles, forces, blocksize, num_blocks, time_interval); + } +} + +void calculate_forces_N2(forces_block_t *forces, const particles_block_t *particles, const int blocksize, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < num_blocks; j++) { + calculate_forces_block(forces+i, blocksize, particles+i, particles+j); + } + } +} + +#if 0 +static void calculate_forces_NlogN(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks; i++) { + for (int j = 0; j < LOG2(num_blocks); j++) { + calculate_forces_block(forces+i, particles+i, particles+j); + } + } +} + +static void calculate_forces_N(forces_block_t *forces, const particles_block_t *particles, const int num_blocks) +{ + for (int i = 0; i < num_blocks - 1; i++) { + calculate_forces_block(forces+i, particles+i, particles+i+1); + } +} +#endif + +void update_particles(particles_block_t *particles, forces_block_t *forces, const int blocksize, const int num_blocks, const float time_interval) +{ + for (int i = 0; i < num_blocks; i++) { + update_particles_block(particles+i, forces+i, time_interval, blocksize); + } +} + +static void calculate_forces_block(forces_block_t *forces, const int blocksize, const particles_block_t *block1, const particles_block_t *block2) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const int same_block = (block1 == block2); + const float *pos_x1 = block1->position_x; + const float *pos_y1 = block1->position_y; + const float *pos_z1 = block1->position_z; + const float *mass1 = block1->mass ; + + const float *pos_x2 = block2->position_x; + const float *pos_y2 = block2->position_y; + const float *pos_z2 = block2->position_z; + const float *mass2 = block2->mass; + + for (int i = 0; i < blocksize; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < blocksize; j++) { + const float diff_x = pos_x2[j] - pos_x1[i]; + const float diff_y = pos_y2[j] - pos_y1[i]; + const float diff_z = pos_z2[j] - pos_z1[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (!same_block || distance_squared != 0.0f) { + force = (mass1[i] / (distance_squared * distance)) * (mass2[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles_block(particles_block_t *particles, forces_block_t *forces, const float time_interval, const int blocksize) +{ + for (int e = 0; e < blocksize; e++){ + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + for (int e = 0; e < blocksize; e++) + forces->x[e] = 0; + + for (int e = 0; e < blocksize; e++) + forces->y[e] = 0; + + for (int e = 0; e < blocksize; e++) + forces->z[e] = 0; +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + (void) conf; + + int particles = nbody->num_blocks * conf->blocksize; + printf("bigo, %s, timesteps, %d, total_particles, %d, block_size, %d, blocks, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), nbody->timesteps, particles, conf->blocksize, nbody->num_blocks, + time, nbody_compute_throughput(particles, nbody->timesteps, time) + ); +} + diff --git a/src/nbody/src/blocking/smp/utils.c b/src/nbody/src/blocking/smp/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..58265108370ce43f41a67a6dfe278637afdb8f45 --- /dev/null +++ b/src/nbody/src/blocking/smp/utils.c @@ -0,0 +1,257 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "blocking/smp/nbody.h" + +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* +static void nbody_generate_particles(const nbody_conf_t *conf, const nbody_file_t *file) +{ + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + if (!access(fname, F_OK)) { + return; + } + + struct stat st = {0}; + if (stat("data", &st) == -1) { + mkdir("data", 0755); + } + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int size = file->size; + assert(size % PAGE_SIZE == 0); + + int err = ftruncate(fd, size); + assert(!err); + + particles_block_t * const particles = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + + for(int i = 0; i < conf->num_blocks; i++) { + nbody_particle_init(conf, particles+i); + } + + err = munmap(particles, size); + assert(!err); + + err = close(fd); + assert(!err); +} +*/ + +void nbody_check(const nbody_t *nbody) +{ + char fname[1024]; + sprintf(fname, "%s.ref", nbody->file.name); + if (access(fname, F_OK) != 0) { + fprintf(stderr, "Warning: %s file does not exist. Skipping the check...\n", fname); + return; + } + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + particles_block_t *reference = mmap(NULL, nbody->file.size, PROT_READ, MAP_SHARED, fd, 0); + assert(reference != MAP_FAILED); + + if (nbody_compare_particles(nbody->particles, reference, nbody->blocksize, nbody->num_blocks)) { + printf("Result validation: OK\n"); + } else { + printf("Result validation: ERROR\n"); + } + + int err = munmap(reference, nbody->file.size); + assert(!err); + + err = close(fd); + assert(!err); +} + +static nbody_file_t nbody_setup_file(const nbody_conf_t *conf) +{ + nbody_file_t file; + file.size = conf->num_blocks * sizeof(particles_block_t); + + sprintf(file.name, "%s-%s-%d-%d-%d", conf->name, TOSTRING(BIGO), conf->num_blocks * conf->blocksize, conf->blocksize, conf->timesteps); + return file; +} +/* +static particles_block_t *nbody_load_particles(const nbody_conf_t *conf, const nbody_file_t *file) +{ + (void) conf; + + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + void * const ptr = mmap(NULL, file->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + assert(ptr != MAP_FAILED); + + int err = close(fd); + assert(!err); + + return ptr; +} +*/ + +static void alloc_particles(const nbody_conf_t *conf, nbody_t *nbody) +{ + size_t bs = (size_t) conf->blocksize; + size_t n = (size_t) conf->num_blocks; + size_t block_floats = bs * (3 + 3 + 2); + size_t block_bytes = block_floats * sizeof(float); + size_t alloc_bytes = n * block_bytes; + + nbody->particles_map = nbody_alloc(alloc_bytes); + nbody->particles = nbody_alloc(n * sizeof(particles_block_t)); + + float *m = nbody->particles_map; + + /* Setup the particles_block_t pointers, so they point to the big chunk + * in particle_map: + * + * [ particles[0] ] [ particles[1] ] ... + * [ px ][ py ][ pz ] [... ] + * | \_________________ + * | \ + * v v + * [ px0 ] [ px1 ] [ ... ] [ py0 ] [ ... ] ... + * [ particles_map ] + */ + + for (int i = 0; i < conf->num_blocks; i++) { + particles_block_t *p = &nbody->particles[i]; + float *b = &m[block_floats * i]; + p->position_x = &b[bs * 0]; + p->position_y = &b[bs * 1]; + p->position_z = &b[bs * 2]; + p->velocity_x = &b[bs * 3]; + p->velocity_y = &b[bs * 4]; + p->velocity_z = &b[bs * 5]; + p->mass = &b[bs * 6]; + p->weight = &b[bs * 7]; + } +} + +static void alloc_forces(const nbody_conf_t *conf, nbody_t *nbody) +{ + size_t bs = (size_t) conf->blocksize; + size_t n = (size_t) conf->num_blocks; + size_t block_floats = bs * 3; + size_t block_bytes = block_floats * sizeof(float); + size_t alloc_bytes = n * block_bytes; + + nbody->forces_map = nbody_alloc(alloc_bytes); + nbody->forces = nbody_alloc(n * sizeof(forces_block_t)); + + float *m = nbody->forces_map; + + /* Setup the forces_block_t pointers, so they point to the big chunk + * in forces_map: + * + * [ forces[0] ] [ forces[1] ] ... + * [ x ][ y ][ z ] + * | \________________ + * | \ + * v v + * [ x0 ] [ x1 ] [ ... ] [ y0 ] [ y1 ] [ ... ] + * [ forces_map ] + */ + + for (int i = 0; i < conf->num_blocks; i++) { + forces_block_t *f = &nbody->forces[i]; + float *b = &m[block_floats * i]; + f->x = &b[bs * 0]; + f->y = &b[bs * 1]; + f->z = &b[bs * 2]; + } +} + +nbody_t nbody_setup(const nbody_conf_t *conf) +{ + nbody_t nbody; + nbody.timesteps = conf->timesteps; + nbody.blocksize = conf->blocksize; + nbody.num_blocks = conf->num_blocks; + + nbody_file_t file = nbody_setup_file(conf); + nbody.file = file; + + printf("force gen=%d\n", conf->force_generation); + if (conf->force_generation) { + alloc_particles(conf, &nbody); + + for (int i = 0; i < conf->num_blocks; i++) + nbody_particle_init(conf, nbody.particles+i); + } else { + fprintf(stderr, "not implemented\n"); + exit(1); + /* + nbody_generate_particles(conf, &file); + nbody.particles = nbody_load_particles(conf, &file); + assert(nbody.particles != NULL); + */ + } + + alloc_forces(conf, &nbody); + + return nbody; +} + +void nbody_save_particles(const nbody_t *nbody) +{ + char fname[1024]; + sprintf(fname, "%s.out", nbody->file.name); + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int size = nbody->file.size; + assert(size % PAGE_SIZE == 0); + + int err = ftruncate(fd, size); + assert(!err); + + particles_block_t * const particles = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + assert(particles != MAP_FAILED); + + memcpy(particles, nbody->particles, size); + + err = munmap(particles, size); + assert(!err); + + err = close(fd); + assert(!err); +} + +void nbody_free(nbody_t *nbody) +{ + int err = munmap(nbody->particles, nbody->num_blocks * sizeof(particles_block_t)); + err |= munmap(nbody->forces, nbody->num_blocks * sizeof(forces_block_t)); + assert(!err); +} + diff --git a/src/nbody/src/common/common.c b/src/nbody/src/common/common.c new file mode 100644 index 0000000000000000000000000000000000000000..7608100196cf0c1a1d06a32cb569768a38a9f1c7 --- /dev/null +++ b/src/nbody/src/common/common.c @@ -0,0 +1,133 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +void * nbody_alloc(size_t size) +{ + void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) + abort(); + return addr; +} + +static void nbody_print_usage(int argc, char **argv) +{ + (void) argc; + fprintf(stderr, "Usage: %s [OPTIONS]...\n", argv[0]); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -p, --particles=PARTICLES use PARTICLES as the total number of particles (default: %d)\n", default_num_particles); + fprintf(stderr, " -t, --timesteps=TIMESTEPS use TIMESTEPS as the number of timesteps (default: %d)\n\n", default_timesteps); + fprintf(stderr, " -b, --blocksize=BLOCKSIZE use BLOCKSIZE as the block size (default: %d)\n", default_blocksize); + fprintf(stderr, " -f, --force-generation always force the generation of particles without the usage of files (disabled by default)\n"); + fprintf(stderr, " -c, --check check the correctness of the result (disabled by default)\n"); + fprintf(stderr, " -C, --no-check do not check the correctness of the result\n"); + fprintf(stderr, " -o, --output save the computed particles to the default output file (disabled by default)\n"); + fprintf(stderr, " -O, --no-output do not save the computed particles to the default output file\n"); + fprintf(stderr, " -h, --help display this help and exit\n\n"); +} + +nbody_conf_t nbody_get_conf(int argc, char **argv) +{ + nbody_conf_t conf; + conf.domain_size_x = default_domain_size_x; + conf.domain_size_y = default_domain_size_y; + conf.domain_size_z = default_domain_size_z; + conf.mass_maximum = default_mass_maximum; + conf.time_interval = default_time_interval; + conf.seed = default_seed; + conf.name = default_name; + conf.num_particles = default_num_particles; + conf.blocksize = default_blocksize; + conf.num_blocks = conf.num_particles / conf.blocksize; + conf.timesteps = default_timesteps; + conf.save_result = default_save_result; + conf.check_result = default_check_result; + conf.force_generation = default_force_generation; + + static struct option long_options[] = { + {"particles", required_argument, 0, 'p'}, + {"timesteps", required_argument, 0, 't'}, + {"blocksize", required_argument, 0, 'b'}, + {"force-generation", no_argument, 0, 'f'}, + {"check", no_argument, 0, 'c'}, + {"no-check", no_argument, 0, 'C'}, + {"output", no_argument, 0, 'o'}, + {"no-output", no_argument, 0, 'O'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; + + int c; + int index; + while ((c = getopt_long(argc, argv, "hfoOcCp:t:b:", long_options, &index)) != -1) { + switch (c) { + case 'h': + nbody_print_usage(argc, argv); + exit(0); + case 'f': + conf.force_generation = 1; + break; + case 'o': + conf.save_result = 1; + break; + case 'O': + conf.save_result = 0; + break; + case 'c': + conf.check_result = 1; + break; + case 'C': + conf.check_result = 0; + break; + case 'p': + conf.num_particles = atoi(optarg); + break; + case 'b': + conf.blocksize = atoi(optarg); + break; + case 't': + conf.timesteps = atoi(optarg); + break; + case '?': + exit(1); + default: + abort(); + } + } + + return conf; +} + +double nbody_compute_throughput(int num_particles, int timesteps, double elapsed_time) +{ + double interactions_per_timestep = 0; +#if defined(_BIGO_N2) + interactions_per_timestep = (double)(num_particles)*(double)(num_particles); +#elif defined(_BIGO_NlogN) + interactions_per_timestep = (double)(num_particles)*(double)(LOG2(num_particles)); +#elif defined(_BIGO_N) + interactions_per_timestep = (double)(num_particles); +#endif + return (((interactions_per_timestep * (double)timesteps) / elapsed_time) / 1000000.0); +} + +double get_time(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)(ts.tv_sec) + (double)ts.tv_nsec * 1.0e-9; +} diff --git a/src/nbody/src/common/common.h b/src/nbody/src/common/common.h new file mode 100644 index 0000000000000000000000000000000000000000..2919a5f4e5708a848ee81ffc9b3f664585ab2304 --- /dev/null +++ b/src/nbody/src/common/common.h @@ -0,0 +1,72 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef COMMON_H +#define COMMON_H + +#include + +// BIGO definition +#ifndef BIGO +#define BIGO N2 +#define _BIGO_N2 +#endif + +#define PART 1024 +#define PAGE_SIZE 4096 + +#define TOLERATED_ERROR 0.0008 + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define LOG2(a) (31-__builtin_clz((a))) +#define MOD(a, b) ((a) < 0 ? ((((a) % (b)) + (b)) % (b)) : ((a) % (b))) +#define ROUNDUP(x, y) ((((x) + (y - 1)) / y) * y) +#define STRINGIFY(s) #s +#define TOSTRING(s) STRINGIFY(s) +#define CALCULATE_FORCES(s) calculate_forces_##s +#define XCALCULATE_FORCES(s) CALCULATE_FORCES(s) +#define calculate_forces XCALCULATE_FORCES(BIGO) + +static const float gravitational_constant = 6.6726e-11f; /* N(m/kg)2 */ +static const float default_domain_size_x = 1.0e+10; /* m */ +static const float default_domain_size_y = 1.0e+10; /* m */ +static const float default_domain_size_z = 1.0e+10; /* m */ +static const float default_mass_maximum = 1.0e+28; /* kg */ +static const float default_time_interval = 1.0e+0; /* s */ +static const int default_seed = 12345; +static const char* default_name = "data/nbody"; +static const int default_blocksize = 2048; +static const int default_num_particles = 16384; +static const int default_timesteps = 10; +static const int default_save_result = 0; +static const int default_check_result = 0; +static const int default_force_generation = 0; + +typedef struct { + float domain_size_x; + float domain_size_y; + float domain_size_z; + float mass_maximum; + float time_interval; + int seed; + const char* name; + int num_particles; + int blocksize; + int num_blocks; + int timesteps; + int save_result; + int check_result; + int force_generation; +} nbody_conf_t; + +nbody_conf_t nbody_get_conf(int argc, char **argv); +double nbody_compute_throughput(int num_particles, int timesteps, double elapsed_time); +void * nbody_alloc(size_t size); +double get_time(void); + +#endif // COMMON_H diff --git a/src/nbody/src/plain/main.c b/src/nbody/src/plain/main.c new file mode 100644 index 0000000000000000000000000000000000000000..62f7e2673fa2d1c406a6850bcdee9a63436762b6 --- /dev/null +++ b/src/nbody/src/plain/main.c @@ -0,0 +1,38 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "plain/nbody.h" + +#include +#include +#include +#include +#include + + +int main(int argc, char** argv) +{ + nbody_conf_t conf = nbody_get_conf(argc, argv); + + conf.num_blocks = 1; + conf.num_particles = ROUNDUP(conf.num_particles, MIN_PARTICLES); + assert(conf.num_particles >= MIN_PARTICLES); + assert(conf.timesteps > 0); + + nbody_t nbody = nbody_setup(&conf); + + double start = get_time(); + nbody_solve(&nbody, conf.timesteps, conf.time_interval); + double end = get_time(); + + nbody_stats(&nbody, &conf, end - start); + + if (conf.save_result && !conf.force_generation) nbody_save_particles(&nbody); + if (conf.check_result) nbody_check(&nbody); + nbody_free(&nbody); + return 0; +} diff --git a/src/nbody/src/plain/nbody.h b/src/nbody/src/plain/nbody.h new file mode 100644 index 0000000000000000000000000000000000000000..d044022d57ec919bf6fe7a0706b15a30a5e2202f --- /dev/null +++ b/src/nbody/src/plain/nbody.h @@ -0,0 +1,73 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#ifndef NBODY_H +#define NBODY_H + +#include "common/common.h" + +#include +#include +#include + +#define MIN_PARTICLES 4096 +#define PARTICLE_MEMBERS 8 +#define FORCE_MEMBERS 3 + +// Solver structures +typedef struct { + float *position_x; /* m */ + float *position_y; /* m */ + float *position_z; /* m */ + float *velocity_x; /* m/s */ + float *velocity_y; /* m/s */ + float *velocity_z; /* m/s */ + float *mass; /* kg */ + float *weight; + void *_ptr; + size_t _size; +} particles_t; + +typedef struct { + float *x; /* x */ + float *y; /* y */ + float *z; /* z */ + void *_ptr; + size_t _size; +} forces_t; + +// Application structures +typedef struct { + size_t size; + char name[1000]; +} nbody_file_t; + +typedef struct { + particles_t particles; + forces_t forces; + int num_particles; + int timesteps; + nbody_file_t file; +} nbody_t; + +// Solver function +void nbody_solve(nbody_t *nbody, const int timesteps, const float time_interval); + +// Auxiliary functions +nbody_t nbody_setup(const nbody_conf_t *conf); +void nbody_setup_particles(nbody_t *nbody, const nbody_conf_t *conf); +void nbody_setup_forces(nbody_t *nbody, const nbody_conf_t *conf); +void nbody_link_particles(particles_t *reference, int num_particles, float *addr); +void nbody_particle_init(const nbody_conf_t *conf, particles_t *part); +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time); +void nbody_save_particles(const nbody_t *nbody); +void nbody_free(nbody_t *nbody); +void nbody_check(const nbody_t *nbody); +int nbody_compare_particles(const particles_t *local, const particles_t* reference, int num_particles); + +#endif // NBODY_H + diff --git a/src/nbody/src/plain/solver_omp.c b/src/nbody/src/plain/solver_omp.c new file mode 100644 index 0000000000000000000000000000000000000000..55c9182d7f080f25d0d3f481e1505e6943ad6ba0 --- /dev/null +++ b/src/nbody/src/plain/solver_omp.c @@ -0,0 +1,114 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "plain/nbody.h" + +#include +#include +#include +#include +#include + +#include + +static void calculate_forces_N2(forces_t *forces, const particles_t *particles, const int num_particles); +static void update_particles(particles_t *particles, forces_t *forces, const int num_particles, const float time_interval); + +void nbody_solve(nbody_t *nbody, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + int num_particles = nbody->num_particles; + particles_t *particles = &nbody->particles; + forces_t *forces = &nbody->forces; + + #pragma omp parallel + for (int t = 0; t < timesteps; t++) { + calculate_forces(forces, particles, num_particles); + update_particles(particles, forces, num_particles, time_interval); + } +} + +void calculate_forces_N2(forces_t *forces, const particles_t *particles, const int num_particles) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const float *pos_x = particles->position_x; + const float *pos_y = particles->position_y; + const float *pos_z = particles->position_z; + const float *mass = particles->mass; + + #pragma omp for schedule(static) + for (int i = 0; i < num_particles; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < num_particles; j++) { + const float diff_x = pos_x[j] - pos_x[i]; + const float diff_y = pos_y[j] - pos_y[i]; + const float diff_z = pos_z[j] - pos_z[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (distance_squared != 0.0f) { + force = (mass[i] / (distance_squared * distance)) * (mass[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles(particles_t *particles, forces_t *forces, const int num_particles, const float time_interval) +{ + #pragma omp for schedule(static) + for (int e = 0; e < num_particles; e++) { + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + #pragma omp single + memset(forces->_ptr, 0, forces->_size); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + (void) conf; + + printf("bigo, %s, threads, %d, timesteps, %d, total_particles, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), omp_get_max_threads(), nbody->timesteps, nbody->num_particles, + time, nbody_compute_throughput(nbody->num_particles, nbody->timesteps, time) + ); +} diff --git a/src/nbody/src/plain/solver_seq.c b/src/nbody/src/plain/solver_seq.c new file mode 100644 index 0000000000000000000000000000000000000000..864745aba59369f14b318f5f06a102f9c3ed4ecb --- /dev/null +++ b/src/nbody/src/plain/solver_seq.c @@ -0,0 +1,108 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "plain/nbody.h" + +#include +#include +#include +#include +#include + + +static void calculate_forces_N2(forces_t *forces, const particles_t *particles, const int num_particles); +static void update_particles(particles_t *particles, forces_t *forces, const int num_particles, const float time_interval); + +void nbody_solve(nbody_t *nbody, const int timesteps, const float time_interval) +{ + assert(nbody != NULL); + + int num_particles = nbody->num_particles; + particles_t *particles = &nbody->particles; + forces_t *forces = &nbody->forces; + + for (int t = 0; t < timesteps; t++) { + calculate_forces(forces, particles, num_particles); + update_particles(particles, forces, num_particles, time_interval); + } +} + +void calculate_forces_N2(forces_t *forces, const particles_t *particles, const int num_particles) +{ + float *x = forces->x; + float *y = forces->y; + float *z = forces->z; + + const float *pos_x = particles->position_x; + const float *pos_y = particles->position_y; + const float *pos_z = particles->position_z; + const float *mass = particles->mass; + + for (int i = 0; i < num_particles; i++) { + float fx = x[i], fy = y[i], fz = z[i]; + for (int j = 0; j < num_particles; j++) { + const float diff_x = pos_x[j] - pos_x[i]; + const float diff_y = pos_y[j] - pos_y[i]; + const float diff_z = pos_z[j] - pos_z[i]; + + const float distance_squared = diff_x * diff_x + diff_y * diff_y + diff_z * diff_z; + const float distance = sqrtf(distance_squared); + + float force = 0.0f; + if (distance_squared != 0.0f) { + force = (mass[i] / (distance_squared * distance)) * (mass[j] * gravitational_constant); + } + fx += force * diff_x; + fy += force * diff_y; + fz += force * diff_z; + } + x[i] = fx; + y[i] = fy; + z[i] = fz; + } +} + +void update_particles(particles_t *particles, forces_t *forces, const int num_particles, const float time_interval) +{ + for (int e = 0; e < num_particles; e++) { + const float mass = particles->mass[e]; + const float velocity_x = particles->velocity_x[e]; + const float velocity_y = particles->velocity_y[e]; + const float velocity_z = particles->velocity_z[e]; + const float position_x = particles->position_x[e]; + const float position_y = particles->position_y[e]; + const float position_z = particles->position_z[e]; + + const float time_by_mass = time_interval / mass; + const float half_time_interval = 0.5f * time_interval; + + const float velocity_change_x = forces->x[e] * time_by_mass; + const float velocity_change_y = forces->y[e] * time_by_mass; + const float velocity_change_z = forces->z[e] * time_by_mass; + const float position_change_x = velocity_x + velocity_change_x * half_time_interval; + const float position_change_y = velocity_y + velocity_change_y * half_time_interval; + const float position_change_z = velocity_z + velocity_change_z * half_time_interval; + + particles->velocity_x[e] = velocity_x + velocity_change_x; + particles->velocity_y[e] = velocity_y + velocity_change_y; + particles->velocity_z[e] = velocity_z + velocity_change_z; + particles->position_x[e] = position_x + position_change_x; + particles->position_y[e] = position_y + position_change_y; + particles->position_z[e] = position_z + position_change_z; + } + + memset(forces->_ptr, 0, forces->_size); +} + +void nbody_stats(const nbody_t *nbody, const nbody_conf_t *conf, double time) +{ + (void) conf; + printf("bigo, %s, timesteps, %d, total_particles, %d, time, %.2f, performance, %.2f\n", + TOSTRING(BIGO), nbody->timesteps, nbody->num_particles, time, + nbody_compute_throughput(nbody->num_particles, nbody->timesteps, time) + ); +} diff --git a/src/nbody/src/plain/utils.c b/src/nbody/src/plain/utils.c new file mode 100644 index 0000000000000000000000000000000000000000..451c79b6ed87cb939daceee5f54e6a4d340c34ca --- /dev/null +++ b/src/nbody/src/plain/utils.c @@ -0,0 +1,261 @@ +// +// This file is part of NBody and is licensed under the terms contained +// in the LICENSE file. +// +// Copyright (C) 2021 Barcelona Supercomputing Center (BSC) +// + +#include "plain/nbody.h" + +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void nbody_particle_init(const nbody_conf_t *conf, particles_t *part) +{ + const int num_particles = conf->num_particles; + + for (int i = 0; i < num_particles; i++) { + part->position_x[i] = conf->domain_size_x * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->position_y[i] = conf->domain_size_y * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->position_z[i] = conf->domain_size_z * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->mass[i] = conf->mass_maximum * ((float)rand() / ((float)RAND_MAX + 1.0)); + part->weight[i] = gravitational_constant * part->mass[i]; + } +} + +int nbody_compare_particles(const particles_t *local, const particles_t *reference, int num_particles) +{ + double error = 0.0; + int count = 0; + for (int i = 0; i < num_particles; i++) { + if ((local->position_x[i] != reference->position_x[i]) || + (local->position_y[i] != reference->position_y[i]) || + (local->position_z[i] != reference->position_z[i])) { + error += fabs(((local->position_x[i] - reference->position_x[i])*100.0) / reference->position_x[i]) + + fabs(((local->position_y[i] - reference->position_y[i])*100.0) / reference->position_y[i]) + + fabs(((local->position_z[i] - reference->position_z[i])*100.0) / reference->position_z[i]); + count++; + } + } + + double relative_error = (count != 0) ? error / (3.0 * count) : 0.0; + if ((count * 100.0) / (num_particles) > 0.6 || relative_error > TOLERATED_ERROR) { + return 0; + } + return 1; +} + +void nbody_check(const nbody_t *nbody) +{ + char fname[1024]; + sprintf(fname, "%s.ref", nbody->file.name); + if (access(fname, F_OK) != 0) { + fprintf(stderr, "Warning: %s file does not exist. Skipping the check...\n", fname); + return; + } + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + void *reference_addr = mmap(NULL, nbody->file.size, PROT_READ, MAP_SHARED, fd, 0); + assert(reference_addr != MAP_FAILED); + + particles_t reference; + nbody_link_particles(&reference, nbody->num_particles, reference_addr); + + if (nbody_compare_particles(&nbody->particles, &reference, nbody->num_particles)) { + printf("Result validation: OK\n"); + } else { + printf("Result validation: ERROR\n"); + } + + int err = munmap(reference_addr, nbody->file.size); + assert(!err); + + err = close(fd); + assert(!err); +} + +static void nbody_generate_particles(const nbody_conf_t *conf, nbody_file_t *file) +{ + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + if (!access(fname, F_OK)) { + return; + } + + struct stat st = {0}; + if (stat("data", &st) == -1) { + mkdir("data", 0755); + } + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int size = file->size; + assert(size % PAGE_SIZE == 0); + + int err = ftruncate(fd, size); + assert(!err); + + void *addr = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + assert(addr != MAP_FAILED); + + particles_t particles; + nbody_link_particles(&particles, conf->num_particles, addr); + + // Initialize the particles + nbody_particle_init(conf, &particles); + + err = munmap(addr, size); + assert(!err); + + err = close(fd); + assert(!err); +} + +static void nbody_load_particles(nbody_t *nbody, const nbody_conf_t *conf, nbody_file_t *file) +{ + (void) conf; + + char fname[1024]; + sprintf(fname, "%s.in", file->name); + + const int fd = open(fname, O_RDONLY, 0); + assert(fd >= 0); + + void * addr = mmap(NULL, file->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + assert(addr != MAP_FAILED); + + memcpy(nbody->particles._ptr, addr, nbody->particles._size); + + int err = munmap(addr, file->size); + assert(!err); + + err = close(fd); + assert(!err); +} + +static void nbody_setup_file(nbody_t *nbody, const nbody_conf_t *conf) +{ + nbody_file_t file; + file.size = nbody->particles._size; + sprintf(file.name, "%s-%s-%d-%d-%d", conf->name, TOSTRING(BIGO), conf->num_particles, conf->num_particles, conf->timesteps); + nbody->file = file; +} + +void nbody_link_particles(particles_t *particles, int num_particles, float *addr) +{ + const size_t size = num_particles * sizeof(float); + particles->position_x = addr + 0; + particles->position_y = addr + 1; + particles->position_z = addr + 2; + particles->velocity_x = addr + 3; + particles->velocity_y = addr + 4; + particles->velocity_z = addr + 5; + particles->mass = addr + 6; + particles->weight = addr + 7; + particles->_ptr = addr; + particles->_size = size * PARTICLE_MEMBERS; +} + +void nbody_setup_particles(nbody_t *nbody, const nbody_conf_t *conf) +{ + void *addr = nbody_alloc(nbody->particles._size); + nbody_link_particles(&nbody->particles, nbody->num_particles, addr); + + nbody_generate_particles(conf, &nbody->file); + nbody_load_particles(nbody, conf, &nbody->file); +} + +void nbody_setup_forces(nbody_t *nbody, const nbody_conf_t *conf) +{ + char *base_addr = nbody_alloc(nbody->forces._size); + size_t array_size = conf->num_particles * sizeof(float); + + nbody->forces._ptr = base_addr; + nbody->forces.x = (float *) (base_addr + (0 * array_size)); + nbody->forces.y = (float *) (base_addr + (1 * array_size)); + nbody->forces.z = (float *) (base_addr + (2 * array_size)); +} + +nbody_t nbody_setup(const nbody_conf_t *conf) +{ + nbody_t nbody; + nbody.num_particles = conf->num_particles; + nbody.timesteps = conf->timesteps; + + // Compute the size of the structures + nbody.particles._size = nbody.num_particles * PARTICLE_MEMBERS * sizeof(float); + nbody.forces._size = nbody.num_particles * FORCE_MEMBERS * sizeof(float); + + // Set up the file + nbody_setup_file(&nbody, conf); + + if (conf->force_generation) { + void *addr = nbody_alloc(nbody.particles._size); + nbody_link_particles(&nbody.particles, nbody.num_particles, addr); + + // Initialize the particles + nbody_particle_init(conf, &nbody.particles); + + // Initialize the forces + nbody_setup_forces(&nbody, conf); + } + else { + // Initialize the particles + nbody_setup_particles(&nbody, conf); + + // Initialize the forces + nbody_setup_forces(&nbody, conf); + } + + return nbody; +} + +void nbody_save_particles(const nbody_t *nbody) +{ + char fname[1024]; + sprintf(fname, "%s.out", nbody->file.name); + + const int fd = open(fname, O_RDWR|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH); + assert(fd >= 0); + + const int size = nbody->file.size; + assert(size % PAGE_SIZE == 0); + + int err = ftruncate(fd, size); + assert(!err); + + void *particles = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); + assert(particles != MAP_FAILED); + + memcpy(particles, nbody->particles._ptr, size); + + err = munmap(particles, size); + assert(!err); + + err = close(fd); + assert(!err); +} + +void nbody_free(nbody_t *nbody) +{ + int err = munmap(nbody->particles._ptr, nbody->particles._size); + err |= munmap(nbody->forces._ptr, nbody->forces._size); + assert(!err); +} +