Add baseline and simple LLAMA port of n-body

This makes it easier to define this comparison in the PhD thesis.
alpaka-group · Jan 10, 2024 · d7ea189 · d7ea189
1 parent e51a145
commit d7ea189
Show file tree

Hide file tree

Showing 5 changed files with 177 additions and 0 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -156,6 +156,7 @@ if (LLAMA_BUILD_EXAMPLES)
 	# general examples
 	add_subdirectory("examples/vectoradd")
 	add_subdirectory("examples/nbody")
+	add_subdirectory("examples/nbody_code_comp")
 	add_subdirectory("examples/heatequation")
 	add_subdirectory("examples/viewcopy")
 	add_subdirectory("examples/bufferguard")

diff --git a/examples/nbody_code_comp/CMakeLists.txt b/examples/nbody_code_comp/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright 2024 Bernhard Manfred Gruber
+# SPDX-License-Identifier: MPL-2.0
+
+cmake_minimum_required (VERSION 3.18.3)
+
+project(llama-nbody-baseline CXX)
+add_executable(${PROJECT_NAME} nbody_baseline.cpp)
+target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)
+
+project(llama-nbody-ported CXX)
+if (NOT TARGET llama::llama)
+	find_package(llama CONFIG REQUIRED)
+endif()
+add_executable(${PROJECT_NAME} nbody_ported.cpp)
+target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)
+target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama)
diff --git a/examples/nbody_code_comp/README.md b/examples/nbody_code_comp/README.md
@@ -0,0 +1,3 @@
+These code examples are intended to show the difficulty of porting a simple code to LLAMA.
+`nbody_baseline.cpp` contains a simple n-body simulation using an AoS layout.
+`nbody_ported.cpp` is the corresponding LLAMA port with the same layout.
diff --git a/examples/nbody_code_comp/nbody_baseline.cpp b/examples/nbody_code_comp/nbody_baseline.cpp
@@ -0,0 +1,90 @@
+// Copyright 2024 Bernhard Manfred Gruber
+// SPDX-License-Identifier: MPL-2.0
+
+// clang-format off
+
+#include <random>
+#include <span>
+#include <vector>
+
+using FP = float;
+constexpr FP timestep = 0.0001f, eps2 = 0.01f;
+constexpr int steps = 5, problemSize = 64 * 1024;
+
+struct Vec3 {
+    FP x, y, z;
+
+    auto operator*=(Vec3 v) -> Vec3& {
+        x *= v.x;
+        y *= v.y;
+        z *= v.z;
+        return *this;
+    }
+
+    auto operator+=(Vec3 v) -> Vec3& {
+        x += v.x;
+        y += v.y;
+        z += v.z;
+        return *this;
+    }
+
+    friend auto operator-(Vec3 a, Vec3 b) -> Vec3 {
+        return Vec3{a.x - b.x, a.y - b.y, a.z - b.z};
+    }
+
+    friend auto operator*(Vec3 a, FP s) -> Vec3 {
+        return Vec3{a.x * s, a.y * s, a.z * s};
+    }
+};
+
+struct Particle {
+    Vec3 pos, vel;
+    FP mass;
+};
+
+inline void pPInteraction(Particle& pi, const Particle& pj) {
+    auto dist = pi.pos - pj.pos;
+    dist *= dist;
+    const auto distSqr = eps2 + dist.x + dist.y + dist.z;
+    const auto distSixth = distSqr * distSqr * distSqr;
+    const auto invDistCube = FP{1} / std::sqrt(distSixth);
+    const auto sts = pj.mass * timestep * invDistCube;
+    pi.vel += dist * sts;
+}
+
+void update(std::span<Particle> particles) {
+#pragma GCC ivdep
+    for(int i = 0; i < problemSize; i++) {
+        Particle pi = particles[i];
+        for(std::size_t j = 0; j < problemSize; ++j)
+            pPInteraction(pi, particles[j]);
+        particles[i].vel = pi.vel;
+    }
+}
+
+void move(std::span<Particle> particles) {
+#pragma GCC ivdep
+    for(int i = 0; i < problemSize; i++)
+        particles[i].pos += particles[i].vel * timestep;
+}
+
+auto main() -> int {
+    auto particles = std::vector<Particle>(problemSize);
+    std::default_random_engine engine;
+    std::normal_distribution<FP> dist(FP{0}, FP{1});
+    for(auto& p : particles) {
+        p.pos.x = dist(engine);
+        p.pos.y = dist(engine);
+        p.pos.z = dist(engine);
+        p.vel.x = dist(engine) / FP{10};
+        p.vel.y = dist(engine) / FP{10};
+        p.vel.z = dist(engine) / FP{10};
+        p.mass = dist(engine) / FP{100};
+    }
+
+    for(int s = 0; s < steps; ++s) {
+        update(particles);
+        ::move(particles);
+    }
+    return 0;
+}
diff --git a/examples/nbody_code_comp/nbody_ported.cpp b/examples/nbody_code_comp/nbody_ported.cpp
@@ -0,0 +1,67 @@
+// Copyright 2024 Bernhard Manfred Gruber
+// SPDX-License-Identifier: MPL-2.0
+
+// clang-format off
+
+#include "llama/llama.hpp"
+
+#include <random>
+#include <vector>
+
+using FP = float;
+constexpr FP timestep = 0.0001f, eps2 = 0.01f;
+constexpr int steps = 5, problemSize = 64 * 1024;
+
+struct Pos{}; struct Vel{}; struct X{}; struct Y{}; struct Z{}; struct Mass{};
+using V3 = llama::Record<llama::Field<X, FP>, llama::Field<Y, FP>, llama::Field<Z, FP>>;
+using Particle = llama::Record<llama::Field<Pos, V3>, llama::Field<Vel, V3>, llama::Field<Mass, FP>>;
+
+LLAMA_FN_HOST_ACC_INLINE void pPInteraction(auto&& pi, auto&& pj) {
+    auto dist = pi(Pos{}) - pj(Pos{});
+    dist *= dist;
+    const auto distSqr = eps2 + dist(X{}) + dist(Y{}) + dist(Z{});
+    const auto distSixth = distSqr * distSqr * distSqr;
+    const auto invDistCube = FP{1} / std::sqrt(distSixth);
+    const auto sts = pj(Mass{}) * timestep * invDistCube;
+    pi(Vel{}) += dist * sts;
+}
+
+void update(auto&& particles) {
+    LLAMA_INDEPENDENT_DATA
+    for(int i = 0; i < problemSize; i++) {
+        llama::One<Particle> pi = particles[i];
+        for(std::size_t j = 0; j < problemSize; ++j)
+            pPInteraction(pi, particles[j]);
+        particles[i](Vel{}) = pi(Vel{});
+    }
+}
+
+void move(auto&& particles) {
+    LLAMA_INDEPENDENT_DATA
+    for(int i = 0; i < problemSize; i++)
+        particles[i](Pos{}) += particles[i](Vel{}) * timestep;
+}
+
+auto main() -> int {
+    using ArrayExtents = llama::ArrayExtentsDynamic<int, 1>;
+    const auto extents = ArrayExtents{problemSize};
+    auto mapping = llama::mapping::AoS<ArrayExtents, Particle>{extents};
+    auto particles = llama::allocViewUninitialized(mapping);
+    std::default_random_engine engine;
+    std::normal_distribution<FP> dist(FP{0}, FP{1});
+    for(auto&& p : particles) {
+        p(Pos{}, X{}) = dist(engine);
+        p(Pos{}, Y{}) = dist(engine);
+        p(Pos{}, Z{}) = dist(engine);
+        p(Vel{}, X{}) = dist(engine) / FP{10};
+        p(Vel{}, Y{}) = dist(engine) / FP{10};
+        p(Vel{}, Z{}) = dist(engine) / FP{10};
+        p(Mass{}) = dist(engine) / FP{100};
+    }
+
+    for(int s = 0; s < steps; ++s) {
+        update(particles);
+        ::move(particles);
+    }
+    return 0;
+}