-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add baseline and simple LLAMA port of n-body
This makes it easier to define this comparison in the PhD thesis.
- Loading branch information
1 parent
e51a145
commit d7ea189
Showing
5 changed files
with
177 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Copyright 2024 Bernhard Manfred Gruber | ||
# SPDX-License-Identifier: MPL-2.0 | ||
|
||
cmake_minimum_required (VERSION 3.18.3) | ||
|
||
project(llama-nbody-baseline CXX) | ||
add_executable(${PROJECT_NAME} nbody_baseline.cpp) | ||
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20) | ||
|
||
project(llama-nbody-ported CXX) | ||
if (NOT TARGET llama::llama) | ||
find_package(llama CONFIG REQUIRED) | ||
endif() | ||
add_executable(${PROJECT_NAME} nbody_ported.cpp) | ||
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20) | ||
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
These code examples are intended to show the difficulty of porting a simple code to LLAMA. | ||
`nbody_baseline.cpp` contains a simple n-body simulation using an AoS layout. | ||
`nbody_ported.cpp` is the corresponding LLAMA port with the same layout. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// Copyright 2024 Bernhard Manfred Gruber | ||
// SPDX-License-Identifier: MPL-2.0 | ||
|
||
// clang-format off | ||
|
||
#include <random> | ||
#include <span> | ||
#include <vector> | ||
|
||
using FP = float; | ||
constexpr FP timestep = 0.0001f, eps2 = 0.01f; | ||
constexpr int steps = 5, problemSize = 64 * 1024; | ||
|
||
struct Vec3 { | ||
FP x, y, z; | ||
|
||
auto operator*=(Vec3 v) -> Vec3& { | ||
x *= v.x; | ||
y *= v.y; | ||
z *= v.z; | ||
return *this; | ||
} | ||
|
||
auto operator+=(Vec3 v) -> Vec3& { | ||
x += v.x; | ||
y += v.y; | ||
z += v.z; | ||
return *this; | ||
} | ||
|
||
friend auto operator-(Vec3 a, Vec3 b) -> Vec3 { | ||
return Vec3{a.x - b.x, a.y - b.y, a.z - b.z}; | ||
} | ||
|
||
friend auto operator*(Vec3 a, FP s) -> Vec3 { | ||
return Vec3{a.x * s, a.y * s, a.z * s}; | ||
} | ||
}; | ||
|
||
struct Particle { | ||
Vec3 pos, vel; | ||
FP mass; | ||
}; | ||
|
||
inline void pPInteraction(Particle& pi, const Particle& pj) { | ||
auto dist = pi.pos - pj.pos; | ||
dist *= dist; | ||
const auto distSqr = eps2 + dist.x + dist.y + dist.z; | ||
const auto distSixth = distSqr * distSqr * distSqr; | ||
const auto invDistCube = FP{1} / std::sqrt(distSixth); | ||
const auto sts = pj.mass * timestep * invDistCube; | ||
pi.vel += dist * sts; | ||
} | ||
|
||
void update(std::span<Particle> particles) { | ||
#pragma GCC ivdep | ||
for(int i = 0; i < problemSize; i++) { | ||
Particle pi = particles[i]; | ||
for(std::size_t j = 0; j < problemSize; ++j) | ||
pPInteraction(pi, particles[j]); | ||
particles[i].vel = pi.vel; | ||
} | ||
} | ||
|
||
void move(std::span<Particle> particles) { | ||
#pragma GCC ivdep | ||
for(int i = 0; i < problemSize; i++) | ||
particles[i].pos += particles[i].vel * timestep; | ||
} | ||
|
||
auto main() -> int { | ||
auto particles = std::vector<Particle>(problemSize); | ||
std::default_random_engine engine; | ||
std::normal_distribution<FP> dist(FP{0}, FP{1}); | ||
for(auto& p : particles) { | ||
p.pos.x = dist(engine); | ||
p.pos.y = dist(engine); | ||
p.pos.z = dist(engine); | ||
p.vel.x = dist(engine) / FP{10}; | ||
p.vel.y = dist(engine) / FP{10}; | ||
p.vel.z = dist(engine) / FP{10}; | ||
p.mass = dist(engine) / FP{100}; | ||
} | ||
|
||
for(int s = 0; s < steps; ++s) { | ||
update(particles); | ||
::move(particles); | ||
} | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// Copyright 2024 Bernhard Manfred Gruber | ||
// SPDX-License-Identifier: MPL-2.0 | ||
|
||
// clang-format off | ||
|
||
#include "llama/llama.hpp" | ||
|
||
#include <random> | ||
#include <vector> | ||
|
||
using FP = float; | ||
constexpr FP timestep = 0.0001f, eps2 = 0.01f; | ||
constexpr int steps = 5, problemSize = 64 * 1024; | ||
|
||
struct Pos{}; struct Vel{}; struct X{}; struct Y{}; struct Z{}; struct Mass{}; | ||
using V3 = llama::Record<llama::Field<X, FP>, llama::Field<Y, FP>, llama::Field<Z, FP>>; | ||
using Particle = llama::Record<llama::Field<Pos, V3>, llama::Field<Vel, V3>, llama::Field<Mass, FP>>; | ||
|
||
LLAMA_FN_HOST_ACC_INLINE void pPInteraction(auto&& pi, auto&& pj) { | ||
auto dist = pi(Pos{}) - pj(Pos{}); | ||
dist *= dist; | ||
const auto distSqr = eps2 + dist(X{}) + dist(Y{}) + dist(Z{}); | ||
const auto distSixth = distSqr * distSqr * distSqr; | ||
const auto invDistCube = FP{1} / std::sqrt(distSixth); | ||
const auto sts = pj(Mass{}) * timestep * invDistCube; | ||
pi(Vel{}) += dist * sts; | ||
} | ||
|
||
void update(auto&& particles) { | ||
LLAMA_INDEPENDENT_DATA | ||
for(int i = 0; i < problemSize; i++) { | ||
llama::One<Particle> pi = particles[i]; | ||
for(std::size_t j = 0; j < problemSize; ++j) | ||
pPInteraction(pi, particles[j]); | ||
particles[i](Vel{}) = pi(Vel{}); | ||
} | ||
} | ||
|
||
void move(auto&& particles) { | ||
LLAMA_INDEPENDENT_DATA | ||
for(int i = 0; i < problemSize; i++) | ||
particles[i](Pos{}) += particles[i](Vel{}) * timestep; | ||
} | ||
|
||
auto main() -> int { | ||
using ArrayExtents = llama::ArrayExtentsDynamic<int, 1>; | ||
const auto extents = ArrayExtents{problemSize}; | ||
auto mapping = llama::mapping::AoS<ArrayExtents, Particle>{extents}; | ||
auto particles = llama::allocViewUninitialized(mapping); | ||
std::default_random_engine engine; | ||
std::normal_distribution<FP> dist(FP{0}, FP{1}); | ||
for(auto&& p : particles) { | ||
p(Pos{}, X{}) = dist(engine); | ||
p(Pos{}, Y{}) = dist(engine); | ||
p(Pos{}, Z{}) = dist(engine); | ||
p(Vel{}, X{}) = dist(engine) / FP{10}; | ||
p(Vel{}, Y{}) = dist(engine) / FP{10}; | ||
p(Vel{}, Z{}) = dist(engine) / FP{10}; | ||
p(Mass{}) = dist(engine) / FP{100}; | ||
} | ||
|
||
for(int s = 0; s < steps; ++s) { | ||
update(particles); | ||
::move(particles); | ||
} | ||
return 0; | ||
} |