Skip to content

Commit

Permalink
Merge branch 'master' of github.com:CFT-HY/HILA
Browse files Browse the repository at this point in the history
  • Loading branch information
Haaaaron committed Dec 20, 2024
2 parents 1f05f6a + 51dc4de commit 362dc52
Show file tree
Hide file tree
Showing 19 changed files with 150 additions and 134 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/generate-doxygen-for-pages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ jobs:
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v2
uses: actions/deploy-pages@v2
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
/// @brief Example usage of the multicanonical tools
/// @details Simple working method for using multicanonical update on a scalar field where
/// the system exhibits critical freezing.
///
////////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
Expand Down
2 changes: 1 addition & 1 deletion docs/config
Original file line number Diff line number Diff line change
Expand Up @@ -2258,7 +2258,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.

PREDEFINED = RELEASE
PREDEFINED = RELEASE=1 CUDA=1

# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
Expand Down
16 changes: 8 additions & 8 deletions hilapp/src/loop_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ void TopLevelVisitor::handle_function_call_in_loop(Stmt *s, bool is_assignment)
// '\n';

// check if lambda function call:
if(CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)){
if(isLambdaCallOperator(MD)){
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) {
if (isLambdaCallOperator(MD)) {
// CXXRecordDecl *RD = MD->getParent();
// check if lambda is defined inside the loop:
for(var_decl &vd : var_decl_list){
if(vd.scope >= 0 && vd.decl->hasInit()){
if(LambdaExpr *LE = dyn_cast<LambdaExpr>(vd.decl->getInit())){
if(LE->getCallOperator() == MD){
// found local decl for the lambda
for (var_decl &vd : var_decl_list) {
if (vd.scope >= 0 && vd.decl->hasInit()) {
if (LambdaExpr *LE = dyn_cast<LambdaExpr>(vd.decl->getInit())) {
if (LE->getCallOperator() == MD) {
// found local decl for the lambda
ci.is_loop_local_lambda = true;
break;
}
Expand Down Expand Up @@ -769,5 +769,5 @@ bool TopLevelVisitor::handle_special_loop_function(CallExpr *Call) {
void TopLevelVisitor::process_loop_functions() {

// spin off to a new visitor
visit_loop_functions(loop_function_calls);
visit_loop_function_calls(loop_function_calls);
}
4 changes: 2 additions & 2 deletions hilapp/src/loop_function_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ class loopFunctionVisitor : public GeneralVisitor, public RecursiveASTVisitor<lo
}

///////////////////////////////////////////////////////////////////////////////////
/// Loop through functions seen - almost copy of the visit_loop_functions below, but
/// Loop through functions seen - almost a copy of visit_loop_function_calls below, but
/// callable from the visitor itself
///////////////////////////////////////////////////////////////////////////////////

Expand All @@ -650,7 +650,7 @@ class loopFunctionVisitor : public GeneralVisitor, public RecursiveASTVisitor<lo
/// Entry point from top level here
///////////////////////////////////////////////////////////////////////////////////

void TopLevelVisitor::visit_loop_functions(std::vector<call_info_struct> &calls) {
void TopLevelVisitor::visit_loop_function_calls(std::vector<call_info_struct> &calls) {

visited_decls.clear();

Expand Down
5 changes: 4 additions & 1 deletion hilapp/src/toplevelvisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2569,7 +2569,10 @@ void TopLevelVisitor::specialize_function_or_method(FunctionDecl *f) {
// Declarations with a trailing return type behave weirdly, they have empty
// ReturnTypeSourceRange, but the getDeclaredReturnType is the explicit return
// type.
if (TheRewriter.getRewrittenText(f->getReturnTypeSourceRange()) == "") {

if (f->getReturnType().getAsString() == "void") {
funcBuf.insert(0, " void ", true, true);
} else if (TheRewriter.getRewrittenText(f->getReturnTypeSourceRange()) == "") {
// So this one has a trailing return type. Just add auto.
funcBuf.insert(0, " auto ", true, true);
} else {
Expand Down
2 changes: 1 addition & 1 deletion hilapp/src/toplevelvisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class TopLevelVisitor : public GeneralVisitor, public RecursiveASTVisitor<TopLev

void process_loop_functions();

void visit_loop_functions(std::vector<call_info_struct> &calls);
void visit_loop_function_calls(std::vector<call_info_struct> &calls);

bool handle_special_loop_function(CallExpr *Call);

Expand Down
6 changes: 0 additions & 6 deletions libraries/plumbing/backend_gpu/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ void free_device_rng();
#include <cuda_runtime.h>
#include <cub/cub.cuh>

// This is set in params.h now
// #define N_threads 256 // Threads per block for CUDA TODO: make configurable?

using gpuError = cudaError;
#define gpuSuccess cudaSuccess

Expand Down Expand Up @@ -101,9 +98,6 @@ using gpuError = cudaError;

//#include <hipcub/hipcub.hpp>*

// Set in params.h now
// #define N_threads 256 // Threads per block for CUDAs

using gpuError = hipError_t;
#define gpuSuccess hipSuccess

Expand Down
25 changes: 20 additions & 5 deletions libraries/plumbing/backend_gpu/field_storage_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,23 @@ __device__ inline auto field_storage<T>::get(const unsigned i,
// assert(i < field_alloc_size);
using base_t = hila::arithmetic_type<T>;
constexpr unsigned n_elements = sizeof(T) / sizeof(base_t);
T value;
base_t *value_f = (base_t *)&value;
base_t *fp = (base_t *)(fieldbuf);
union {
T value;
base_t arr[n_elements];
} u;
const base_t *fp = (base_t *)(fieldbuf);
for (unsigned e = 0; e < n_elements; e++) {
value_f[e] = fp[e * field_alloc_size + i];
u.arr[e] = fp[e * field_alloc_size + i];
}
return value;
return u.value;

// T value;
// base_t *value_f = (base_t *)&value;
// base_t *fp = (base_t *)(fieldbuf);
// for (unsigned e = 0; e < n_elements; e++) {
// value_f[e] = fp[e * field_alloc_size + i];
// }
// return value;
}

template <typename T>
Expand All @@ -51,6 +61,7 @@ __device__ inline void field_storage<T>::set(const T &value, const unsigned i,
// assert(i < field_alloc_size);
using base_t = hila::arithmetic_type<T>;
constexpr unsigned n_elements = sizeof(T) / sizeof(base_t);

const base_t *value_f = (base_t *)&value;
base_t *fp = (base_t *)(fieldbuf);
for (unsigned e = 0; e < n_elements; e++) {
Expand Down Expand Up @@ -150,6 +161,8 @@ void field_storage<T>::gather_elements(T *RESTRICT buffer, const unsigned *RESTR
gpuFree(d_buffer);
}

#ifdef SPECIAL_BOUNDARY_CONDITIONS

/// A kernel that gathers elements negated
// requires unary -
template <typename T, std::enable_if_t<hila::has_unary_minus<T>::value, int> = 0>
Expand Down Expand Up @@ -194,6 +207,8 @@ void field_storage<T>::gather_elements_negated(T *RESTRICT buffer,
}
}

#endif

template <typename T>
__global__ void gather_comm_elements_kernel(field_storage<T> field, T *buffer, unsigned *site_index,
const int n, const unsigned field_alloc_size) {
Expand Down
4 changes: 2 additions & 2 deletions libraries/plumbing/backend_gpu/hila_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ void gpu_device_info() {
hila::out << " Max grid dimensions: [ " << props.maxGridSize[0] << ", "
<< props.maxGridSize[1] << ", " << props.maxGridSize[2] << " ]" << '\n';

hila::out << "Threads in use: " << N_threads << '\n';
hila::out << "Thread block size used: " << N_threads << '\n';

// Following should be OK in open MPI
#ifdef OPEN_MPI
Expand Down Expand Up @@ -355,7 +355,7 @@ void gpu_device_info() {
<< props.maxThreadsDim[1] << ", " << props.maxThreadsDim[2] << " ]" << '\n';
hila::out << " Max grid dimensions: [ " << props.maxGridSize[0] << ", "
<< props.maxGridSize[1] << ", " << props.maxGridSize[2] << " ]" << '\n';
hila::out << "Threads in use: " << N_threads << '\n';
hila::out << "Thread block size used: " << N_threads << '\n';
}
}

Expand Down
3 changes: 1 addition & 2 deletions libraries/plumbing/com_mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ hila::timer reduction_timer("MPI reduction");
hila::timer reduction_wait_timer("MPI reduction wait");
hila::timer broadcast_timer("MPI broadcast");
hila::timer send_timer("MPI send field");
hila::timer cancel_send_timer("MPI cancel send");
hila::timer cancel_receive_timer("MPI cancel receive");
hila::timer drop_comms_timer("MPI wait drop_comms");
hila::timer partition_sync_timer("partition sync");

// let us house the partitions-struct here
Expand Down
3 changes: 1 addition & 2 deletions libraries/plumbing/com_mpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ extern hila::timer
reduction_wait_timer,
broadcast_timer,
send_timer,
cancel_send_timer,
cancel_receive_timer,
drop_comms_timer,
partition_sync_timer;
// clang-format on

Expand Down
55 changes: 28 additions & 27 deletions libraries/plumbing/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,6 @@ class Field {
void wait_gather(Direction d, Parity p) const;
void gather(Direction d, Parity p = ALL) const;
void drop_comms(Direction d, Parity p) const;
void cancel_comm(Direction d, Parity p) const;

/**
* @brief Create a periodically shifted copy of the field
Expand Down Expand Up @@ -1903,42 +1902,44 @@ Field<T> Field<T>::shift(const CoordinateVector &v) const {


/// @internal
/// drop_comms(): if field is changed or deleted,
/// cancel ongoing communications. This should happen very seldom,
/// only if there are "by-hand" start_gather operations and these are not needed
/// drop_comms(): if field is changed or deleted, 'cancel' ongoing communications. Now we just
/// wait for the communications to finish; we don't actually cancel them. This is still kept
/// separate from a plain wait_gather, since it needs to be called in ~Field() and we need to
/// check whether there are ongoing communications. The user is notified that there were
/// redundant communications by drop_comms_timer showing up in the run printout.
///
/// This should happen very seldom, only if there are "by-hand" start_gather operations and these
/// are not needed.
template <typename T>
void Field<T>::drop_comms(Direction d, Parity p) const {

if (is_comm_initialized()) {
if (is_gather_started(d, ALL))
cancel_comm(d, ALL);
if (is_gather_started(d, ALL)) {
drop_comms_timer.start();
wait_gather(d, ALL);
drop_comms_timer.stop();
}
if (p != ALL) {
if (is_gather_started(d, p))
cancel_comm(d, p);
if (is_gather_started(d, p)) {
drop_comms_timer.start();
wait_gather(d, p);
drop_comms_timer.stop();
}
} else {
if (is_gather_started(d, EVEN))
cancel_comm(d, EVEN);
if (is_gather_started(d, ODD))
cancel_comm(d, ODD);
if (is_gather_started(d, EVEN)) {
drop_comms_timer.start();
wait_gather(d, EVEN);
drop_comms_timer.stop();
}
if (is_gather_started(d, ODD)) {
drop_comms_timer.start();
wait_gather(d, ODD);
drop_comms_timer.stop();
}
}
}
}

/// @internal Cancel the ongoing receive and send for Direction d and Parity p.
///
/// MPI_Cancel is issued on the receive request only if the node we receive from in
/// direction d is a different rank than this one, and on the send request only if the
/// node we send to is a different rank. Each MPI_Cancel call is timed with
/// cancel_receive_timer / cancel_send_timer respectively.
///
/// NOTE(review): the MPI standard requires a request marked for cancellation to still be
/// completed (MPI_Wait / MPI_Test) or released (MPI_Request_free). Nothing of the sort is
/// done in this function - confirm whether the caller takes care of it.
template <typename T>
void Field<T>::cancel_comm(Direction d, Parity p) const {
// receive side: skipped when the "from" neighbour is this same rank
if (lattice.nn_comminfo[d].from_node.rank != hila::myrank()) {
cancel_receive_timer.start();
// request arrays are indexed as [(int)parity - 1][direction]
MPI_Cancel(&fs->receive_request[(int)p - 1][d]);
cancel_receive_timer.stop();
}
// send side: skipped when the "to" neighbour is this same rank
if (lattice.nn_comminfo[d].to_node.rank != hila::myrank()) {
cancel_send_timer.start();
MPI_Cancel(&fs->send_request[(int)p - 1][d]);
cancel_send_timer.stop();
}
}


/// @internal And a convenience combi function
template <typename T>
Expand Down
11 changes: 2 additions & 9 deletions libraries/plumbing/field_comm.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,6 @@ dir_mask_t Field<T>::start_gather(Direction d, Parity p) const {
/// @internal
/// wait_gather(): Wait for the communication at parity par from
/// Direction d to finish; the communication is completed within this function.
/// If the communication has not started yet, also calls
/// start_gather()
///
/// NOTE: This will be called even if the field is marked const.
/// Therefore this function is const, even though it does change
Expand Down Expand Up @@ -426,9 +424,7 @@ void Field<T>::wait_gather(Direction d, Parity p) const {
// care

// check here consistency, this should never happen
if (p != ALL && is_gather_started(d, p) && is_gather_started(d, ALL)) {
exit(1);
}
assert(!(p != ALL && is_gather_started(d, p) && is_gather_started(d, ALL)));

Parity par;
int n_wait = 1;
Expand All @@ -449,15 +445,12 @@ void Field<T>::wait_gather(Direction d, Parity p) const {
par = EVEN;
else if (is_gather_started(d, EVEN) && is_gather_started(d, ODD)) {
n_wait = 2; // need to wait for both!
par = ALL;
par = EVEN; // will be flipped
} else {
exit(1);
}
}

if (n_wait == 2)
par = EVEN; // we'll flip both

for (int wait_i = 0; wait_i < n_wait; ++wait_i) {

int par_i = (int)par - 1;
Expand Down
10 changes: 0 additions & 10 deletions libraries/plumbing/has_unary_minus.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,6 @@ class has_unary_minus {
static constexpr bool value = false;
};

/**
*@brief Conditionally return `bool` type `true` if type `T` has unary `-` operator
*
*@details If the type T has been implemented `-T` (unary `-`) operator, i.e.
*\code{.cpp}
* T T::operator-() const { ... }
*\endcode,
*`has_unary_minus::value` is `true`. This is needed for antiperiodic boundary conditions
* @note `value` is false for `unsigned` type, whereas c++ allows `-unsigned`
*/
template <typename T>
class has_unary_minus<
T, typename std::enable_if_t<!std::is_unsigned<hila::arithmetic_type<T>>::value &&
Expand Down
26 changes: 25 additions & 1 deletion libraries/plumbing/hilapp_mpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,15 @@ enum MPI_Op : int { MPI_SUM, MPI_PROD, MPI_MAX, MPI_MIN, MPI_MAXLOC, MPI_MINLOC

typedef void *MPI_Comm;
typedef void *MPI_Request;
typedef int MPI_Status;
typedef struct ompi_status_public_t MPI_Status;
typedef void *MPI_Comm;
typedef int MPI_Fint;
typedef void *MPI_Errhandler;
#define MPI_IN_PLACE nullptr
#define MPI_COMM_WORLD nullptr
#define MPI_STATUS_IGNORE nullptr
#define MPI_ERRORS_RETURN nullptr
#define MPI_REQUEST_NULL nullptr
#define MPI_SUCCESS 1

enum MPI_thread_level : int {
Expand All @@ -51,6 +54,19 @@ enum MPI_thread_level : int {
MPI_THREAD_MULTIPLE
};

/// Definition of the struct that this header typedefs as MPI_Status.
/// NOTE(review): the struct name and the comments below indicate that this mirrors the
/// Open MPI status layout - confirm against the mpi.h of the MPI implementation in use.
/// NOTE(review): size_t must already be declared at this point; no include providing it
/// is visible in this part of the header - verify.
struct ompi_status_public_t {
/* These fields are publicly defined in the MPI specification.
User applications may freely read from these fields. */
int MPI_SOURCE; // MPI standard status field: source of the message
int MPI_TAG; // MPI standard status field: tag of the message
int MPI_ERROR; // MPI standard status field: error code
/* The following two fields are internal to the Open MPI
implementation and should not be accessed by MPI applications.
They are subject to change at any time. These are not the
droids you're looking for. */
int _cancelled; // implementation-internal; presumably the flag behind MPI_Test_cancelled - TODO confirm
size_t _ucount; // implementation-internal; meaning not shown here
};

int MPI_Init(int *argc, char ***argv);

Expand All @@ -62,6 +78,8 @@ int MPI_Comm_size(MPI_Comm comm, int *size);

int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm);

int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler);

int MPI_Bcast(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm);

int MPI_Reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
Expand Down Expand Up @@ -99,6 +117,12 @@ int MPI_Ibarrier(MPI_Comm comm, MPI_Request *request);

int MPI_Cancel(MPI_Request *request);

int MPI_Test(MPI_Request *request, int *flag, MPI_Status *status);

int MPI_Test_cancelled(const MPI_Status *status, int *flag);

int MPI_Request_free(MPI_Request *request);

int MPI_Abort(MPI_Comm comm, int errorcode);

MPI_Fint MPI_Comm_c2f(MPI_Comm comm);
Expand Down
Loading

0 comments on commit 362dc52

Please sign in to comment.