Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend 'Derivs' to cover 6th and 8th order finite difference #307

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 148 additions & 74 deletions Derivs/src/derivs.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ using namespace Loop;
template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_copy(const GF3D5<T> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx) {
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0) {
using vreal = simd<T>;
using vbool = simdl<T>;
constexpr std::size_t vsize = std::tuple_size_v<vreal>;
Expand All @@ -27,6 +26,24 @@ calc_copy(const GF3D5<T> &gf, const GF3D5layout layout,
});
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_copy(const vec<GF3D5<T>, dim> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0) {
for (int a = 0; a < dim; ++a)
calc_copy<CI, CJ, CK>(gf(a), layout, grid, gf0(a));
};
lwJi marked this conversation as resolved.
Show resolved Hide resolved

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_copy(const smat<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_copy<CI, CJ, CK>(gf(a, b), layout, grid, gf0(a, b));
};

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
Expand All @@ -44,9 +61,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 2 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<2>(gf0, mask1, p.I, dx);
Expand All @@ -60,9 +76,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 4 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<4>(gf0, mask1, p.I, dx);
Expand All @@ -76,9 +91,8 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 6 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<6>(gf0, mask1, p.I, dx);
Expand All @@ -87,11 +101,50 @@ calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
});
break;

case 8:
grid.loop_int_device<CI, CJ, CK, vsize>(
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 8 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<8>(gf0, mask1, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
});
break;

default:
CCTK_VERROR("Unsupported derivative order %d", deriv_order);
}
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const vec<GF3D5<T>, dim> &gf,
const vec<vec<GF3D5<T>, dim>, dim> &dgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
calc_derivs<CI, CJ, CK>(gf(a), dgf(a), layout, grid, gf0(a), dx,
deriv_order);
}
lwJi marked this conversation as resolved.
Show resolved Hide resolved

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const smat<GF3D5<T>, dim> &gf,
const smat<vec<GF3D5<T>, dim>, dim> &dgf, const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0, const vect<T, dim> dx,
const int deriv_order) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_derivs<CI, CJ, CK>(gf(a, b), dgf(a, b), layout, grid, gf0(a, b), dx,
deriv_order);
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
Expand All @@ -109,9 +162,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 2 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<2>(gf0, mask1, p.I, dx);
Expand All @@ -127,9 +179,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 4 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<4>(gf0, mask1, p.I, dx);
Expand All @@ -145,9 +196,8 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take account of ghost points
const vbool mask1 =
mask_for_loop_tail<vbool>(p.i, p.imax + deriv_order / 2);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 6 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<6>(gf0, mask1, p.I, dx);
Expand All @@ -158,84 +208,108 @@ calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
});
break;

case 8:
grid.loop_int_device<CI, CJ, CK, vsize>(
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
// Take ghost points into account
const vbool mask1 = mask_for_loop_tail<vbool>(p.i, p.imax + 8 / 2);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<8>(gf0, mask1, p.I, dx);
const auto ddval = calc_deriv2<8>(gf0, mask1, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
ddgf.store(mask, index, ddval);
});
break;

default:
CCTK_VERROR("Unsupported derivative order %d", deriv_order);
}
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_derivs2(
const vec<GF3D5<T>, dim> &gf, const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const vec<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
calc_derivs2<CI, CJ, CK>(gf(a), dgf(a), ddgf(a), layout, grid, gf0(a), dx,
deriv_order);
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_derivs2(
const smat<GF3D5<T>, dim> &gf, const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const smat<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order) {
for (int a = 0; a < dim; ++a)
for (int b = a; b < dim; ++b)
calc_derivs2<CI, CJ, CK>(gf(a, b), dgf(a, b), ddgf(a, b), layout, grid,
gf0(a, b), dx, deriv_order);
}

////////////////////////////////////////////////////////////////////////////////

// Template instantiations

using T = CCTK_REAL;

template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<2>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<4>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<6>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<2>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<4>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<6>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<2>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<4>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<6>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<2>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<4>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<6>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template void calc_copy<0, 0, 0>(const GF3D5<T> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0,
const vect<T, dim> dx);
const GF3D2<const T> &gf0);

template void calc_copy<0, 0, 0>(const vec<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const vec<GF3D2<const T>, dim> &gf0);

template void calc_copy<0, 0, 0>(const smat<GF3D5<T>, dim> &gf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0);
template void
calc_derivs<0, 0, 0>(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const GF3D5layout layout, const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0, const vect<T, dim> dx,
const int deriv_order);

template void calc_derivs<0, 0, 0>(const vec<GF3D5<T>, dim> &gf,
const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx,
const int deriv_order);

template void calc_derivs<0, 0, 0>(const smat<GF3D5<T>, dim> &gf,
const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const GF3D5layout layout,
const GridDescBaseDevice &grid,
const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx,
const int deriv_order);

template void
calc_derivs2<0, 0, 0>(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const smat<GF3D5<T>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx, const int deriv_order);

template void calc_derivs2<0, 0, 0>(
const vec<GF3D5<T>, dim> &gf, const vec<vec<GF3D5<T>, dim>, dim> &dgf,
const vec<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const vec<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order);

template void calc_derivs2<0, 0, 0>(
const smat<GF3D5<T>, dim> &gf, const smat<vec<GF3D5<T>, dim>, dim> &dgf,
const smat<smat<GF3D5<T>, dim>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const smat<GF3D2<const T>, dim> &gf0,
const vect<T, dim> dx, const int deriv_order);

} // namespace Derivs
Loading