Skip to content
Snippets Groups Projects
Commit 3c5922d7 authored by Christian Engwer
Browse files

[!670] Loopsimd alignment

Merge branch 'loopsimd_alignment' into 'master'

ref:core/dune-common\> This MR adds an additional template parameter to
LoopSIMD to specify the alignment.

The default alignment is the alignment of the underlying type.

I'm not sure how to determine the proper alignment for some return types or
RebindType. To mitigate that problem I define conversion constructors and
assignment operators from LoopSIMD types with different alignment.

This MR depends on [!669].

Feel free to push at the source branch!

See merge request [!670]

  [!669]: gitlab.dune-project.org/NoneNone/merge_requests/669
  [!670]: gitlab.dune-project.org/core/dune-common/merge_requests/670
parents 32c2fdd3 bbe30918
Branches
Tags
1 merge request: !670 Loopsimd alignment
Pipeline #24939 passed with warnings
......@@ -30,19 +30,28 @@ namespace Dune {
* and is usable with the SIMD-interface.
*/
template<class T, std::size_t S>
class LoopSIMD : public std::array<T,S> {
template<class T, std::size_t S, std::size_t A = alignof(T)>
class alignas(A) LoopSIMD : public std::array<T,S> {
public:
//default constructor
LoopSIMD() {}
LoopSIMD() {
assert(reinterpret_cast<uintptr_t>(this) % std::min(alignof(LoopSIMD<T,S,A>),alignof(std::max_align_t)) == 0);
}
// broadcast constructor initializing the content with a given value
LoopSIMD(Simd::Scalar<T> i) : LoopSIMD() {
this->fill(i);
}
template<std::size_t OA>
explicit LoopSIMD(const LoopSIMD<T,S,OA>& other)
: std::array<T,S>(other)
{
assert(reinterpret_cast<uintptr_t>(this) % std::min(alignof(LoopSIMD<T,S,A>),alignof(std::max_align_t)) == 0);
}
/*
* Definition of basic operators
*/
......@@ -64,7 +73,7 @@ namespace Dune {
//Unary operators
#define DUNE_SIMD_LOOP_UNARY_OP(SYMBOL) \
auto operator SYMBOL() const { \
LoopSIMD<T,S> out; \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = SYMBOL((*this)[i]); \
} \
......@@ -77,7 +86,7 @@ namespace Dune {
DUNE_SIMD_LOOP_UNARY_OP(~);
auto operator!() const {
LoopSIMD<Simd::Mask<T>,S> out;
Simd::Mask<LoopSIMD<T,S,A>> out;
for(std::size_t i=0; i<S; i++){
out[i] = !((*this)[i]);
}
......@@ -88,7 +97,7 @@ namespace Dune {
//Postfix operators
#define DUNE_SIMD_LOOP_POSTFIX_OP(SYMBOL) \
auto operator SYMBOL(int){ \
LoopSIMD<T,S> out = *this; \
LoopSIMD<T,S,A> out = *this; \
SYMBOL(*this); \
return out; \
} \
......@@ -106,7 +115,8 @@ namespace Dune {
} \
return *this; \
} \
auto operator SYMBOL(const LoopSIMD<T,S> &v) { \
\
auto operator SYMBOL(const LoopSIMD<T,S,A> &v) { \
for(std::size_t i=0; i<S; i++){ \
(*this)[i] SYMBOL v[i]; \
} \
......@@ -129,26 +139,26 @@ namespace Dune {
//Arithmetic operators
#define DUNE_SIMD_LOOP_BINARY_OP(SYMBOL) \
template<class T, std::size_t S> \
auto operator SYMBOL(const LoopSIMD<T,S> &v, const Simd::Scalar<T> s) { \
LoopSIMD<T,S> out; \
template<class T, std::size_t S, std::size_t A> \
auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const Simd::Scalar<T> s) { \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = v[i] SYMBOL s; \
} \
return out; \
} \
template<class T, std::size_t S> \
auto operator SYMBOL(const Simd::Scalar<T> s, const LoopSIMD<T,S> &v) { \
LoopSIMD<T,S> out; \
template<class T, std::size_t S, std::size_t A> \
auto operator SYMBOL(const Simd::Scalar<T> s, const LoopSIMD<T,S,A> &v) { \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = s SYMBOL v[i]; \
} \
return out; \
} \
template<class T, std::size_t S> \
auto operator SYMBOL(const LoopSIMD<T,S> &v, \
const LoopSIMD<T,S> &w) { \
LoopSIMD<T,S> out; \
template<class T, std::size_t S, std::size_t A> \
auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
const LoopSIMD<T,S,A> &w) { \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = v[i] SYMBOL w[i]; \
} \
......@@ -170,18 +180,18 @@ namespace Dune {
//Bitshift operators
#define DUNE_SIMD_LOOP_BITSHIFT_OP(SYMBOL) \
template<class T, std::size_t S, class U> \
auto operator SYMBOL(const LoopSIMD<T,S> &v, const U s) { \
LoopSIMD<T,S> out; \
template<class T, std::size_t S, std::size_t A, class U> \
auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const U s) { \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = v[i] SYMBOL s; \
} \
return out; \
} \
template<class T, std::size_t S, class U> \
auto operator SYMBOL(const LoopSIMD<T,S> &v, \
const LoopSIMD<U,S> &w) { \
LoopSIMD<T,S> out; \
template<class T, std::size_t S, std::size_t A, class U, std::size_t AU> \
auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
const LoopSIMD<U,S,AU> &w) { \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = v[i] SYMBOL w[i]; \
} \
......@@ -196,26 +206,26 @@ namespace Dune {
//Comparison operators
#define DUNE_SIMD_LOOP_COMPARISON_OP(SYMBOL) \
template<class T, std::size_t S, class U> \
auto operator SYMBOL(const LoopSIMD<T,S> &v, const U s) { \
LoopSIMD<Simd::Mask<T>,S> out; \
template<class T, std::size_t S, std::size_t A, class U> \
auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const U s) { \
Simd::Mask<LoopSIMD<T,S,A>> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = v[i] SYMBOL s; \
} \
return out; \
} \
template<class T, std::size_t S> \
auto operator SYMBOL(const Simd::Scalar<T> s, const LoopSIMD<T,S> &v) { \
LoopSIMD<Simd::Mask<T>,S> out; \
template<class T, std::size_t S, std::size_t A> \
auto operator SYMBOL(const Simd::Scalar<T> s, const LoopSIMD<T,S,A> &v) { \
Simd::Mask<LoopSIMD<T,S,A>> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = s SYMBOL v[i]; \
} \
return out; \
} \
template<class T, std::size_t S> \
auto operator SYMBOL(const LoopSIMD<T,S> &v, \
const LoopSIMD<T,S> &w) { \
LoopSIMD<Simd::Mask<T>,S> out; \
template<class T, std::size_t S, std::size_t A> \
auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
const LoopSIMD<T,S,A> &w) { \
Simd::Mask<LoopSIMD<T,S,A>> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = v[i] SYMBOL w[i]; \
} \
......@@ -233,26 +243,26 @@ namespace Dune {
//Boolean operators
#define DUNE_SIMD_LOOP_BOOLEAN_OP(SYMBOL) \
template<class T, std::size_t S> \
auto operator SYMBOL(const LoopSIMD<T,S> &v, const Simd::Scalar<T> s) { \
LoopSIMD<Simd::Mask<T>,S> out; \
template<class T, std::size_t S, std::size_t A> \
auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const Simd::Scalar<T> s) { \
Simd::Mask<LoopSIMD<T,S,A>> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = v[i] SYMBOL s; \
} \
return out; \
} \
template<class T, std::size_t S> \
auto operator SYMBOL(const Simd::Mask<T> s, const LoopSIMD<T,S> &v) { \
LoopSIMD<Simd::Mask<T>,S> out; \
template<class T, std::size_t S, std::size_t A> \
auto operator SYMBOL(const Simd::Mask<T> s, const LoopSIMD<T,S,A> &v) { \
Simd::Mask<LoopSIMD<T,S,A>> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = s SYMBOL v[i]; \
} \
return out; \
} \
template<class T, std::size_t S> \
auto operator SYMBOL(const LoopSIMD<T,S> &v, \
const LoopSIMD<T,S> &w) { \
LoopSIMD<Simd::Mask<T>,S> out; \
template<class T, std::size_t S, std::size_t A> \
auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
const LoopSIMD<T,S,A> &w) { \
Simd::Mask<LoopSIMD<T,S,A>> out; \
for(std::size_t i=0; i<S; i++){ \
out[i] = v[i] SYMBOL w[i]; \
} \
......@@ -265,8 +275,8 @@ namespace Dune {
#undef DUNE_SIMD_LOOP_BOOLEAN_OP
//prints a given LoopSIMD
template<class T, std::size_t S>
std::ostream& operator<< (std::ostream &os, const LoopSIMD<T,S> &v) {
template<class T, std::size_t S, std::size_t A>
std::ostream& operator<< (std::ostream &os, const LoopSIMD<T,S,A> &v) {
os << "[";
for(std::size_t i=0; i<S-1; i++) {
os << v[i] << ", ";
......@@ -283,64 +293,64 @@ namespace Dune {
*/
//Implementation of SIMD-interface-types
template<class T, std::size_t S>
struct ScalarType<LoopSIMD<T,S>> {
template<class T, std::size_t S, std::size_t A>
struct ScalarType<LoopSIMD<T,S,A>> {
using type = Simd::Scalar<T>;
};
template<class U, class T, std::size_t S>
struct RebindType<U, LoopSIMD<T,S>> {
using type = LoopSIMD<Simd::Rebind<U, T>,S>;
template<class U, class T, std::size_t S, std::size_t A>
struct RebindType<U, LoopSIMD<T,S,A>> {
using type = LoopSIMD<Simd::Rebind<U, T>,S,std::max(A, alignof(Simd::Rebind<U, T>))>;
};
//Implementation of SIMD-interface-functionality
template<class T, std::size_t S>
struct LaneCount<LoopSIMD<T,S>> : index_constant<S*lanes<T>()> {};
template<class T, std::size_t S, std::size_t A>
struct LaneCount<LoopSIMD<T,S,A>> : index_constant<S*lanes<T>()> {};
template<class T, std::size_t S>
auto lane(ADLTag<5>, std::size_t l, LoopSIMD<T,S> &&v)
template<class T, std::size_t S, std::size_t A>
auto lane(ADLTag<5>, std::size_t l, LoopSIMD<T,S,A> &&v)
-> decltype(std::move(Simd::lane(l%lanes<T>(), v[l/lanes<T>()])))
{
return std::move(Simd::lane(l%lanes<T>(), v[l/lanes<T>()]));
}
template<class T, std::size_t S>
auto lane(ADLTag<5>, std::size_t l, const LoopSIMD<T,S> &v)
template<class T, std::size_t S, std::size_t A>
auto lane(ADLTag<5>, std::size_t l, const LoopSIMD<T,S,A> &v)
-> decltype(Simd::lane(l%lanes<T>(), v[l/lanes<T>()]))
{
return Simd::lane(l%lanes<T>(), v[l/lanes<T>()]);
}
template<class T, std::size_t S>
auto lane(ADLTag<5>, std::size_t l, LoopSIMD<T,S> &v)
template<class T, std::size_t S, std::size_t A>
auto lane(ADLTag<5>, std::size_t l, LoopSIMD<T,S,A> &v)
-> decltype(Simd::lane(l%lanes<T>(), v[l/lanes<T>()]))
{
return Simd::lane(l%lanes<T>(), v[l/lanes<T>()]);
}
template<class T, std::size_t S>
auto cond(ADLTag<5>, LoopSIMD<Simd::Mask<T>,S> mask,
LoopSIMD<T,S> ifTrue, LoopSIMD<T,S> ifFalse) {
LoopSIMD<T,S> out;
template<class T, std::size_t S, std::size_t AM, std::size_t AD>
auto cond(ADLTag<5>, Simd::Mask<LoopSIMD<T,S,AM>> mask,
LoopSIMD<T,S,AD> ifTrue, LoopSIMD<T,S,AD> ifFalse) {
LoopSIMD<T,S,AD> out;
for(std::size_t i=0; i<S; i++) {
out[i] = Simd::cond(mask[i], ifTrue[i], ifFalse[i]);
}
return out;
}
template<class M, class T, std::size_t S>
template<class M, class T, std::size_t S, std::size_t A>
auto cond(ADLTag<5, std::is_same<bool, Simd::Scalar<M> >::value
&& Simd::lanes<M>() == Simd::lanes<LoopSIMD<T,S> >()>,
M mask, LoopSIMD<T,S> ifTrue, LoopSIMD<T,S> ifFalse)
&& Simd::lanes<M>() == Simd::lanes<LoopSIMD<T,S,A> >()>,
M mask, LoopSIMD<T,S,A> ifTrue, LoopSIMD<T,S,A> ifFalse)
{
LoopSIMD<T,S> out;
LoopSIMD<T,S,A> out;
for(auto l : range(Simd::lanes(mask)))
Simd::lane(l, out) = Simd::lane(l, mask) ? Simd::lane(l, ifTrue) : Simd::lane(l, ifFalse);
return out;
}
template<class M, std::size_t S>
bool anyTrue(ADLTag<5>, LoopSIMD<M,S> mask) {
template<class M, std::size_t S, std::size_t A>
bool anyTrue(ADLTag<5>, LoopSIMD<M,S,A> mask) {
bool out = false;
for(std::size_t i=0; i<S; i++) {
out |= Simd::anyTrue(mask[i]);
......@@ -348,8 +358,8 @@ namespace Dune {
return out;
}
template<class M, std::size_t S>
bool allTrue(ADLTag<5>, LoopSIMD<M,S> mask) {
template<class M, std::size_t S, std::size_t A>
bool allTrue(ADLTag<5>, LoopSIMD<M,S,A> mask) {
bool out = true;
for(std::size_t i=0; i<S; i++) {
out &= Simd::allTrue(mask[i]);
......@@ -357,8 +367,8 @@ namespace Dune {
return out;
}
template<class M, std::size_t S>
bool anyFalse(ADLTag<5>, LoopSIMD<M,S> mask) {
template<class M, std::size_t S, std::size_t A>
bool anyFalse(ADLTag<5>, LoopSIMD<M,S,A> mask) {
bool out = false;
for(std::size_t i=0; i<S; i++) {
out |= Simd::anyFalse(mask[i]);
......@@ -366,8 +376,8 @@ namespace Dune {
return out;
}
template<class M, std::size_t S>
bool allFalse(ADLTag<5>, LoopSIMD<M,S> mask) {
template<class M, std::size_t S, std::size_t A>
bool allFalse(ADLTag<5>, LoopSIMD<M,S,A> mask) {
bool out = true;
for(std::size_t i=0; i<S; i++) {
out &= Simd::allFalse(mask[i]);
......@@ -387,11 +397,11 @@ namespace Dune {
*/
#define DUNE_SIMD_LOOP_CMATH_UNARY_OP(expr) \
template<class T, std::size_t S, typename Sfinae = \
template<class T, std::size_t S, std::size_t A, typename Sfinae = \
typename std::enable_if_t<!std::is_integral<Simd::Scalar<T>>::value> > \
auto expr(const LoopSIMD<T,S> &v) { \
auto expr(const LoopSIMD<T,S,A> &v) { \
using std::expr; \
LoopSIMD<T,S> out; \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++) { \
out[i] = expr(v[i]); \
} \
......@@ -400,9 +410,9 @@ namespace Dune {
static_assert(true, "expecting ;")
#define DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(expr, returnType) \
template<class T, std::size_t S, typename Sfinae = \
template<class T, std::size_t S, std::size_t A, typename Sfinae = \
typename std::enable_if_t<!std::is_integral<Simd::Scalar<T>>::value> > \
auto expr(const LoopSIMD<T,S> &v) { \
auto expr(const LoopSIMD<T,S,A> &v) { \
using std::expr; \
LoopSIMD<returnType,S> out; \
for(std::size_t i=0; i<S; i++) { \
......@@ -480,21 +490,21 @@ namespace Dune {
* More overloads will be provided should the need arise.
*/
#define DUNE_SIMD_LOOP_STD_UNARY_OP(expr) \
template<class T, std::size_t S> \
auto expr(const LoopSIMD<T,S> &v) { \
#define DUNE_SIMD_LOOP_STD_UNARY_OP(expr) \
template<class T, std::size_t S, std::size_t A> \
auto expr(const LoopSIMD<T,S,A> &v) { \
using std::expr; \
LoopSIMD<T,S> out; \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++) { \
out[i] = expr(v[i]); \
} \
return out; \
} \
\
template<class T, std::size_t S> \
auto expr(const LoopSIMD<std::complex<T>,S> &v) { \
template<class T, std::size_t S, std::size_t A> \
auto expr(const LoopSIMD<std::complex<T>,S,A> &v) { \
using std::expr; \
LoopSIMD<T,S> out; \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++) { \
out[i] = expr(v[i]); \
} \
......@@ -508,10 +518,10 @@ namespace Dune {
#undef DUNE_SIMD_LOOP_STD_UNARY_OP
#define DUNE_SIMD_LOOP_STD_BINARY_OP(expr) \
template<class T, std::size_t S> \
auto expr(const LoopSIMD<T,S> &v, const LoopSIMD<T,S> &w) { \
template<class T, std::size_t S, std::size_t A> \
auto expr(const LoopSIMD<T,S,A> &v, const LoopSIMD<T,S,A> &w) { \
using std::expr; \
LoopSIMD<T,S> out; \
LoopSIMD<T,S,A> out; \
for(std::size_t i=0; i<S; i++) { \
out[i] = expr(v[i],w[i]); \
} \
......@@ -525,33 +535,33 @@ namespace Dune {
#undef DUNE_SIMD_LOOP_STD_BINARY_OP
namespace MathOverloads {
template<class T, std::size_t S>
auto isNaN(const LoopSIMD<T,S> &v, PriorityTag<3>, ADLTag) {
LoopSIMD<Simd::Mask<T>,S> out;
template<class T, std::size_t S, std::size_t A>
auto isNaN(const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
Simd::Mask<LoopSIMD<T,S,A>> out;
for(auto l : range(S))
out[l] = Dune::isNaN(v[l]);
return out;
}
template<class T, std::size_t S>
auto isInf(const LoopSIMD<T,S> &v, PriorityTag<3>, ADLTag) {
LoopSIMD<Simd::Mask<T>,S> out;
template<class T, std::size_t S, std::size_t A>
auto isInf(const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
Simd::Mask<LoopSIMD<T,S,A>> out;
for(auto l : range(S))
out[l] = Dune::isInf(v[l]);
return out;
}
template<class T, std::size_t S>
auto isFinite(const LoopSIMD<T,S> &v, PriorityTag<3>, ADLTag) {
LoopSIMD<Simd::Mask<T>,S> out;
template<class T, std::size_t S, std::size_t A>
auto isFinite(const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
Simd::Mask<LoopSIMD<T,S,A>> out;
for(auto l : range(S))
out[l] = Dune::isFinite(v[l]);
return out;
}
} //namespace MathOverloads
template<class T, std::size_t S>
struct IsNumber<LoopSIMD<T,S>> :
template<class T, std::size_t S, std::size_t A>
struct IsNumber<LoopSIMD<T,S,A>> :
public std::integral_constant<bool, IsNumber<T>::value>{
};
......
......@@ -14,10 +14,10 @@
template<class> struct RebindAccept : std::false_type {};
#cmake @template@
template<>
struct RebindAccept<Dune::LoopSIMD<@SCALAR@, 5> > : std::true_type {};
template<>
struct RebindAccept<Dune::LoopSIMD<Dune::LoopSIMD<@SCALAR@, 2>, 5> > : std::true_type {};
template<std::size_t A>
struct RebindAccept<Dune::LoopSIMD<@SCALAR@, 5, A> > : std::true_type {};
template<std::size_t A1, std::size_t A2>
struct RebindAccept<Dune::LoopSIMD<Dune::LoopSIMD<@SCALAR@, 2, A1>, 5, A2> > : std::true_type {};
#cmake @endtemplate@
using Rebinds = Dune::Simd::RebindList<
......@@ -33,6 +33,8 @@ int main()
#cmake @template@
test.check<Dune::LoopSIMD<@SCALAR@, 5>,
Rebinds, Dune::Std::to_false_type, RebindAccept>();
test.check<Dune::LoopSIMD<@SCALAR@, 5, 64>,
Rebinds, Dune::Std::to_false_type, RebindAccept>();
test.check<Dune::LoopSIMD<Dune::LoopSIMD<@SCALAR@, 2>, 5>,
Rebinds, Dune::Std::to_false_type, RebindAccept>();
#cmake @endtemplate@
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment