Commit d3919038 authored by Edwin Carlinet

Add SIMD library (vendored std::experimental::simd implementation).

parent 7c3a92a0
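
The headers below come from the std::experimental::simd (Parallelism TS v2) implementation by Matthias Kretz (see the copyright notices). For orientation, here is a minimal usage sketch of the user-facing interface the library provides; the helper name axpy and the include path are illustrative assumptions, not part of this commit:

#include <cstddef>
#include <experimental/simd>

namespace stdx = std::experimental;

// Compute y[i] = a * x[i] + y[i], one full SIMD register per iteration.
void axpy(float a, const float* x, float* y, std::size_t n)
{
  using floatv = stdx::native_simd<float>;
  std::size_t i = 0;
  for (; i + floatv::size() <= n; i += floatv::size())
    {
      floatv xv(&x[i], stdx::element_aligned);
      floatv yv(&y[i], stdx::element_aligned);
      yv = a * xv + yv;                        // element-wise vector expression
      yv.copy_to(&y[i], stdx::element_aligned);
    }
  for (; i < n; ++i)                           // scalar tail for the remainder
    y[i] = a * x[i] + y[i];
}
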
// Simd _Combine ABI specific implementations -*- C++ -*-
// Copyright © 2015-2019 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH
// Matthias Kretz <m.kretz@gsi.de>
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the names of contributing organizations nor the
// names of its contributors may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_COMBINE_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_COMBINE_H_
#if __cplusplus >= 201703L
_GLIBCXX_SIMD_BEGIN_NAMESPACE
template <int _Np, typename _Abi> struct _SimdImplCombine;
template <int _Np, typename _Abi> struct _MaskImplCombine;
// simd_abi::_Combine {{{
template <int _Np, typename _Abi>
struct simd_abi::_Combine
{
template <typename _Tp>
static constexpr size_t size = _Np * _Abi::template size<_Tp>;
template <typename _Tp>
static constexpr size_t _S_full_size = size<_Tp>;
static constexpr int _S_factor = _Np;
using _MemberAbi = _Abi;
// validity traits {{{
// allow 2x, 3x, and 4x "unroll"
struct _IsValidAbiTag : conjunction<__bool_constant<(_Np > 1 && _Np <= 4)>,
typename _Abi::_IsValidAbiTag>
{
};
template <typename _Tp>
struct _IsValidSizeFor : _Abi::template _IsValidSizeFor<_Tp>
{
};
template <typename _Tp>
struct _IsValid
: conjunction<_IsValidAbiTag, typename _Abi::template _IsValid<_Tp>>
{
};
template <typename _Tp>
static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
// }}}
// _SimdImpl/_MaskImpl {{{
using _SimdImpl = _SimdImplCombine<_Np, _Abi>;
using _MaskImpl = _MaskImplCombine<_Np, _Abi>;
// }}}
// __traits {{{
template <typename _Tp, bool = _S_is_valid_v<_Tp>>
struct __traits : _InvalidTraits
{
};
template <typename _Tp>
struct __traits<_Tp, true>
{
using _IsValid = true_type;
using _SimdImpl = _SimdImplCombine<_Np, _Abi>;
using _MaskImpl = _MaskImplCombine<_Np, _Abi>;
// simd and simd_mask member types {{{
using _SimdMember =
std::array<typename _Abi::template __traits<_Tp>::_SimdMember, _Np>;
using _MaskMember =
std::array<typename _Abi::template __traits<_Tp>::_MaskMember, _Np>;
static constexpr size_t _S_simd_align =
_Abi::template __traits<_Tp>::_S_simd_align;
static constexpr size_t _S_mask_align =
_Abi::template __traits<_Tp>::_S_mask_align;
// }}}
// _SimdBase / base class for simd, providing extra conversions {{{
struct _SimdBase
{
explicit operator const _SimdMember&() const
{
return static_cast<const simd<_Tp, _Combine>*>(this)->_M_data;
}
};
// }}}
// _MaskBase {{{
// empty. The std::bitset interface suffices
struct _MaskBase
{
explicit operator const _MaskMember&() const
{
return static_cast<const simd_mask<_Tp, _Combine>*>(this)->_M_data;
}
};
// }}}
// _SimdCastType {{{
struct _SimdCastType
{
_SimdCastType(const _SimdMember& dd)
: _M_data(dd)
{
}
explicit operator const _SimdMember&() const { return _M_data; }
private:
const _SimdMember& _M_data;
};
// }}}
// _MaskCastType {{{
struct _MaskCastType
{
_MaskCastType(const _MaskMember& dd)
: _M_data(dd)
{
}
explicit operator const _MaskMember&() const { return _M_data; }
private:
const _MaskMember& _M_data;
};
//}}}
};
//}}}
};
// }}}
template <int _Np, typename _Abi>
struct _SimdImplCombine
{
// member types {{{
using abi_type = simd_abi::_Combine<_Np, _Abi>;
template <typename _Tp> using _TypeTag = _Tp *;
using _PartImpl = typename _Abi::_SimdImpl;
template <typename _Tp>
using _SimdMember =
std::array<typename _Abi::template __traits<_Tp>::_SimdMember, _Np>;
// }}}
// broadcast {{{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
__broadcast(_Tp __x) noexcept
{
return __generate_from_n_evaluations<_Np, _SimdMember<_Tp>>(
[&](int) constexpr { return _PartImpl::__broadcast(__x); });
}
// }}}
// load {{{
//X template <typename _Tp, typename _Up, typename _Fp>
//X _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
//X __load(const _Up* __mem, _Fp, _TypeTag<_Tp> __x) noexcept
//X {
//X return __generate_from_n_evaluations<_Np, _Tp>(
//X [&](auto __i) constexpr { return _PartImpl::__load(__mem + __i * __part_size<_Tp>, _Fp{}, __x); });
//X }
//X
// }}}
};
template <int _Np, typename _MemberAbi>
struct _MaskImplCombine
{
using _Abi = simd_abi::_Combine<_Np, _MemberAbi>;
using _MemberMaskImpl = typename _MemberAbi::_MaskImpl;
template <typename _Tp>
using _MemberSimdMask = simd_mask<_Tp, _MemberAbi>;
// __all_of {{{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC static bool __all_of(simd_mask<_Tp, _Abi> __k)
{
bool __r =
_MemberMaskImpl::__all_of(_MemberSimdMask<_Tp>(__private_init, __k[0]));
__execute_n_times<_Np - 1>([&](auto __i) {
__r = __r && _MemberMaskImpl::__all_of(
_MemberSimdMask<_Tp>(__private_init, __k[__i + 1]));
});
return __r;
}
// }}}
// __any_of {{{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC static bool __any_of(simd_mask<_Tp, _Abi> __k)
{
bool __r =
_MemberMaskImpl::__any_of(_MemberSimdMask<_Tp>(__private_init, __k[0]));
__execute_n_times<_Np - 1>([&](auto __i) {
__r = __r || _MemberMaskImpl::__any_of(
_MemberSimdMask<_Tp>(__private_init, __k[__i + 1]));
});
return __r;
}
// }}}
// __none_of {{{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC static bool __none_of(simd_mask<_Tp, _Abi> __k)
{
bool __r =
_MemberMaskImpl::__none_of(_MemberSimdMask<_Tp>(__private_init, __k[0]));
__execute_n_times<_Np - 1>([&](auto __i) {
__r = __r && _MemberMaskImpl::__none_of(
_MemberSimdMask<_Tp>(__private_init, __k[__i + 1]));
});
return __r;
}
// }}}
// __some_of {{{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC static bool __some_of(simd_mask<_Tp, _Abi> __k)
{
return __any_of(__k) && !__all_of(__k);
}
// }}}
// __popcount {{{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC static int __popcount(simd_mask<_Tp, _Abi> __k)
{
int __count =
_MemberMaskImpl::__popcount(_MemberSimdMask<_Tp>(__private_init, __k[0]));
__execute_n_times<_Np - 1>([&](auto __i) {
__count += _MemberMaskImpl::__popcount(
_MemberSimdMask<_Tp>(__private_init, __k[__i + 1]));
});
return __count;
}
// }}}
// __find_first_set {{{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC static int __find_first_set(simd_mask<_Tp, _Abi> __k)
{
for (int __i = 0; __i < _Np - 1; ++__i)
if (_MemberMaskImpl::__any_of(
_MemberSimdMask<_Tp>(__private_init, __k[__i])))
return __i * simd_size_v<_Tp, _MemberAbi> +
_MemberMaskImpl::__find_first_set(
_MemberSimdMask<_Tp>(__private_init, __k[__i]));
return (_Np - 1) * simd_size_v<_Tp, _MemberAbi> +
_MemberMaskImpl::__find_first_set(
_MemberSimdMask<_Tp>(__private_init, __k[_Np - 1]));
}
// }}}
// __find_last_set {{{
template <typename _Tp>
_GLIBCXX_SIMD_INTRINSIC static int __find_last_set(simd_mask<_Tp, _Abi> __k)
{
// search the chunks from the last one down so that the highest set bit wins
for (int __i = _Np - 1; __i > 0; --__i)
if (_MemberMaskImpl::__any_of(
_MemberSimdMask<_Tp>(__private_init, __k[__i])))
return __i * simd_size_v<_Tp, _MemberAbi> +
_MemberMaskImpl::__find_last_set(
_MemberSimdMask<_Tp>(__private_init, __k[__i]));
return _MemberMaskImpl::__find_last_set(
_MemberSimdMask<_Tp>(__private_init, __k[0]));
}
// }}}
};
_GLIBCXX_SIMD_END_NAMESPACE
#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_COMBINE_H_
// vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80
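
The next header supplies the generic _SimdConverter machinery through which element-type and ABI conversions (e.g. static_simd_cast) are dispatched. A minimal sketch of the user-facing side, assuming only the Parallelism TS v2 interface; the function name widen is illustrative:

#include <experimental/simd>

namespace stdx = std::experimental;

// Widen eight 16-bit lanes to eight 32-bit lanes; the per-ABI element
// conversion is carried out by the _SimdConverter specializations below.
stdx::fixed_size_simd<int, 8> widen(const stdx::fixed_size_simd<short, 8>& x)
{
  return stdx::static_simd_cast<stdx::fixed_size_simd<int, 8>>(x);
}
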
// Generic simd conversions -*- C++ -*-
// Copyright © 2015-2019 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH
// Matthias Kretz <m.kretz@gsi.de>
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the names of contributing organizations nor the
// names of its contributors may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_
#if __cplusplus >= 201703L
_GLIBCXX_SIMD_BEGIN_NAMESPACE
// _SimdConverter scalar -> scalar {{{
template <typename _From, typename _To>
struct _SimdConverter<_From,
simd_abi::scalar,
_To,
simd_abi::scalar,
std::enable_if_t<!std::is_same_v<_From, _To>>>
{
_GLIBCXX_SIMD_INTRINSIC _To operator()(_From __a) const noexcept
{
return static_cast<_To>(__a);
}
};
// }}}
// _SimdConverter "native" -> scalar {{{
template <typename _From, typename _To, typename _Abi>
struct _SimdConverter<_From,
_Abi,
_To,
simd_abi::scalar,
std::enable_if_t<!std::is_same_v<_Abi, simd_abi::scalar>>>
{
using _Arg = typename _Abi::template __traits<_From>::_SimdMember;
static constexpr size_t _S_n = _Arg::_S_width;
_GLIBCXX_SIMD_INTRINSIC std::array<_To, _S_n> __all(_Arg __a) const noexcept
{
return __call_with_subscripts(
__a, make_index_sequence<_S_n>(),
[&](auto... __values) constexpr -> std::array<_To, _S_n> {
return {static_cast<_To>(__values)...};
});
}
};
// }}}
// _SimdConverter scalar -> "native" {{{
template <typename _From, typename _To, typename _Abi>
struct _SimdConverter<_From,
simd_abi::scalar,
_To,
_Abi,
std::enable_if_t<!std::is_same_v<_Abi, simd_abi::scalar>>>
{
using _Ret = typename _Abi::template __traits<_To>::_SimdMember;
template <typename... _More>
_GLIBCXX_SIMD_INTRINSIC constexpr _Ret
operator()(_From __a, _More... __more) const noexcept
{
static_assert(sizeof...(_More) + 1 == _Abi::template size<_To>);
static_assert(std::conjunction_v<std::is_same<_From, _More>...>);
return __make_vector<_To>(__a, __more...);
}
};
// }}}
// _SimdConverter "native 1" -> "native 2" {{{
template <typename _From, typename _To, typename _AFrom, typename _ATo>
struct _SimdConverter<
_From,
_AFrom,
_To,
_ATo,
std::enable_if_t<!std::disjunction_v<
__is_fixed_size_abi<_AFrom>,
__is_fixed_size_abi<_ATo>,
std::is_same<_AFrom, simd_abi::scalar>,
std::is_same<_ATo, simd_abi::scalar>,
std::conjunction<std::is_same<_From, _To>, std::is_same<_AFrom, _ATo>>>>>
{
using _Arg = typename _AFrom::template __traits<_From>::_SimdMember;
using _Ret = typename _ATo::template __traits<_To>::_SimdMember;
using _V = __vector_type_t<_To, simd_size_v<_To, _ATo>>;
_GLIBCXX_SIMD_INTRINSIC auto __all(_Arg __a) const noexcept
{
return __convert_all<_V>(__a);
}
template <typename... _More>
_GLIBCXX_SIMD_INTRINSIC _Ret operator()(_Arg __a, _More... __more) const
noexcept
{
return __convert<_V>(__a, __more...);
}
};
// }}}
// _SimdConverter scalar -> fixed_size<1> {{{1
template <typename _From, typename _To>
struct _SimdConverter<_From,
simd_abi::scalar,
_To,
simd_abi::fixed_size<1>,
void>
{
_SimdTuple<_To, simd_abi::scalar> operator()(_From __x) const noexcept
{
return {static_cast<_To>(__x)};
}
};
// _SimdConverter fixed_size<1> -> scalar {{{1
template <typename _From, typename _To>
struct _SimdConverter<_From,
simd_abi::fixed_size<1>,
_To,
simd_abi::scalar,
void>
{
_GLIBCXX_SIMD_INTRINSIC _To
operator()(_SimdTuple<_From, simd_abi::scalar> __x) const noexcept
{
return {static_cast<_To>(__x.first)};
}
};
// _SimdConverter fixed_size<_Np> -> fixed_size<_Np> {{{1
template <typename _From, typename _To, int _Np>
struct _SimdConverter<_From,
simd_abi::fixed_size<_Np>,
_To,
simd_abi::fixed_size<_Np>,
std::enable_if_t<!std::is_same_v<_From, _To>>>
{
using _Ret = __fixed_size_storage_t<_To, _Np>;
using _Arg = __fixed_size_storage_t<_From, _Np>;
_GLIBCXX_SIMD_INTRINSIC _Ret operator()(const _Arg& __x) const noexcept
{
if constexpr (std::is_same_v<_From, _To>)
return __x;
// special case (optimize) int signedness casts
else if constexpr (sizeof(_From) == sizeof(_To) &&
std::is_integral_v<_From> && std::is_integral_v<_To>)
return __bit_cast<_Ret>(__x);
// special case if all ABI tags in _Ret are scalar
else if constexpr (__is_scalar_abi<typename _Ret::_FirstAbi>())
{
return __call_with_subscripts(
__x, make_index_sequence<_Np>(), [](auto... __values) constexpr->_Ret {
return __make_simd_tuple<_To, decltype((void)__values,
simd_abi::scalar())...>(
static_cast<_To>(__values)...);
});
}
// from one vector to one vector
else if constexpr (_Arg::_S_first_size == _Ret::_S_first_size)
{
_SimdConverter<_From, typename _Arg::_FirstAbi, _To,
typename _Ret::_FirstAbi>
__native_cvt;
if constexpr (_Arg::_S_tuple_size == 1)
return {__native_cvt(__x.first)};
else
{
constexpr size_t _NRemain = _Np - _Arg::_S_first_size;
_SimdConverter<_From, simd_abi::fixed_size<_NRemain>, _To,
simd_abi::fixed_size<_NRemain>>
__remainder_cvt;
return {__native_cvt(__x.first), __remainder_cvt(__x.second)};
}
}
// from one vector to multiple vectors
else if constexpr (_Arg::_S_first_size > _Ret::_S_first_size)
{
const auto __multiple_return_chunks =
__convert_all<__vector_type_t<_To, _Ret::_S_first_size>>(__x.first);
constexpr auto __converted = __multiple_return_chunks.size() *
_Ret::_FirstAbi::template size<_To>;
constexpr auto __remaining = _Np - __converted;
if constexpr (_Arg::_S_tuple_size == 1 && __remaining == 0)
return __to_simd_tuple<_To, _Np>(__multiple_return_chunks);
else if constexpr (_Arg::_S_tuple_size == 1)
{ // e.g. <int, 3> -> <double, 2, 1> or <short, 7> -> <double, 4, 2, 1>
using _RetRem = __remove_cvref_t<decltype(
__simd_tuple_pop_front<__multiple_return_chunks.size()>(_Ret()))>;
const auto __return_chunks2 =
__convert_all<__vector_type_t<_To, _RetRem::_S_first_size>, 0,
__converted>(__x.first);
constexpr auto __converted2 =
__converted + __return_chunks2.size() * _RetRem::_S_first_size;
if constexpr (__converted2 == _Np)
return __to_simd_tuple<_To, _Np>(__multiple_return_chunks,
__return_chunks2);
else
{
using _RetRem2 = __remove_cvref_t<decltype(
__simd_tuple_pop_front<__return_chunks2.size()>(_RetRem()))>;
const auto __return_chunks3 =
__convert_all<__vector_type_t<_To, _RetRem2::_S_first_size>,
0, __converted2>(__x.first);
constexpr auto __converted3 =
__converted2 +
__return_chunks3.size() * _RetRem2::_S_first_size;
if constexpr (__converted3 == _Np)
return __to_simd_tuple<_To, _Np>(__multiple_return_chunks,
__return_chunks2, __return_chunks3);
else
{
using _RetRem3 = __remove_cvref_t<decltype(
__simd_tuple_pop_front<__return_chunks3.size()>(
_RetRem2()))>;
const auto __return_chunks4 = __convert_all<
__vector_type_t<_To, _RetRem3::_S_first_size>, 0,
__converted3>(__x.first);
constexpr auto __converted4 =
__converted3 +
__return_chunks4.size() * _RetRem3::_S_first_size;
if constexpr (__converted4 == _Np)
return __to_simd_tuple<_To, _Np>(
__multiple_return_chunks, __return_chunks2,
__return_chunks3, __return_chunks4);
else
__assert_unreachable<_To>();
}
}
}
else
{
constexpr size_t _NRemain = _Np - _Arg::_S_first_size;
_SimdConverter<_From, simd_abi::fixed_size<_NRemain>, _To,
simd_abi::fixed_size<_NRemain>>
__remainder_cvt;
return __simd_tuple_concat(
__to_simd_tuple<_To, _Arg::_S_first_size>(
__multiple_return_chunks),
__remainder_cvt(__x.second));
}
}
// from multiple vectors to one vector
// _Arg::_S_first_size < _Ret::_S_first_size
// a) heterogeneous input at the end of the tuple (possible with partial
// native registers in _Ret)
else if constexpr (_Ret::_S_tuple_size == 1 && _Np % _Arg::_S_first_size != 0)
{
static_assert(_Ret::_FirstAbi::_S_is_partial);
return _Ret{__generate_from_n_evaluations<
_Np, typename _VectorTraits<typename _Ret::_FirstType>::type>(
[&](auto __i) { return static_cast<_To>(__x[__i]); })};
}
else
{
static_assert(_Arg::_S_tuple_size > 1);
constexpr auto __n =
__div_roundup(_Ret::_S_first_size, _Arg::_S_first_size);
return __call_with_n_evaluations<__n>(
[&__x](auto... __uncvted) {
// assuming _Arg Abi tags for all __i are _Arg::_FirstAbi
_SimdConverter<_From, typename _Arg::_FirstAbi, _To,
typename _Ret::_FirstAbi>
__native_cvt;
if constexpr (_Ret::_S_tuple_size == 1)
return _Ret{__native_cvt(__uncvted...)};
else