/***************************************************************************
 * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
 * Copyright (c) QuantStack                                                 *
 *                                                                          *
 * Distributed under the terms of the BSD 3-Clause License.                 *
 *                                                                          *
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/

#ifndef XTENSOR_ACCUMULATOR_HPP
#define XTENSOR_ACCUMULATOR_HPP

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <type_traits>

#include "xexpression.hpp"
#include "xstrides.hpp"
#include "xtensor_config.hpp"
#include "xtensor_forward.hpp"

namespace xt
{

#define DEFAULT_STRATEGY_ACCUMULATORS evaluation_strategy::immediate_type

    namespace detail
    {
        template <class V = void>
        struct accumulator_identity : xtl::identity
        {
            using value_type = V;
        };
    }

    /**************
     * accumulate *
     **************/

    template <class ACCUMULATE_FUNC, class INIT_FUNC = detail::accumulator_identity<void>>
    struct xaccumulator_functor : public std::tuple<ACCUMULATE_FUNC, INIT_FUNC>
    {
        using self_type = xaccumulator_functor<ACCUMULATE_FUNC, INIT_FUNC>;
        using base_type = std::tuple<ACCUMULATE_FUNC, INIT_FUNC>;
        using accumulate_functor_type = ACCUMULATE_FUNC;
        using init_functor_type = INIT_FUNC;
        using init_value_type = typename init_functor_type::value_type;

        xaccumulator_functor()
            : base_type()
        {
        }

        template <class RF>
        xaccumulator_functor(RF&& accumulate_func)
            : base_type(std::forward<RF>(accumulate_func), INIT_FUNC())
        {
        }

        template <class RF, class IF>
        xaccumulator_functor(RF&& accumulate_func, IF&& init_func)
            : base_type(std::forward<RF>(accumulate_func), std::forward<IF>(init_func))
        {
        }
    };

    template <class RF>
    auto make_xaccumulator_functor(RF&& accumulate_func)
    {
        using accumulator_type = xaccumulator_functor<std::remove_reference_t<RF>>;
        return accumulator_type(std::forward<RF>(accumulate_func));
    }

    template <class RF, class IF>
    auto make_xaccumulator_functor(RF&& accumulate_func, IF&& init_func)
    {
        using accumulator_type = xaccumulator_functor<std::remove_reference_t<RF>, std::remove_reference_t<IF>>;
        return accumulator_type(std::forward<RF>(accumulate_func), std::forward<IF>(init_func));
    }
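
    // Illustrative sketch (not part of the library interface): a running-sum
    // accumulator can be assembled from a plain binary functor, optionally
    // paired with an init functor that transforms each leading element and
    // fixes the accumulated value type.
    //
    //     auto running_sum = xt::make_xaccumulator_functor(
    //         std::plus<>(), xt::detail::accumulator_identity<double>{});
    //
    // `running_sum` can then be passed to xt::accumulate (defined below).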
    namespace detail
    {
        template <class F, class E, class EVS>
        xarray<typename std::decay_t<E>::value_type> accumulator_impl(F&&, E&&, std::size_t, EVS)
        {
            static_assert(
                !std::is_same<evaluation_strategy::lazy_type, EVS>::value,
                "Lazy accumulators not yet implemented."
            );
        }

        template <class F, class E, class EVS>
        xarray<typename std::decay_t<E>::value_type> accumulator_impl(F&&, E&&, EVS)
        {
            static_assert(
                !std::is_same<evaluation_strategy::lazy_type, EVS>::value,
                "Lazy accumulators not yet implemented."
            );
        }

        template <class T, class R>
        struct xaccumulator_return_type
        {
            using type = xarray<R>;
        };

        template <class T, layout_type L, class R>
        struct xaccumulator_return_type<xarray<T, L>, R>
        {
            using type = xarray<R, L>;
        };

        template <class T, std::size_t N, layout_type L, class R>
        struct xaccumulator_return_type<xtensor<T, N, L>, R>
        {
            using type = xtensor<R, N, L>;
        };

        template <class T, std::size_t... I, layout_type L, class R>
        struct xaccumulator_return_type<xtensor_fixed<T, xshape<I...>, L>, R>
        {
            using type = xtensor_fixed<R, xshape<I...>, L>;
        };

        template <class T, class R>
        using xaccumulator_return_type_t = typename xaccumulator_return_type<T, R>::type;

        template <class T>
        struct fixed_compute_size;

        template <class T, class R>
        struct xaccumulator_linear_return_type
        {
            using type = xtensor<R, 1>;
        };

        template <class T, layout_type L, class R>
        struct xaccumulator_linear_return_type<xarray<T, L>, R>
        {
            using type = xtensor<R, 1, L>;
        };

        template <class T, std::size_t N, layout_type L, class R>
        struct xaccumulator_linear_return_type<xtensor<T, N, L>, R>
        {
            using type = xtensor<R, 1, L>;
        };

        template <class T, std::size_t... I, layout_type L, class R>
        struct xaccumulator_linear_return_type<xtensor_fixed<T, xshape<I...>, L>, R>
        {
            using type = xtensor_fixed<R, xshape<fixed_compute_size<xshape<I...>>::value>, L>;
        };

        template <class T, class R>
        using xaccumulator_linear_return_type_t = typename xaccumulator_linear_return_type<T, R>::type;

        template <class F, class E>
        inline auto accumulator_init_with_f(F&& f, E& e, std::size_t axis)
        {
            // this function is the equivalent (but hopefully faster) to (if axis == 1)
            // e[:, 0, :, :, ...] = f(e[:, 0, :, :, ...])
            // so that all "first" values are initialized in a first pass

            std::size_t outer_loop_size, inner_loop_size, pos = 0;
            std::size_t outer_stride, inner_stride;

            auto set_loop_sizes = [&outer_loop_size, &inner_loop_size](auto first, auto last, std::ptrdiff_t ax)
            {
                outer_loop_size = std::accumulate(
                    first,
                    first + ax,
                    std::size_t(1),
                    std::multiplies<std::size_t>()
                );
                inner_loop_size = std::accumulate(
                    first + ax + 1,
                    last,
                    std::size_t(1),
                    std::multiplies<std::size_t>()
                );
            };

            // Note: add check that strides > 0
            auto set_loop_strides = [&outer_stride, &inner_stride](auto first, auto last, std::ptrdiff_t ax)
            {
                outer_stride = static_cast<std::size_t>(ax == 0 ? 1 : *std::min_element(first, first + ax));
                inner_stride = static_cast<std::size_t>(
                    (ax == std::distance(first, last) - 1) ? 1 : *std::min_element(first + ax + 1, last)
                );
            };

            set_loop_sizes(e.shape().begin(), e.shape().end(), static_cast<std::ptrdiff_t>(axis));
            set_loop_strides(e.strides().begin(), e.strides().end(), static_cast<std::ptrdiff_t>(axis));

            if (e.layout() == layout_type::column_major)
            {
                // swap for better memory locality (smaller stride in the inner loop)
                std::swap(outer_loop_size, inner_loop_size);
                std::swap(outer_stride, inner_stride);
            }

            for (std::size_t i = 0; i < outer_loop_size; ++i)
            {
                pos = i * outer_stride;
                for (std::size_t j = 0; j < inner_loop_size; ++j)
                {
                    e.storage()[pos] = f(e.storage()[pos]);
                    pos += inner_stride;
                }
            }
        }
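
        // Worked example (illustrative, assuming a row-major array of shape
        // {2, 3, 4} with strides {12, 4, 1} and axis == 1):
        //   outer_loop_size = 2  (product of the dimensions before the axis)
        //   inner_loop_size = 4  (product of the dimensions after the axis)
        //   outer_stride = 12, inner_stride = 1
        // so the loops visit pos = i * 12 + j, i.e. every element e(i, 0, j),
        // which is exactly the "first slice" along the accumulation axis.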

        template <class F, class E>
        inline auto accumulator_impl(F&& f, E&& e, std::size_t axis, evaluation_strategy::immediate_type)
        {
            using init_type = typename F::init_value_type;
            using accumulate_functor_type = typename F::accumulate_functor_type;
            using expr_value_type = typename std::decay_t<E>::value_type;
            // using return_type = std::conditional_t<std::is_same<init_type, void>::value, typename
            // std::decay_t<E>::value_type, init_type>;

            using return_type = std::decay_t<decltype(std::declval<accumulate_functor_type>()(
                std::declval<init_type>(),
                std::declval<expr_value_type>()
            ))>;

            using result_type = xaccumulator_return_type_t<std::decay_t<E>, return_type>;

            if (axis >= e.dimension())
            {
                XTENSOR_THROW(std::runtime_error, "Axis larger than expression dimension in accumulator.");
            }

            result_type res = e;  // assign + make a copy, we need it anyways

            if (res.shape(axis) != std::size_t(0))
            {
                std::size_t inner_stride = static_cast<std::size_t>(res.strides()[axis]);
                std::size_t outer_stride = 1;  // either row- or column-wise (strides.back / strides.front)
                std::size_t outer_loop_size = 0;
                std::size_t inner_loop_size = 0;
                std::size_t init_size = e.shape()[axis] != std::size_t(1) ? std::size_t(1) : std::size_t(0);

                auto set_loop_sizes =
                    [&outer_loop_size, &inner_loop_size, init_size](auto first, auto last, std::ptrdiff_t ax)
                {
                    outer_loop_size = std::accumulate(first, first + ax, init_size, std::multiplies<std::size_t>());

                    inner_loop_size = std::accumulate(
                        first + ax,
                        last,
                        std::size_t(1),
                        std::multiplies<std::size_t>()
                    );
                };

                if (result_type::static_layout == layout_type::row_major)
                {
                    set_loop_sizes(res.shape().cbegin(), res.shape().cend(), static_cast<std::ptrdiff_t>(axis));
                }
                else
                {
                    set_loop_sizes(res.shape().cbegin(), res.shape().cend(), static_cast<std::ptrdiff_t>(axis + 1));
                    std::swap(inner_loop_size, outer_loop_size);
                }

                std::size_t pos = 0;

                inner_loop_size = inner_loop_size - inner_stride;

                // activate the init loop if we have an init function other than identity
                if (!std::is_same<
                        std::decay_t<typename F::init_functor_type>,
                        typename detail::accumulator_identity<init_type>>::value)
                {
                    accumulator_init_with_f(xt::get<1>(f), res, axis);
                }

                pos = 0;
                for (std::size_t i = 0; i < outer_loop_size; ++i)
                {
                    for (std::size_t j = 0; j < inner_loop_size; ++j)
                    {
                        res.storage()[pos + inner_stride] = xt::get<0>(f)(
                            res.storage()[pos],
                            res.storage()[pos + inner_stride]
                        );

                        pos += outer_stride;
                    }
                    pos += inner_stride;
                }
            }
            return res;
        }

        template <class F, class E>
        inline auto accumulator_impl(F&& f, E&& e, evaluation_strategy::immediate_type)
        {
            using init_type = typename F::init_value_type;
            using expr_value_type = typename std::decay_t<E>::value_type;
            using accumulate_functor_type = typename F::accumulate_functor_type;
            using return_type = std::decay_t<decltype(std::declval<accumulate_functor_type>()(
                std::declval<init_type>(),
                std::declval<expr_value_type>()
            ))>;
            // using return_type = std::conditional_t<std::is_same<init_type, void>::value, typename
            // std::decay_t<E>::value_type, init_type>;

            using result_type = xaccumulator_return_type_t<std::decay_t<E>, return_type>;

            std::size_t sz = e.size();
            auto result = result_type::from_shape({sz});

            if (sz != std::size_t(0))
            {
                auto it = e.template begin<XTENSOR_DEFAULT_TRAVERSAL>();
                result.storage()[0] = xt::get<1>(f)(*it);
                ++it;

                for (std::size_t idx = 0; it != e.template end<XTENSOR_DEFAULT_TRAVERSAL>(); ++it)
                {
                    result.storage()[idx + 1] = xt::get<0>(f)(result.storage()[idx], *it);
                    ++idx;
                }
            }
            return result;
        }
    }

    /**
     * Accumulate and flatten array
     * **NOTE** This function is not lazy!
     *
     * @param f functor to use for accumulation
     * @param e xexpression to be accumulated
     * @param evaluation_strategy evaluation strategy of the accumulation
     *
     * @return returns xarray<T> filled with accumulated values
     */
    template <class F, class E, class EVS = DEFAULT_STRATEGY_ACCUMULATORS, XTL_REQUIRES(is_evaluation_strategy<EVS>)>
    inline auto accumulate(F&& f, E&& e, EVS evaluation_strategy = EVS())
    {
        // Note we need to check is_integral above in order to prohibit EVS = int, and not taking the
        // std::size_t overload below!
        return detail::accumulator_impl(std::forward<F>(f), std::forward<E>(e), evaluation_strategy);
    }
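
    // Usage sketch (illustrative, assuming the `running_sum` functor from the
    // comment near make_xaccumulator_functor and an included container header
    // such as xarray.hpp):
    //
    //     xt::xarray<double> a = {{1., 2.}, {3., 4.}};
    //     auto flat = xt::accumulate(running_sum, a);
    //     // flat is a 1-D container holding {1., 3., 6., 10.}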

    /**
     * Accumulate over axis
     * **NOTE** This function is not lazy!
     *
     * @param f Functor to use for accumulation
     * @param e xexpression to accumulate
     * @param axis Axis to perform accumulation over
     * @param evaluation_strategy evaluation strategy of the accumulation
     *
     * @return returns xarray<T> filled with accumulated values
     */
    template <class F, class E, class EVS = DEFAULT_STRATEGY_ACCUMULATORS>
    inline auto accumulate(F&& f, E&& e, std::ptrdiff_t axis, EVS evaluation_strategy = EVS())
    {
        std::size_t ax = normalize_axis(e.dimension(), axis);
        return detail::accumulator_impl(std::forward<F>(f), std::forward<E>(e), ax, evaluation_strategy);
    }
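
    // Usage sketch (illustrative, same assumptions as the example above):
    //
    //     xt::xarray<double> a = {{1., 2.}, {3., 4.}};
    //     auto cum0 = xt::accumulate(running_sum, a, 0);
    //     // cum0 == {{1., 2.}, {4., 6.}} -- running sum down each column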
}

#endif