pocketpy/3rd/numpy/include/xtensor/xaccumulator.hpp
Anurag Bhat 86b4fc623c
Merge numpy to pocketpy (#303)
* Merge numpy to pocketpy

* Add CI

* Fix CI
2024-09-02 16:22:41 +08:00

371 lines
14 KiB
C++

/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
* Copyright (c) QuantStack *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
#ifndef XTENSOR_ACCUMULATOR_HPP
#define XTENSOR_ACCUMULATOR_HPP
#include <algorithm>
#include <cstddef>
#include <numeric>
#include <type_traits>
#include "xexpression.hpp"
#include "xstrides.hpp"
#include "xtensor_config.hpp"
#include "xtensor_forward.hpp"
namespace xt
{
#define DEFAULT_STRATEGY_ACCUMULATORS evaluation_strategy::immediate_type
namespace detail
{
template <class V = void>
struct accumulator_identity : xtl::identity
{
using value_type = V;
};
}
/**************
* accumulate *
**************/
template <class ACCUMULATE_FUNC, class INIT_FUNC = detail::accumulator_identity<void>>
struct xaccumulator_functor : public std::tuple<ACCUMULATE_FUNC, INIT_FUNC>
{
using self_type = xaccumulator_functor<ACCUMULATE_FUNC, INIT_FUNC>;
using base_type = std::tuple<ACCUMULATE_FUNC, INIT_FUNC>;
using accumulate_functor_type = ACCUMULATE_FUNC;
using init_functor_type = INIT_FUNC;
using init_value_type = typename init_functor_type::value_type;
xaccumulator_functor()
: base_type()
{
}
template <class RF>
xaccumulator_functor(RF&& accumulate_func)
: base_type(std::forward<RF>(accumulate_func), INIT_FUNC())
{
}
template <class RF, class IF>
xaccumulator_functor(RF&& accumulate_func, IF&& init_func)
: base_type(std::forward<RF>(accumulate_func), std::forward<IF>(init_func))
{
}
};
template <class RF>
auto make_xaccumulator_functor(RF&& accumulate_func)
{
using accumulator_type = xaccumulator_functor<std::remove_reference_t<RF>>;
return accumulator_type(std::forward<RF>(accumulate_func));
}
template <class RF, class IF>
auto make_xaccumulator_functor(RF&& accumulate_func, IF&& init_func)
{
using accumulator_type = xaccumulator_functor<std::remove_reference_t<RF>, std::remove_reference_t<IF>>;
return accumulator_type(std::forward<RF>(accumulate_func), std::forward<IF>(init_func));
}
namespace detail
{
template <class F, class E, class EVS>
xarray<typename std::decay_t<E>::value_type> accumulator_impl(F&&, E&&, std::size_t, EVS)
{
static_assert(
!std::is_same<evaluation_strategy::lazy_type, EVS>::value,
"Lazy accumulators not yet implemented."
);
}
template <class F, class E, class EVS>
xarray<typename std::decay_t<E>::value_type> accumulator_impl(F&&, E&&, EVS)
{
static_assert(
!std::is_same<evaluation_strategy::lazy_type, EVS>::value,
"Lazy accumulators not yet implemented."
);
}
template <class T, class R>
struct xaccumulator_return_type
{
using type = xarray<R>;
};
template <class T, layout_type L, class R>
struct xaccumulator_return_type<xarray<T, L>, R>
{
using type = xarray<R, L>;
};
template <class T, std::size_t N, layout_type L, class R>
struct xaccumulator_return_type<xtensor<T, N, L>, R>
{
using type = xtensor<R, N, L>;
};
template <class T, std::size_t... I, layout_type L, class R>
struct xaccumulator_return_type<xtensor_fixed<T, xshape<I...>, L>, R>
{
using type = xtensor_fixed<R, xshape<I...>, L>;
};
template <class T, class R>
using xaccumulator_return_type_t = typename xaccumulator_return_type<T, R>::type;
template <class T>
struct fixed_compute_size;
template <class T, class R>
struct xaccumulator_linear_return_type
{
using type = xtensor<R, 1>;
};
template <class T, layout_type L, class R>
struct xaccumulator_linear_return_type<xarray<T, L>, R>
{
using type = xtensor<R, 1, L>;
};
template <class T, std::size_t N, layout_type L, class R>
struct xaccumulator_linear_return_type<xtensor<T, N, L>, R>
{
using type = xtensor<R, 1, L>;
};
template <class T, std::size_t... I, layout_type L, class R>
struct xaccumulator_linear_return_type<xtensor_fixed<T, xshape<I...>, L>, R>
{
using type = xtensor_fixed<R, xshape<fixed_compute_size<xshape<I...>>::value>, L>;
};
template <class T, class R>
using xaccumulator_linear_return_type_t = typename xaccumulator_linear_return_type<T, R>::type;
template <class F, class E>
inline auto accumulator_init_with_f(F&& f, E& e, std::size_t axis)
{
// this function is the equivalent (but hopefully faster) to (if axis == 1)
// e[:, 0, :, :, ...] = f(e[:, 0, :, :, ...])
// so that all "first" values are initialized in a first pass
std::size_t outer_loop_size, inner_loop_size, pos = 0;
std::size_t outer_stride, inner_stride;
auto set_loop_sizes = [&outer_loop_size, &inner_loop_size](auto first, auto last, std::ptrdiff_t ax)
{
outer_loop_size = std::accumulate(
first,
first + ax,
std::size_t(1),
std::multiplies<std::size_t>()
);
inner_loop_size = std::accumulate(
first + ax + 1,
last,
std::size_t(1),
std::multiplies<std::size_t>()
);
};
// Note: add check that strides > 0
auto set_loop_strides = [&outer_stride, &inner_stride](auto first, auto last, std::ptrdiff_t ax)
{
outer_stride = static_cast<std::size_t>(ax == 0 ? 1 : *std::min_element(first, first + ax));
inner_stride = static_cast<std::size_t>(
(ax == std::distance(first, last) - 1) ? 1 : *std::min_element(first + ax + 1, last)
);
};
set_loop_sizes(e.shape().begin(), e.shape().end(), static_cast<std::ptrdiff_t>(axis));
set_loop_strides(e.strides().begin(), e.strides().end(), static_cast<std::ptrdiff_t>(axis));
if (e.layout() == layout_type::column_major)
{
// swap for better memory locality (smaller stride in the inner loop)
std::swap(outer_loop_size, inner_loop_size);
std::swap(outer_stride, inner_stride);
}
for (std::size_t i = 0; i < outer_loop_size; ++i)
{
pos = i * outer_stride;
for (std::size_t j = 0; j < inner_loop_size; ++j)
{
e.storage()[pos] = f(e.storage()[pos]);
pos += inner_stride;
}
}
}
template <class F, class E>
inline auto accumulator_impl(F&& f, E&& e, std::size_t axis, evaluation_strategy::immediate_type)
{
using init_type = typename F::init_value_type;
using accumulate_functor_type = typename F::accumulate_functor_type;
using expr_value_type = typename std::decay_t<E>::value_type;
// using return_type = std::conditional_t<std::is_same<init_type, void>::value, typename
// std::decay_t<E>::value_type, init_type>;
using return_type = std::decay_t<decltype(std::declval<accumulate_functor_type>()(
std::declval<init_type>(),
std::declval<expr_value_type>()
))>;
using result_type = xaccumulator_return_type_t<std::decay_t<E>, return_type>;
if (axis >= e.dimension())
{
XTENSOR_THROW(std::runtime_error, "Axis larger than expression dimension in accumulator.");
}
result_type res = e; // assign + make a copy, we need it anyways
if (res.shape(axis) != std::size_t(0))
{
std::size_t inner_stride = static_cast<std::size_t>(res.strides()[axis]);
std::size_t outer_stride = 1; // either row- or column-wise (strides.back / strides.front)
std::size_t outer_loop_size = 0;
std::size_t inner_loop_size = 0;
std::size_t init_size = e.shape()[axis] != std::size_t(1) ? std::size_t(1) : std::size_t(0);
auto set_loop_sizes =
[&outer_loop_size, &inner_loop_size, init_size](auto first, auto last, std::ptrdiff_t ax)
{
outer_loop_size = std::accumulate(first, first + ax, init_size, std::multiplies<std::size_t>());
inner_loop_size = std::accumulate(
first + ax,
last,
std::size_t(1),
std::multiplies<std::size_t>()
);
};
if (result_type::static_layout == layout_type::row_major)
{
set_loop_sizes(res.shape().cbegin(), res.shape().cend(), static_cast<std::ptrdiff_t>(axis));
}
else
{
set_loop_sizes(res.shape().cbegin(), res.shape().cend(), static_cast<std::ptrdiff_t>(axis + 1));
std::swap(inner_loop_size, outer_loop_size);
}
std::size_t pos = 0;
inner_loop_size = inner_loop_size - inner_stride;
// activate the init loop if we have an init function other than identity
if (!std::is_same<
std::decay_t<typename F::init_functor_type>,
typename detail::accumulator_identity<init_type>>::value)
{
accumulator_init_with_f(xt::get<1>(f), res, axis);
}
pos = 0;
for (std::size_t i = 0; i < outer_loop_size; ++i)
{
for (std::size_t j = 0; j < inner_loop_size; ++j)
{
res.storage()[pos + inner_stride] = xt::get<0>(f)(
res.storage()[pos],
res.storage()[pos + inner_stride]
);
pos += outer_stride;
}
pos += inner_stride;
}
}
return res;
}
template <class F, class E>
inline auto accumulator_impl(F&& f, E&& e, evaluation_strategy::immediate_type)
{
using init_type = typename F::init_value_type;
using expr_value_type = typename std::decay_t<E>::value_type;
using accumulate_functor_type = typename F::accumulate_functor_type;
using return_type = std::decay_t<decltype(std::declval<accumulate_functor_type>()(
std::declval<init_type>(),
std::declval<expr_value_type>()
))>;
// using return_type = std::conditional_t<std::is_same<init_type, void>::value, typename
// std::decay_t<E>::value_type, init_type>;
using result_type = xaccumulator_return_type_t<std::decay_t<E>, return_type>;
std::size_t sz = e.size();
auto result = result_type::from_shape({sz});
if (sz != std::size_t(0))
{
auto it = e.template begin<XTENSOR_DEFAULT_TRAVERSAL>();
result.storage()[0] = xt::get<1>(f)(*it);
++it;
for (std::size_t idx = 0; it != e.template end<XTENSOR_DEFAULT_TRAVERSAL>(); ++it)
{
result.storage()[idx + 1] = xt::get<0>(f)(result.storage()[idx], *it);
++idx;
}
}
return result;
}
}
/**
* Accumulate and flatten array
* **NOTE** This function is not lazy!
*
* @param f functor to use for accumulation
* @param e xexpression to be accumulated
* @param evaluation_strategy evaluation strategy of the accumulation
*
* @return returns xarray<T> filled with accumulated values
*/
template <class F, class E, class EVS = DEFAULT_STRATEGY_ACCUMULATORS, XTL_REQUIRES(is_evaluation_strategy<EVS>)>
inline auto accumulate(F&& f, E&& e, EVS evaluation_strategy = EVS())
{
// Note we need to check is_integral above in order to prohibit EVS = int, and not taking the
// std::size_t overload below!
return detail::accumulator_impl(std::forward<F>(f), std::forward<E>(e), evaluation_strategy);
}
/**
* Accumulate over axis
* **NOTE** This function is not lazy!
*
* @param f Functor to use for accumulation
* @param e xexpression to accumulate
* @param axis Axis to perform accumulation over
* @param evaluation_strategy evaluation strategy of the accumulation
*
* @return returns xarray<T> filled with accumulated values
*/
template <class F, class E, class EVS = DEFAULT_STRATEGY_ACCUMULATORS>
inline auto accumulate(F&& f, E&& e, std::ptrdiff_t axis, EVS evaluation_strategy = EVS())
{
std::size_t ax = normalize_axis(e.dimension(), axis);
return detail::accumulator_impl(std::forward<F>(f), std::forward<E>(e), ax, evaluation_strategy);
}
}
#endif