/*************************************************************************** * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht * * Copyright (c) QuantStack * * * * Distributed under the terms of the BSD 3-Clause License. * * * * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ #ifndef XTENSOR_ACCUMULATOR_HPP #define XTENSOR_ACCUMULATOR_HPP #include #include #include #include #include "xexpression.hpp" #include "xstrides.hpp" #include "xtensor_config.hpp" #include "xtensor_forward.hpp" namespace xt { #define DEFAULT_STRATEGY_ACCUMULATORS evaluation_strategy::immediate_type namespace detail { template struct accumulator_identity : xtl::identity { using value_type = V; }; } /************** * accumulate * **************/ template > struct xaccumulator_functor : public std::tuple { using self_type = xaccumulator_functor; using base_type = std::tuple; using accumulate_functor_type = ACCUMULATE_FUNC; using init_functor_type = INIT_FUNC; using init_value_type = typename init_functor_type::value_type; xaccumulator_functor() : base_type() { } template xaccumulator_functor(RF&& accumulate_func) : base_type(std::forward(accumulate_func), INIT_FUNC()) { } template xaccumulator_functor(RF&& accumulate_func, IF&& init_func) : base_type(std::forward(accumulate_func), std::forward(init_func)) { } }; template auto make_xaccumulator_functor(RF&& accumulate_func) { using accumulator_type = xaccumulator_functor>; return accumulator_type(std::forward(accumulate_func)); } template auto make_xaccumulator_functor(RF&& accumulate_func, IF&& init_func) { using accumulator_type = xaccumulator_functor, std::remove_reference_t>; return accumulator_type(std::forward(accumulate_func), std::forward(init_func)); } namespace detail { template xarray::value_type> accumulator_impl(F&&, E&&, std::size_t, EVS) { static_assert( !std::is_same::value, "Lazy accumulators not yet implemented." ); } template xarray::value_type> accumulator_impl(F&&, E&&, EVS) { static_assert( !std::is_same::value, "Lazy accumulators not yet implemented." ); } template struct xaccumulator_return_type { using type = xarray; }; template struct xaccumulator_return_type, R> { using type = xarray; }; template struct xaccumulator_return_type, R> { using type = xtensor; }; template struct xaccumulator_return_type, L>, R> { using type = xtensor_fixed, L>; }; template using xaccumulator_return_type_t = typename xaccumulator_return_type::type; template struct fixed_compute_size; template struct xaccumulator_linear_return_type { using type = xtensor; }; template struct xaccumulator_linear_return_type, R> { using type = xtensor; }; template struct xaccumulator_linear_return_type, R> { using type = xtensor; }; template struct xaccumulator_linear_return_type, L>, R> { using type = xtensor_fixed>::value>, L>; }; template using xaccumulator_linear_return_type_t = typename xaccumulator_linear_return_type::type; template inline auto accumulator_init_with_f(F&& f, E& e, std::size_t axis) { // this function is the equivalent (but hopefully faster) to (if axis == 1) // e[:, 0, :, :, ...] = f(e[:, 0, :, :, ...]) // so that all "first" values are initialized in a first pass std::size_t outer_loop_size, inner_loop_size, pos = 0; std::size_t outer_stride, inner_stride; auto set_loop_sizes = [&outer_loop_size, &inner_loop_size](auto first, auto last, std::ptrdiff_t ax) { outer_loop_size = std::accumulate( first, first + ax, std::size_t(1), std::multiplies() ); inner_loop_size = std::accumulate( first + ax + 1, last, std::size_t(1), std::multiplies() ); }; // Note: add check that strides > 0 auto set_loop_strides = [&outer_stride, &inner_stride](auto first, auto last, std::ptrdiff_t ax) { outer_stride = static_cast(ax == 0 ? 1 : *std::min_element(first, first + ax)); inner_stride = static_cast( (ax == std::distance(first, last) - 1) ? 1 : *std::min_element(first + ax + 1, last) ); }; set_loop_sizes(e.shape().begin(), e.shape().end(), static_cast(axis)); set_loop_strides(e.strides().begin(), e.strides().end(), static_cast(axis)); if (e.layout() == layout_type::column_major) { // swap for better memory locality (smaller stride in the inner loop) std::swap(outer_loop_size, inner_loop_size); std::swap(outer_stride, inner_stride); } for (std::size_t i = 0; i < outer_loop_size; ++i) { pos = i * outer_stride; for (std::size_t j = 0; j < inner_loop_size; ++j) { e.storage()[pos] = f(e.storage()[pos]); pos += inner_stride; } } } template inline auto accumulator_impl(F&& f, E&& e, std::size_t axis, evaluation_strategy::immediate_type) { using init_type = typename F::init_value_type; using accumulate_functor_type = typename F::accumulate_functor_type; using expr_value_type = typename std::decay_t::value_type; // using return_type = std::conditional_t::value, typename // std::decay_t::value_type, init_type>; using return_type = std::decay_t()( std::declval(), std::declval() ))>; using result_type = xaccumulator_return_type_t, return_type>; if (axis >= e.dimension()) { XTENSOR_THROW(std::runtime_error, "Axis larger than expression dimension in accumulator."); } result_type res = e; // assign + make a copy, we need it anyways if (res.shape(axis) != std::size_t(0)) { std::size_t inner_stride = static_cast(res.strides()[axis]); std::size_t outer_stride = 1; // either row- or column-wise (strides.back / strides.front) std::size_t outer_loop_size = 0; std::size_t inner_loop_size = 0; std::size_t init_size = e.shape()[axis] != std::size_t(1) ? std::size_t(1) : std::size_t(0); auto set_loop_sizes = [&outer_loop_size, &inner_loop_size, init_size](auto first, auto last, std::ptrdiff_t ax) { outer_loop_size = std::accumulate(first, first + ax, init_size, std::multiplies()); inner_loop_size = std::accumulate( first + ax, last, std::size_t(1), std::multiplies() ); }; if (result_type::static_layout == layout_type::row_major) { set_loop_sizes(res.shape().cbegin(), res.shape().cend(), static_cast(axis)); } else { set_loop_sizes(res.shape().cbegin(), res.shape().cend(), static_cast(axis + 1)); std::swap(inner_loop_size, outer_loop_size); } std::size_t pos = 0; inner_loop_size = inner_loop_size - inner_stride; // activate the init loop if we have an init function other than identity if (!std::is_same< std::decay_t, typename detail::accumulator_identity>::value) { accumulator_init_with_f(xt::get<1>(f), res, axis); } pos = 0; for (std::size_t i = 0; i < outer_loop_size; ++i) { for (std::size_t j = 0; j < inner_loop_size; ++j) { res.storage()[pos + inner_stride] = xt::get<0>(f)( res.storage()[pos], res.storage()[pos + inner_stride] ); pos += outer_stride; } pos += inner_stride; } } return res; } template inline auto accumulator_impl(F&& f, E&& e, evaluation_strategy::immediate_type) { using init_type = typename F::init_value_type; using expr_value_type = typename std::decay_t::value_type; using accumulate_functor_type = typename F::accumulate_functor_type; using return_type = std::decay_t()( std::declval(), std::declval() ))>; // using return_type = std::conditional_t::value, typename // std::decay_t::value_type, init_type>; using result_type = xaccumulator_return_type_t, return_type>; std::size_t sz = e.size(); auto result = result_type::from_shape({sz}); if (sz != std::size_t(0)) { auto it = e.template begin(); result.storage()[0] = xt::get<1>(f)(*it); ++it; for (std::size_t idx = 0; it != e.template end(); ++it) { result.storage()[idx + 1] = xt::get<0>(f)(result.storage()[idx], *it); ++idx; } } return result; } } /** * Accumulate and flatten array * **NOTE** This function is not lazy! * * @param f functor to use for accumulation * @param e xexpression to be accumulated * @param evaluation_strategy evaluation strategy of the accumulation * * @return returns xarray filled with accumulated values */ template )> inline auto accumulate(F&& f, E&& e, EVS evaluation_strategy = EVS()) { // Note we need to check is_integral above in order to prohibit EVS = int, and not taking the // std::size_t overload below! return detail::accumulator_impl(std::forward(f), std::forward(e), evaluation_strategy); } /** * Accumulate over axis * **NOTE** This function is not lazy! * * @param f Functor to use for accumulation * @param e xexpression to accumulate * @param axis Axis to perform accumulation over * @param evaluation_strategy evaluation strategy of the accumulation * * @return returns xarray filled with accumulated values */ template inline auto accumulate(F&& f, E&& e, std::ptrdiff_t axis, EVS evaluation_strategy = EVS()) { std::size_t ax = normalize_axis(e.dimension(), axis); return detail::accumulator_impl(std::forward(f), std::forward(e), ax, evaluation_strategy); } } #endif