/***************************************************************************
 * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
 * Copyright (c) QuantStack                                                 *
 *                                                                          *
 * Distributed under the terms of the BSD 3-Clause License.                 *
 *                                                                          *
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/

#ifndef XTENSOR_SORT_HPP
#define XTENSOR_SORT_HPP

#include <algorithm>
#include <cmath>
#include <iterator>
#include <numeric>
#include <utility>

#include <xtl/xcompare.hpp>

#include "xadapt.hpp"
#include "xarray.hpp"
#include "xeval.hpp"
#include "xindex_view.hpp"
#include "xmanipulation.hpp"
#include "xmath.hpp"
#include "xslice.hpp"  // for xnone
#include "xtensor.hpp"
#include "xtensor_config.hpp"
#include "xtensor_forward.hpp"
#include "xview.hpp"

namespace xt
{
    /**
     * @defgroup xt_xsort Sorting functions.
     *
     * Because sorting functions need to access the tensor data repeatedly, they evaluate their
     * input and may allocate temporaries.
     */

    namespace detail
    {
        template <class T>
        std::ptrdiff_t adjust_secondary_stride(std::ptrdiff_t stride, T shape)
        {
            return stride != 0 ? stride : static_cast<std::ptrdiff_t>(shape);
        }

        template <class E>
        inline std::ptrdiff_t get_secondary_stride(const E& ev)
        {
            if (ev.layout() == layout_type::row_major)
            {
                return adjust_secondary_stride(ev.strides()[ev.dimension() - 2], *(ev.shape().end() - 1));
            }

            return adjust_secondary_stride(ev.strides()[1], *(ev.shape().begin()));
        }

        template <class E>
        inline std::size_t leading_axis_n_iters(const E& ev)
        {
            if (ev.layout() == layout_type::row_major)
            {
                return std::accumulate(
                    ev.shape().begin(),
                    ev.shape().end() - 1,
                    std::size_t(1),
                    std::multiplies<>()
                );
            }
            return std::accumulate(
                ev.shape().begin() + 1,
                ev.shape().end(),
                std::size_t(1),
                std::multiplies<>()
            );
        }

        template <class E, class F>
        inline void call_over_leading_axis(E& ev, F&& fct)
        {
            XTENSOR_ASSERT(ev.dimension() >= 2);

            const std::size_t n_iters = leading_axis_n_iters(ev);
            const std::ptrdiff_t secondary_stride = get_secondary_stride(ev);

            const auto begin = ev.data();
            const auto end = begin + n_iters * secondary_stride;
            for (auto iter = begin; iter != end; iter += secondary_stride)
            {
                fct(iter, iter + secondary_stride);
            }
        }

        template <class E1, class E2, class F>
        inline void call_over_leading_axis(E1& e1, E2& e2, F&& fct)
        {
            XTENSOR_ASSERT(e1.dimension() >= 2);
            XTENSOR_ASSERT(e1.dimension() == e2.dimension());

            const std::size_t n_iters = leading_axis_n_iters(e1);
            const std::ptrdiff_t secondary_stride1 = get_secondary_stride(e1);
            const std::ptrdiff_t secondary_stride2 = get_secondary_stride(e2);
            XTENSOR_ASSERT(secondary_stride1 == secondary_stride2);

            const auto begin1 = e1.data();
            const auto end1 = begin1 + n_iters * secondary_stride1;
            const auto begin2 = e2.data();
            const auto end2 = begin2 + n_iters * secondary_stride2;
            auto iter1 = begin1;
            auto iter2 = begin2;
            for (; (iter1 != end1) && (iter2 != end2); iter1 += secondary_stride1, iter2 += secondary_stride2)
            {
                fct(iter1, iter1 + secondary_stride1, iter2, iter2 + secondary_stride2);
            }
        }

        template <class E>
        inline std::size_t leading_axis(const E& e)
        {
            if (e.layout() == layout_type::row_major)
            {
                return e.dimension() - 1;
            }
            else if (e.layout() == layout_type::column_major)
            {
                return 0;
            }
            XTENSOR_THROW(std::runtime_error, "Layout not supported.");
        }

        // get permutations to transpose and reverse-transpose array
        inline std::pair<dynamic_shape<std::size_t>, dynamic_shape<std::size_t>>
        get_permutations(std::size_t dim, std::size_t ax, layout_type layout)
        {
            dynamic_shape<std::size_t> permutation(dim);
            std::iota(permutation.begin(), permutation.end(), std::size_t(0));
            permutation.erase(permutation.begin() + std::ptrdiff_t(ax));
            if (layout == layout_type::row_major)
            {
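                // Row-major: the last axis is contiguous, so the axis to process is moved to the back.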
                permutation.push_back(ax);
            }
            else
            {
                permutation.insert(permutation.begin(), ax);
            }

            // TODO find a more clever way to get reverse permutation?
            dynamic_shape<std::size_t> reverse_permutation;
            for (std::size_t i = 0; i < dim; ++i)
            {
                auto it = std::find(permutation.begin(), permutation.end(), i);
                reverse_permutation.push_back(std::size_t(std::distance(permutation.begin(), it)));
            }

            return std::make_pair(std::move(permutation), std::move(reverse_permutation));
        }

        template <class R, class E, class F>
        inline R map_axis(const E& e, std::ptrdiff_t axis, F&& lambda)
        {
            if (e.dimension() == 1)
            {
                R res = e;
                lambda(res.begin(), res.end());
                return res;
            }

            const std::size_t ax = normalize_axis(e.dimension(), axis);
            if (ax == detail::leading_axis(e))
            {
                R res = e;
                detail::call_over_leading_axis(res, std::forward<F>(lambda));
                return res;
            }

            dynamic_shape<std::size_t> permutation, reverse_permutation;
            std::tie(permutation, reverse_permutation) = get_permutations(e.dimension(), ax, e.layout());

            R res = transpose(e, permutation);
            detail::call_over_leading_axis(res, std::forward<F>(lambda));
            res = transpose(res, reverse_permutation);
            return res;
        }

        template <class VT>
        struct flatten_sort_result_type_impl
        {
            using type = VT;
        };

        template <class VT, std::size_t N, layout_type L>
        struct flatten_sort_result_type_impl<xtensor<VT, N, L>>
        {
            using type = xtensor<VT, 1, L>;
        };

        template <class VT, class S, layout_type L>
        struct flatten_sort_result_type_impl<xtensor_fixed<VT, S, L>>
        {
            using type = xtensor_fixed<VT, xshape<fixed_compute_size<S>::value>, L>;
        };

        template <class VT>
        struct flatten_sort_result_type : flatten_sort_result_type_impl<common_tensor_type_t<VT>>
        {
        };

        template <class VT>
        using flatten_sort_result_type_t = typename flatten_sort_result_type<VT>::type;

        template <class E, class R = flatten_sort_result_type_t<E>>
        inline auto flat_sort_impl(const xexpression<E>& e)
        {
            const auto& de = e.derived_cast();
            R ev;
            ev.resize({static_cast<typename R::shape_type::value_type>(de.size())});

            std::copy(de.cbegin(), de.cend(), ev.begin());
            std::sort(ev.begin(), ev.end());

            return ev;
        }
    }

    template <class E>
    inline auto sort(const xexpression<E>& e, placeholders::xtuph /*t*/)
    {
        return detail::flat_sort_impl(e);
    }

    namespace detail
    {
        template <class T>
        struct sort_eval_type
        {
            using type = typename T::temporary_type;
        };

        template <class T, std::size_t... I, layout_type L>
        struct sort_eval_type<xtensor_fixed<T, fixed_shape<I...>, L>>
        {
            using type = xtensor<T, sizeof...(I), L>;
        };
    }

    /**
     * Sort xexpression (optionally along axis).
     * The sort is performed using the ``std::sort`` function.
     * A copy of the xexpression is created and returned.
     *
     * @ingroup xt_xsort
     * @param e xexpression to sort
     * @param axis axis along which sort is performed
     *
     * @return sorted array (copy)
     */
    template <class E>
    inline auto sort(const xexpression<E>& e, std::ptrdiff_t axis = -1)
    {
        using eval_type = typename detail::sort_eval_type<E>::type;

        return detail::map_axis<eval_type>(
            e.derived_cast(),
            axis,
            [](auto begin, auto end)
            {
                std::sort(begin, end);
            }
        );
    }

    /*****************************
     * Implementation of argsort *
     *****************************/

    /**
     * Sorting method.
     * Predefined methods for performing indirect sorting.
     * @see argsort(const xexpression<E>&, std::ptrdiff_t, sorting_method)
     */
    enum class sorting_method
    {
        /**
         * Faster method but with no guarantee on preservation of order of equal elements
         * https://en.cppreference.com/w/cpp/algorithm/sort.
         */
        quick,
        /**
         * Slower method but with guarantee on preservation of order of equal elements
         * https://en.cppreference.com/w/cpp/algorithm/stable_sort.
         */
        stable,
    };

    namespace detail
    {
        template <class ConstRandomIt, class RandomIt, class Compare, class Method>
        inline void argsort_iter(
            ConstRandomIt data_begin,
            ConstRandomIt data_end,
            RandomIt idx_begin,
            RandomIt idx_end,
            Compare comp,
            Method method
        )
        {
            XTENSOR_ASSERT(std::distance(data_begin, data_end) >= 0);
            XTENSOR_ASSERT(std::distance(idx_begin, idx_end) == std::distance(data_begin, data_end));
            (void) idx_end;  // TODO(C++17) [[maybe_unused]] only used in assertion.
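            // Fill the index range with 0, 1, ..., n-1, then sort the indices by comparing
            // the data values they refer to.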
            std::iota(idx_begin, idx_end, 0);
            switch (method)
            {
                case (sorting_method::quick):
                {
                    std::sort(
                        idx_begin,
                        idx_end,
                        [&](const auto i, const auto j)
                        {
                            return comp(*(data_begin + i), *(data_begin + j));
                        }
                    );
                    break;
                }
                case (sorting_method::stable):
                {
                    std::stable_sort(
                        idx_begin,
                        idx_end,
                        [&](const auto i, const auto j)
                        {
                            return comp(*(data_begin + i), *(data_begin + j));
                        }
                    );
                    break;
                }
            }
        }

        template <class ConstRandomIt, class RandomIt, class Method>
        inline void
        argsort_iter(ConstRandomIt data_begin, ConstRandomIt data_end, RandomIt idx_begin, RandomIt idx_end, Method method)
        {
            return argsort_iter(
                std::move(data_begin),
                std::move(data_end),
                std::move(idx_begin),
                std::move(idx_end),
                [](const auto& x, const auto& y) -> bool
                {
                    return x < y;
                },
                method
            );
        }

        template <class VT, class T>
        struct rebind_value_type
        {
            using type = xarray<VT, xt::layout_type::dynamic>;
        };

        template <class VT, class EC, layout_type L>
        struct rebind_value_type<VT, xarray<EC, L>>
        {
            using type = xarray<VT, L>;
        };

        template <class VT, class EC, std::size_t N, layout_type L>
        struct rebind_value_type<VT, xtensor<EC, N, L>>
        {
            using type = xtensor<VT, N, L>;
        };

        template <class VT, class ET, class S, layout_type L>
        struct rebind_value_type<VT, xtensor_fixed<ET, S, L>>
        {
            using type = xtensor_fixed<VT, S, L>;
        };

        template <class VT, class T>
        struct flatten_rebind_value_type
        {
            using type = typename rebind_value_type<VT, T>::type;
        };

        template <class VT, class EC, std::size_t N, layout_type L>
        struct flatten_rebind_value_type<VT, xtensor<EC, N, L>>
        {
            using type = xtensor<VT, 1, L>;
        };

        template <class VT, class ET, class S, layout_type L>
        struct flatten_rebind_value_type<VT, xtensor_fixed<ET, S, L>>
        {
            using type = xtensor_fixed<VT, xshape<fixed_compute_size<S>::value>, L>;
        };

        template <class T>
        struct argsort_result_type
        {
            using type = typename rebind_value_type<typename T::temporary_type::size_type, typename T::temporary_type>::type;
        };

        template <class T>
        struct linear_argsort_result_type
        {
            using type = typename flatten_rebind_value_type<
                typename T::temporary_type::size_type,
                typename T::temporary_type>::type;
        };

        template <
            class E,
            class R = typename linear_argsort_result_type<typename sort_eval_type<E>::type>::type,
            class Method>
        inline auto flatten_argsort_impl(const xexpression<E>& e, Method method)
        {
            const auto& de = e.derived_cast();

            auto cit = de.template begin<layout_type::row_major>();
            using const_iterator = decltype(cit);
            auto ad = xiterator_adaptor<const_iterator, const_iterator>(cit, cit, de.size());

            using result_type = R;
            result_type result;
            result.resize({de.size()});

            detail::argsort_iter(de.cbegin(), de.cend(), result.begin(), result.end(), method);

            return result;
        }
    }

    template <class E>
    inline auto argsort(const xexpression<E>& e, placeholders::xtuph /*t*/, sorting_method method = sorting_method::quick)
    {
        return detail::flatten_argsort_impl(e, method);
    }

    /**
     * Argsort xexpression (optionally along axis).
     * Performs an indirect sort along the given axis. Returns an xarray
     * of indices of the same shape as e that index data along the given axis in
     * sorted order.
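     *
     * A minimal illustrative example:
     * @code{cpp}
     * xt::xarray<int> a = {3, 1, 2};
     * auto idx = xt::argsort(a);  // {1, 2, 0}
     * \endcode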
     *
     * @ingroup xt_xsort
     * @param e xexpression to argsort
     * @param axis axis along which argsort is performed
     * @param method sorting algorithm to use
     *
     * @return argsorted index array
     *
     * @see xt::sorting_method
     */
    template <class E>
    inline auto argsort(const xexpression<E>& e, std::ptrdiff_t axis = -1, sorting_method method = sorting_method::quick)
    {
        using eval_type = typename detail::sort_eval_type<E>::type;
        using result_type = typename detail::argsort_result_type<eval_type>::type;

        const auto& de = e.derived_cast();

        std::size_t ax = normalize_axis(de.dimension(), axis);

        if (de.dimension() == 1)
        {
            return detail::flatten_argsort_impl<E, result_type>(e, method);
        }

        const auto argsort = [&method](auto res_begin, auto res_end, auto ev_begin, auto ev_end)
        {
            detail::argsort_iter(ev_begin, ev_end, res_begin, res_end, method);
        };

        if (ax == detail::leading_axis(de))
        {
            result_type res = result_type::from_shape(de.shape());
            detail::call_over_leading_axis(res, de, argsort);
            return res;
        }

        dynamic_shape<std::size_t> permutation, reverse_permutation;
        std::tie(permutation, reverse_permutation) = detail::get_permutations(de.dimension(), ax, de.layout());

        eval_type ev = transpose(de, permutation);
        result_type res = result_type::from_shape(ev.shape());
        detail::call_over_leading_axis(res, ev, argsort);
        res = transpose(res, reverse_permutation);
        return res;
    }

    /************************************************
     * Implementation of partition and argpartition *
     ************************************************/

    namespace detail
    {
        /**
         * Partition a given random access range.
         *
         * @param data_begin Start of the data to partition.
         * @param data_end Past end of the data to partition.
         * @param kth_begin Start of the indices to partition.
         *        Indices must be sorted in decreasing order.
         * @param kth_end Past end of the indices to partition.
         *        Indices must be sorted in decreasing order.
         * @param comp Comparison function for `x < y`.
         */
        template <class RandomIt, class Iter, class Compare>
        inline void partition_iter(RandomIt data_begin, RandomIt data_end, Iter kth_begin, Iter kth_end, Compare comp)
        {
            XTENSOR_ASSERT(std::distance(data_begin, data_end) >= 0);
            XTENSOR_ASSERT(std::distance(kth_begin, kth_end) >= 0);

            using idx_type = typename std::iterator_traits<Iter>::value_type;

            idx_type k_last = static_cast<idx_type>(std::distance(data_begin, data_end));
            for (; kth_begin != kth_end; ++kth_begin)
            {
                std::nth_element(data_begin, data_begin + *kth_begin, data_begin + k_last, std::move(comp));
                k_last = *kth_begin;
            }
        }

        template <class RandomIt, class Iter>
        inline void partition_iter(RandomIt data_begin, RandomIt data_end, Iter kth_begin, Iter kth_end)
        {
            return partition_iter(
                std::move(data_begin),
                std::move(data_end),
                std::move(kth_begin),
                std::move(kth_end),
                [](const auto& x, const auto& y) -> bool
                {
                    return x < y;
                }
            );
        }
    }

    /**
     * Partially sort xexpression
     *
     * Partition shuffles the xexpression so that the kth element
     * in the returned xexpression is in the place it would appear in a sorted
     * array and all elements smaller than this entry are placed (unsorted) before.
     *
     * The optional third parameter can either be an axis or ``xnone()`` in which case
     * the xexpression will be flattened.
     *
     * This function uses ``std::nth_element`` internally.
     *
     * @code{cpp}
     * xt::xarray<float> a = {1, 10, -10, 123};
     * std::cout << xt::partition(a, 0) << std::endl;      // {-10, 1, 123, 10} the correct entry at index 0
     * std::cout << xt::partition(a, 3) << std::endl;      // {1, 10, -10, 123} the correct entry at index 3
     * std::cout << xt::partition(a, {0, 3}) << std::endl; // {-10, 1, 10, 123} the correct entries at indices 0 and 3
     * \endcode
     *
     * @ingroup xt_xsort
     * @param e input xexpression
     * @param kth_container a container of ``indices`` that should contain the correctly sorted value
     * @param axis either integer (default = -1) to sort along last axis or ``xnone()`` to flatten before
     *        sorting
     *
     * @return partially sorted xcontainer
     */
    template <
        class E,
        class C,
        class R = detail::flatten_sort_result_type_t<E>,
        class = std::enable_if_t<!xtl::is_integral<C>::value, int>>
    inline R partition(const xexpression<E>& e, C kth_container, placeholders::xtuph /*ax*/)
    {
        const auto& de = e.derived_cast();

        R ev = R::from_shape({de.size()});
        std::sort(kth_container.begin(), kth_container.end());

        std::copy(de.linear_cbegin(), de.linear_cend(), ev.linear_begin());  // flatten

        detail::partition_iter(ev.linear_begin(), ev.linear_end(), kth_container.rbegin(), kth_container.rend());

        return ev;
    }

    template <class E, class I, std::size_t N, class R = detail::flatten_sort_result_type_t<E>>
    inline R partition(const xexpression<E>& e, const I (&kth_container)[N], placeholders::xtuph tag)
    {
        return partition(
            e,
            xtl::forward_sequence<std::array<std::size_t, N>, decltype(kth_container)>(kth_container),
            tag
        );
    }

    template <class E, class R = detail::flatten_sort_result_type_t<E>>
    inline R partition(const xexpression<E>& e, std::size_t kth, placeholders::xtuph tag)
    {
        return partition(e, std::array<std::size_t, 1>({kth}), tag);
    }

    template <class E, class C, class = std::enable_if_t<!xtl::is_integral<C>::value, int>>
    inline auto partition(const xexpression<E>& e, C kth_container, std::ptrdiff_t axis = -1)
    {
        using eval_type = typename detail::sort_eval_type<E>::type;

        std::sort(kth_container.begin(), kth_container.end());

        return detail::map_axis<eval_type>(
            e.derived_cast(),
            axis,
            [&kth_container](auto begin, auto end)
            {
                detail::partition_iter(begin, end, kth_container.rbegin(), kth_container.rend());
            }
        );
    }

    template <class E, class T, std::size_t N>
    inline auto partition(const xexpression<E>& e, const T (&kth_container)[N], std::ptrdiff_t axis = -1)
    {
        return partition(
            e,
            xtl::forward_sequence<std::array<std::size_t, N>, decltype(kth_container)>(kth_container),
            axis
        );
    }

    template <class E>
    inline auto partition(const xexpression<E>& e, std::size_t kth, std::ptrdiff_t axis = -1)
    {
        return partition(e, std::array<std::size_t, 1>({kth}), axis);
    }

    /**
     * Partially sort arguments
     *
     * Argpartition shuffles the indices to a xexpression so that the index for the
     * kth element in the returned xexpression is in the place it would appear in a sorted
     * array and all elements smaller than this entry are placed (unsorted) before.
     *
     * The optional third parameter can either be an axis or ``xnone()`` in which case
     * the xexpression will be flattened.
     *
     * This function uses ``std::nth_element`` internally.
     *
     * @code{cpp}
     * xt::xarray<float> a = {1, 10, -10, 123};
     * std::cout << xt::argpartition(a, 0) << std::endl;      // {2, 0, 3, 1} the correct entry at index 0
     * std::cout << xt::argpartition(a, 3) << std::endl;      // {0, 1, 2, 3} the correct entry at index 3
     * std::cout << xt::argpartition(a, {0, 3}) << std::endl; // {2, 0, 1, 3} the correct entries at indices 0 and 3
     * \endcode
     *
     * @ingroup xt_xsort
     * @param e input xexpression
     * @param kth_container a container of ``indices`` that should contain the correctly sorted value
     * @param axis either integer (default = -1) to sort along last axis or ``xnone()`` to flatten before
     *        sorting
     *
     * @return xcontainer with indices of partial sort of input
     */
    template <
        class E,
        class C,
        class R = typename detail::linear_argsort_result_type<typename detail::sort_eval_type<E>::type>::type,
        class = std::enable_if_t<!xtl::is_integral<C>::value, int>>
    inline R argpartition(const xexpression<E>& e, C kth_container, placeholders::xtuph)
    {
        using eval_type = typename detail::sort_eval_type<E>::type;
        using result_type = typename detail::linear_argsort_result_type<eval_type>::type;

        const auto& de = e.derived_cast();

        result_type res = result_type::from_shape({de.size()});

        std::sort(kth_container.begin(), kth_container.end());
        std::iota(res.linear_begin(), res.linear_end(), 0);

        detail::partition_iter(
            res.linear_begin(),
            res.linear_end(),
            kth_container.rbegin(),
            kth_container.rend(),
            [&de](std::size_t a, std::size_t b)
            {
                return de[a] < de[b];
            }
        );

        return res;
    }

    template <class E, class I, std::size_t N>
    inline auto argpartition(const xexpression<E>& e, const I (&kth_container)[N], placeholders::xtuph tag)
    {
        return argpartition(
            e,
            xtl::forward_sequence<std::array<std::size_t, N>, decltype(kth_container)>(kth_container),
            tag
        );
    }

    template <class E>
    inline auto argpartition(const xexpression<E>& e, std::size_t kth, placeholders::xtuph tag)
    {
        return argpartition(e, std::array<std::size_t, 1>({kth}), tag);
    }

    template <class E, class C, class = std::enable_if_t<!xtl::is_integral<C>::value, int>>
    inline auto argpartition(const xexpression<E>& e, C kth_container, std::ptrdiff_t axis = -1)
    {
        using eval_type = typename detail::sort_eval_type<E>::type;
        using result_type = typename detail::argsort_result_type<eval_type>::type;

        const auto& de = e.derived_cast();

        if (de.dimension() == 1)
        {
            return argpartition<E, C, result_type>(e, std::forward<C>(kth_container), xnone());
        }

        std::sort(kth_container.begin(), kth_container.end());

        const auto argpartition_w_kth =
            [&kth_container](auto res_begin, auto res_end, auto ev_begin, auto /*ev_end*/)
        {
            std::iota(res_begin, res_end, 0);
            detail::partition_iter(
                res_begin,
                res_end,
                kth_container.rbegin(),
                kth_container.rend(),
                [&ev_begin](auto const& i, auto const& j)
                {
                    return *(ev_begin + i) < *(ev_begin + j);
                }
            );
        };

        const std::size_t ax = normalize_axis(de.dimension(), axis);
        if (ax == detail::leading_axis(de))
        {
            result_type res = result_type::from_shape(de.shape());
            detail::call_over_leading_axis(res, de, argpartition_w_kth);
            return res;
        }

        dynamic_shape<std::size_t> permutation, reverse_permutation;
        std::tie(permutation, reverse_permutation) = detail::get_permutations(de.dimension(), ax, de.layout());

        eval_type ev = transpose(de, permutation);
        result_type res = result_type::from_shape(ev.shape());
        detail::call_over_leading_axis(res, ev, argpartition_w_kth);
        res = transpose(res, reverse_permutation);
        return res;
    }

    template <class E, class I, std::size_t N>
    inline auto argpartition(const xexpression<E>& e, const I (&kth_container)[N], std::ptrdiff_t axis = -1)
    {
        return argpartition(
            e,
            xtl::forward_sequence<std::array<std::size_t, N>, decltype(kth_container)>(kth_container),
            axis
        );
    }

    template <class E>
    inline auto argpartition(const xexpression<E>& e, std::size_t kth, std::ptrdiff_t axis = -1)
    {
        return argpartition(e, std::array<std::size_t, 1>({kth}), axis);
    }

    /******************
     * xt::quantile   *
     ******************/
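    // Implementation note: for a sorted sample x of size n and a probability p, the
    // (Hyndman and Fan, 1996) family of estimators evaluates the virtual index
    //     h = p * n + alpha + p * (1 - alpha - beta) - 1
    // and linearly interpolates the neighbouring order statistics:
    //     q = (1 - gamma) * x[floor(h)] + gamma * x[floor(h) + 1],  gamma = h - floor(h).
    // detail::quantile_kth_gamma below computes the clipped index pair (k, k+1) and the
    // coefficient pair (1 - gamma, gamma) for all probabilities at once; for instance,
    // xt::quantile(a, {0.5}, 0.5, 0.5) yields the median of the flattened input.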
    namespace detail
    {
        template <class S, class I, class K, class O>
        inline void select_indices_impl(
            const S& shape,
            const I& indices,
            std::size_t axis,
            std::size_t current_dim,
            const K& current_index,
            O& out
        )
        {
            using id_t = typename K::value_type;
            if ((current_dim < shape.size() - 1) && (current_dim == axis))
            {
                for (auto i : indices)
                {
                    auto idx = current_index;
                    idx[current_dim] = i;
                    select_indices_impl(shape, indices, axis, current_dim + 1, idx, out);
                }
            }
            else if ((current_dim < shape.size() - 1) && (current_dim != axis))
            {
                for (id_t i = 0; xtl::cmp_less(i, shape[current_dim]); ++i)
                {
                    auto idx = current_index;
                    idx[current_dim] = i;
                    select_indices_impl(shape, indices, axis, current_dim + 1, idx, out);
                }
            }
            else if ((current_dim == shape.size() - 1) && (current_dim == axis))
            {
                for (auto i : indices)
                {
                    auto idx = current_index;
                    idx[current_dim] = i;
                    out.push_back(std::move(idx));
                }
            }
            else if ((current_dim == shape.size() - 1) && (current_dim != axis))
            {
                for (id_t i = 0; xtl::cmp_less(i, shape[current_dim]); ++i)
                {
                    auto idx = current_index;
                    idx[current_dim] = i;
                    out.push_back(std::move(idx));
                }
            }
        }

        template <class S, class I>
        inline auto select_indices(const S& shape, const I& indices, std::size_t axis)
        {
            using index_type = get_strides_t<S>;
            auto out = std::vector<index_type>();
            select_indices_impl(shape, indices, axis, 0, xtl::make_sequence<index_type>(shape.size()), out);
            return out;
        }

        // TODO remove when fancy index views are implemented
        // Poor man's indexing along a single axis as in NumPy a[:, [1, 3, 4]]
        template <class E, class I>
        inline auto fancy_indexing(E&& e, const I& indices, std::ptrdiff_t axis)
        {
            const std::size_t ax = normalize_axis(e.dimension(), axis);
            using shape_t = get_strides_t<typename std::decay_t<E>::shape_type>;
            auto shape = xtl::forward_sequence<shape_t, decltype(e.shape())>(e.shape());
            shape[ax] = indices.size();
            return reshape_view(
                index_view(std::forward<E>(e), select_indices(e.shape(), indices, ax)),
                std::move(shape)
            );
        }

        template <class T, class P>
        inline auto quantile_kth_gamma(std::size_t n, const P& probas, T alpha, T beta)
        {
            const auto m = alpha + probas * (T(1) - alpha - beta);
            // Evaluating since reused a lot
            const auto p_n_m = eval(probas * static_cast<T>(n) + m - 1);
            // Previous (virtual) index, may be out of bounds
            const auto j = floor(p_n_m);
            const auto j_jp1 = concatenate(xtuple(j, j + 1));
            // Both interpolation indices, k and k+1
            const auto k_kp1 = xt::cast<std::size_t>(clip(j_jp1, T(0), T(n - 1)));
            // Both interpolation coefficients, 1-gamma and gamma
            const auto omg_g = concatenate(xtuple(T(1) - (p_n_m - j), p_n_m - j));
            return std::make_pair(eval(k_kp1), eval(omg_g));
        }

        // TODO should implement unsqueeze rather
        template <class S>
        inline auto unsqueeze_shape(const S& shape, std::size_t axis)
        {
            XTENSOR_ASSERT(axis <= shape.size());
            auto new_shape = xtl::forward_sequence<xt::svector<std::size_t>, decltype(shape)>(shape);
            new_shape.insert(new_shape.begin() + axis, 1);
            return new_shape;
        }
    }

    /**
     * Compute quantiles over the given axis.
     *
     * In a sorted array representing a distribution of numbers, the quantile of a probability ``p``
     * is the cut value ``q`` such that a fraction ``p`` of the distribution is less than or equal
     * to ``q``.
     * When the cutpoint falls between two elements of the sample distribution, a linear
     * interpolation is computed using the @p alpha and @p beta coefficients, as described in
     * (Hyndman and Fan, 1996).
     *
     * The algorithm partially sorts entries in a copy along the @p axis axis.
     *
     * @ingroup xt_xsort
     * @param e Expression containing the distribution over which the quantiles are computed.
     * @param probas A list of probabilities, one for each desired quantile.
     *        All elements must be in the range ``[0, 1]``.
     * @param axis The dimension in which to compute the quantiles, *i.e.* the axis representing the
     *        distribution.
     * @param alpha Interpolation parameter. Must be in the range ``[0, 1]``.
     * @param beta Interpolation parameter. Must be in the range ``[0, 1]``.
     * @tparam T The type in which the quantiles are computed.
     * @return An expression with as many dimensions as the input @p e.
     *         The first axis corresponds to the quantiles.
     *         The other axes are the axes that remain after the reduction of @p e.
     * @see (Hyndman and Fan, 1996) R. J. Hyndman and Y. Fan,
     *      "Sample quantiles in statistical packages", The American Statistician,
     *      50(4), pp. 361-365, 1996
     * @see https://en.wikipedia.org/wiki/Quantile
     */
    template <class T = double, class E, class P>
    inline auto quantile(E&& e, const P& probas, std::ptrdiff_t axis, T alpha, T beta)
    {
        XTENSOR_ASSERT(all(0. <= probas));
        XTENSOR_ASSERT(all(probas <= 1.));
        XTENSOR_ASSERT(0. <= alpha);
        XTENSOR_ASSERT(alpha <= 1.);
        XTENSOR_ASSERT(0. <= beta);
        XTENSOR_ASSERT(beta <= 1.);

        using tmp_shape_t = get_strides_t<typename std::decay_t<E>::shape_type>;
        using id_t = typename tmp_shape_t::value_type;

        const std::size_t ax = normalize_axis(e.dimension(), axis);
        const std::size_t n = e.shape()[ax];
        auto kth_gamma = detail::quantile_kth_gamma(n, probas, alpha, beta);

        // Select relevant values for computing interpolating quantiles
        auto e_partition = xt::partition(std::forward<E>(e), kth_gamma.first, static_cast<std::ptrdiff_t>(ax));
        auto e_kth = detail::fancy_indexing(std::move(e_partition), std::move(kth_gamma.first), static_cast<std::ptrdiff_t>(ax));

        // Reshape interpolation coefficients
        auto gm1_g_shape = xtl::make_sequence<tmp_shape_t>(e.dimension(), id_t(1));
        gm1_g_shape[ax] = static_cast<id_t>(kth_gamma.second.size());
        auto gm1_g_reshaped = reshape_view(std::move(kth_gamma.second), std::move(gm1_g_shape));

        // Compute interpolation
        // TODO(C++20) use (and create) xt::lerp in C++
        auto e_kth_g = std::move(e_kth) * std::move(gm1_g_reshaped);

        // Reshape pairwise interpolation for summing along the new axis
        auto e_kth_g_shape = detail::unsqueeze_shape(e_kth_g.shape(), ax);
        e_kth_g_shape[ax] = 2;
        e_kth_g_shape[ax + 1] /= 2;
        auto quantiles = xt::sum(reshape_view(std::move(e_kth_g), std::move(e_kth_g_shape)), ax);

        // Cannot do a transpose on a non-strided expression so we have to eval
        return moveaxis(eval(std::move(quantiles)), static_cast<std::ptrdiff_t>(ax), 0);
    }

    // Static proba array overload
    template <class T = double, class E, std::size_t N>
    inline auto quantile(E&& e, const T (&probas)[N], std::ptrdiff_t axis, T alpha, T beta)
    {
        return quantile(std::forward<E>(e), adapt(probas, {N}), axis, alpha, beta);
    }

    /**
     * Compute quantiles of the whole expression.
     *
     * The quantiles are computed over the whole expression, as if flattened into a one-dimensional
     * expression.
     *
     * @ingroup xt_xsort
     * @see xt::quantile(E&& e, P const& probas, std::ptrdiff_t axis, T alpha, T beta)
     */
    template <class T = double, class E, class P>
    inline auto quantile(E&& e, const P& probas, T alpha, T beta)
    {
        return quantile(xt::ravel(std::forward<E>(e)), probas, 0, alpha, beta);
    }

    // Static proba array overload
    template <class T = double, class E, std::size_t N>
    inline auto quantile(E&& e, const T (&probas)[N], T alpha, T beta)
    {
        return quantile(std::forward<E>(e), adapt(probas, {N}), alpha, beta);
    }

    /**
     * Quantile interpolation method.
     *
     * Predefined methods for interpolating quantiles, as defined in (Hyndman and Fan, 1996).
     *
     * @ingroup xt_xsort
     * @see (Hyndman and Fan, 1996) R. J. Hyndman and Y. Fan,
     *      "Sample quantiles in statistical packages", The American Statistician,
     *      50(4), pp. 361-365, 1996
     * @see xt::quantile(E&& e, P const& probas, std::ptrdiff_t axis, xt::quantile_method method)
     */
    enum class quantile_method
    {
        /**
         * Method 4 of (Hyndman and Fan, 1996) with ``alpha=0`` and ``beta=1``.
         */
        interpolated_inverted_cdf = 4,
        /**
         * Method 5 of (Hyndman and Fan, 1996) with ``alpha=1/2`` and ``beta=1/2``.
         */
        hazen,
        /**
         * Method 6 of (Hyndman and Fan, 1996) with ``alpha=0`` and ``beta=0``.
         */
        weibull,
        /**
         * Method 7 of (Hyndman and Fan, 1996) with ``alpha=1`` and ``beta=1``.
         */
        linear,
        /**
         * Method 8 of (Hyndman and Fan, 1996) with ``alpha=1/3`` and ``beta=1/3``.
         */
        median_unbiased,
        /**
         * Method 9 of (Hyndman and Fan, 1996) with ``alpha=3/8`` and ``beta=3/8``.
         */
        normal_unbiased,
    };

    /**
     * Compute quantiles over the given axis.
     *
     * The function takes the name of a predefined method used to interpolate between values.
     *
     * @ingroup xt_xsort
     * @see xt::quantile_method
     * @see xt::quantile(E&& e, P const& probas, std::ptrdiff_t axis, T alpha, T beta)
     */
    template <class T = double, class E, class P>
    inline auto quantile(E&& e, const P& probas, std::ptrdiff_t axis, quantile_method method = quantile_method::linear)
    {
        T alpha = 0.;
        T beta = 0.;
        switch (method)
        {
            case (quantile_method::interpolated_inverted_cdf):
            {
                alpha = 0.;
                beta = 1.;
                break;
            }
            case (quantile_method::hazen):
            {
                alpha = 0.5;
                beta = 0.5;
                break;
            }
            case (quantile_method::weibull):
            {
                alpha = 0.;
                beta = 0.;
                break;
            }
            case (quantile_method::linear):
            {
                alpha = 1.;
                beta = 1.;
                break;
            }
            case (quantile_method::median_unbiased):
            {
                alpha = 1. / 3.;
                beta = 1. / 3.;
                break;
            }
            case (quantile_method::normal_unbiased):
            {
                alpha = 3. / 8.;
                beta = 3. / 8.;
                break;
            }
        }
        return quantile(std::forward<E>(e), probas, axis, alpha, beta);
    }

    // Static proba array overload
    template <class T = double, class E, std::size_t N>
    inline auto quantile(E&& e, const T (&probas)[N], std::ptrdiff_t axis, quantile_method method = quantile_method::linear)
    {
        return quantile(std::forward<E>(e), adapt(probas, {N}), axis, method);
    }

    /**
     * Compute quantiles of the whole expression.
     *
     * The quantiles are computed over the whole expression, as if flattened into a one-dimensional
     * expression.
     * The function takes the name of a predefined method used to interpolate between values.
     *
     * @ingroup xt_xsort
     * @see xt::quantile_method
     * @see xt::quantile(E&& e, P const& probas, std::ptrdiff_t axis, xt::quantile_method method)
     */
    template <class T = double, class E, class P>
    inline auto quantile(E&& e, const P& probas, quantile_method method = quantile_method::linear)
    {
        return quantile(xt::ravel(std::forward<E>(e)), probas, 0, method);
    }

    // Static proba array overload
    template <class T = double, class E, std::size_t N>
    inline auto quantile(E&& e, const T (&probas)[N], quantile_method method = quantile_method::linear)
    {
        return quantile(std::forward<E>(e), adapt(probas, {N}), method);
    }

    /****************
     * xt::median   *
     ****************/

    template <class E>
    inline typename std::decay_t<E>::value_type median(E&& e)
    {
        using value_type = typename std::decay_t<E>::value_type;

        auto sz = e.size();
        if (sz % 2 == 0)
        {
            std::size_t szh = sz / 2;  // integer floor div
            std::array<std::size_t, 2> kth = {szh - 1, szh};
            auto values = xt::partition(xt::flatten(e), kth);
            return (values[kth[0]] + values[kth[1]]) / value_type(2);
        }
        else
        {
            std::array<std::size_t, 1> kth = {(sz - 1) / 2};
            auto values = xt::partition(xt::flatten(e), kth);
            return values[kth[0]];
        }
    }

    /**
     * Find the median along the specified axis.
     *
     * Given a vector V of length N, the median of V is the middle value of a
     * sorted copy of V, V_sorted, i.e. ``V_sorted[(N-1)/2]`` when N is odd,
     * and the average of the two middle values of V_sorted when N is even.
     *
     * @ingroup xt_xsort
     * @param e input xexpression
     * @param axis axis along which the medians are computed.
     *        If not set, computes the median along a flattened version of the input.
     * @return median value
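     *
     * A minimal illustrative example:
     * @code{cpp}
     * xt::xarray<double> a = {{1., 2., 3.}, {4., 5., 6.}};
     * xt::median(a, 0);  // {2.5, 3.5, 4.5}
     * \endcode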
     */
    template <class E>
    inline auto median(E&& e, std::ptrdiff_t axis)
    {
        std::size_t ax = normalize_axis(e.dimension(), axis);
        std::size_t sz = e.shape()[ax];
        xstrided_slice_vector sv(e.dimension(), xt::all());

        if (sz % 2 == 0)
        {
            std::size_t szh = sz / 2;  // integer floor div
            std::array<std::size_t, 2> kth = {szh - 1, szh};
            auto values = xt::partition(std::forward<E>(e), kth, static_cast<std::ptrdiff_t>(ax));
            sv[ax] = xt::range(szh - 1, szh + 1);
            return xt::mean(xt::strided_view(std::move(values), std::move(sv)), {ax});
        }
        else
        {
            std::size_t szh = (sz - 1) / 2;
            std::array<std::size_t, 1> kth = {(sz - 1) / 2};
            auto values = xt::partition(std::forward<E>(e), kth, static_cast<std::ptrdiff_t>(ax));
            sv[ax] = xt::range(szh, szh + 1);
            return xt::mean(xt::strided_view(std::move(values), std::move(sv)), {ax});
        }
    }

    namespace detail
    {
        template <class T>
        struct argfunc_result_type
        {
            using type = xarray<std::size_t>;
        };

        template <class T, std::size_t N>
        struct argfunc_result_type<xtensor<T, N>>
        {
            using type = xtensor<std::size_t, N - 1>;
        };

        template <layout_type L, class E, class F>
        inline typename argfunc_result_type<E>::type arg_func_impl(const E& e, std::size_t axis, F&& cmp)
        {
            using eval_type = typename detail::sort_eval_type<E>::type;
            using value_type = typename E::value_type;
            using result_type = typename argfunc_result_type<E>::type;
            using result_shape_type = typename result_type::shape_type;

            if (e.dimension() == 1)
            {
                auto begin = e.template begin<L>();
                auto end = e.template end<L>();
                // TODO(C++17) use if constexpr
                if (std::is_same<F, std::less<value_type>>::value)
                {
                    std::size_t i = static_cast<std::size_t>(std::distance(begin, std::min_element(begin, end)));
                    return xtensor<std::size_t, 0>{i};
                }
                else
                {
                    std::size_t i = static_cast<std::size_t>(std::distance(begin, std::max_element(begin, end)));
                    return xtensor<std::size_t, 0>{i};
                }
            }

            result_shape_type alt_shape;
            xt::resize_container(alt_shape, e.dimension() - 1);

            // Copy all of the shape except for the reduced axis
            std::copy(e.shape().cbegin(), e.shape().cbegin() + std::ptrdiff_t(axis), alt_shape.begin());
            std::copy(
                e.shape().cbegin() + std::ptrdiff_t(axis) + 1,
                e.shape().cend(),
                alt_shape.begin() + std::ptrdiff_t(axis)
            );

            result_type result = result_type::from_shape(std::move(alt_shape));
            auto result_iter = result.template begin<L>();

            auto arg_func_lambda = [&result_iter, &cmp](auto begin, auto end)
            {
                std::size_t idx = 0;
                value_type val = *begin;
                ++begin;
                for (std::size_t i = 1; begin != end; ++begin, ++i)
                {
                    if (cmp(*begin, val))
                    {
                        val = *begin;
                        idx = i;
                    }
                }
                *result_iter = idx;
                ++result_iter;
            };

            if (axis != detail::leading_axis(e))
            {
                dynamic_shape<std::size_t> permutation, reverse_permutation;
                std::tie(
                    permutation,
                    reverse_permutation
                ) = detail::get_permutations(e.dimension(), axis, e.layout());

                // note: creating copy
                eval_type input = transpose(e, permutation);
                detail::call_over_leading_axis(input, arg_func_lambda);
                return result;
            }
            else
            {
                auto&& input = eval(e);
                detail::call_over_leading_axis(input, arg_func_lambda);
                return result;
            }
        }
    }

    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
    inline auto argmin(const xexpression<E>& e)
    {
        using value_type = typename E::value_type;
        auto&& ed = eval(e.derived_cast());
        auto begin = ed.template begin<L>();
        auto end = ed.template end<L>();
        std::size_t i = static_cast<std::size_t>(std::distance(begin, std::min_element(begin, end)));
        return xtensor<std::size_t, 0>{i};
    }

    /**
     * Find position of minimal value in xexpression.
     * By default, the returned index is into the flattened array.
     * If `axis` is specified, the indices are along the specified axis.
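     *
     * A minimal illustrative example:
     * @code{cpp}
     * xt::xarray<int> a = {{3, 1}, {2, 0}};
     * xt::argmin(a);     // 3 (index into the flattened array)
     * xt::argmin(a, 1);  // {1, 1}
     * \endcode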
     *
     * @ingroup xt_xsort
     * @param e input xexpression
     * @param axis select axis (optional)
     *
     * @return returns xarray with positions of minimal value
     */
    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
    inline auto argmin(const xexpression<E>& e, std::ptrdiff_t axis)
    {
        using value_type = typename E::value_type;
        auto&& ed = eval(e.derived_cast());
        std::size_t ax = normalize_axis(ed.dimension(), axis);
        return detail::arg_func_impl<L>(ed, ax, std::less<value_type>());
    }

    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
    inline auto argmax(const xexpression<E>& e)
    {
        using value_type = typename E::value_type;
        auto&& ed = eval(e.derived_cast());
        auto begin = ed.template begin<L>();
        auto end = ed.template end<L>();
        std::size_t i = static_cast<std::size_t>(std::distance(begin, std::max_element(begin, end)));
        return xtensor<std::size_t, 0>{i};
    }

    /**
     * Find position of maximal value in xexpression.
     * By default, the returned index is into the flattened array.
     * If `axis` is specified, the indices are along the specified axis.
     *
     * @ingroup xt_xsort
     * @param e input xexpression
     * @param axis select axis (optional)
     *
     * @return returns xarray with positions of maximal value
     */
    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
    inline auto argmax(const xexpression<E>& e, std::ptrdiff_t axis)
    {
        using value_type = typename E::value_type;
        auto&& ed = eval(e.derived_cast());
        std::size_t ax = normalize_axis(ed.dimension(), axis);
        return detail::arg_func_impl<L>(ed, ax, std::greater<value_type>());
    }

    /**
     * Find unique elements of a xexpression. This returns a flattened xtensor with
     * sorted, unique elements from the original expression.
     *
     * @ingroup xt_xsort
     * @param e input xexpression (will be flattened)
     */
    template <class E>
    inline auto unique(const xexpression<E>& e)
    {
        auto sorted = sort(e, xnone());
        auto end = std::unique(sorted.begin(), sorted.end());
        std::size_t sz = static_cast<std::size_t>(std::distance(sorted.begin(), end));
        // TODO check if we can shrink the vector without reallocation
        using value_type = typename E::value_type;
        auto result = xtensor<value_type, 1>::from_shape({sz});
        std::copy(sorted.begin(), end, result.begin());
        return result;
    }

    /**
     * Find the set difference of two xexpressions. This returns a flattened xtensor with
     * the sorted, unique values in ar1 that are not in ar2.
     *
     * @ingroup xt_xsort
     * @param ar1 input xexpression (will be flattened)
     * @param ar2 input xexpression
     */
    template <class E1, class E2>
    inline auto setdiff1d(const xexpression<E1>& ar1, const xexpression<E2>& ar2)
    {
        using value_type = typename E1::value_type;

        auto unique1 = unique(ar1);
        auto unique2 = unique(ar2);

        auto tmp = xtensor<value_type, 1>::from_shape({unique1.size()});

        auto end = std::set_difference(unique1.begin(), unique1.end(), unique2.begin(), unique2.end(), tmp.begin());

        std::size_t sz = static_cast<std::size_t>(std::distance(tmp.begin(), end));

        auto result = xtensor<value_type, 1>::from_shape({sz});

        std::copy(tmp.begin(), end, result.begin());

        return result;
    }
}

#endif