//  Copyright (c) 2007-2021 Hartmut Kaiser
//  Copyright (c) 2016 Thomas Heller
//
//  SPDX-License-Identifier: BSL-1.0
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

/// \file parallel/algorithms/for_loop.hpp

#pragma once

#if defined(DOXYGEN)
namespace hpx {
    /// The for_loop implements loop functionality over a range specified by
    /// integral or iterator bounds. For the iterator case, these algorithms
    /// resemble for_each from the Parallelism TS, but leave to the programmer
    /// when and if to dereference the iterator.
    ///
    /// The execution of for_loop without specifying an execution policy is
    /// equivalent to specifying \a hpx::execution::seq as the execution
    /// policy.
    ///
    /// \tparam I           The type of the iteration variable. This could be
    ///                     a (forward) iterator type or an integral type.
    /// \tparam Args        A parameter pack; its last element is a function
    ///                     object to be invoked for each iteration, the others
    ///                     have to be either conforming to the induction or
    ///                     reduction concept.
    ///
    /// \param first        Refers to the beginning of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param last         Refers to the end of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param args         The last element of this parameter pack is the
    ///                     function (object) to invoke, while the remaining
    ///                     elements of the parameter pack are instances of
    ///                     either induction or reduction objects.
    ///                     The function (or function object) which will be
    ///                     invoked for each of the elements in the sequence
    ///                     specified by [first, last) should expose a signature
    ///                     equivalent to:
    ///                     \code
    ///                     <ignored> pred(I const& a, ...);
    ///                     \endcode \n
    ///                     The signature does not need to have const&. It will
    ///                     receive the current value of the iteration variable
    ///                     and one argument for each of the induction or
    ///                     reduction objects passed to the algorithms,
    ///                     representing their current values.
    ///
    /// Requires: \a I shall be an integral type or meet the requirements
    ///           of an input iterator type. The \a args parameter pack shall
    ///           have at least one element, comprising objects returned by
    ///           invocations of \a reduction and/or \a induction function
    ///           templates followed by exactly one invocable
    ///           element-access function, \a f. \a f shall meet the
    ///           requirements of MoveConstructible.
    ///
    /// Effects:  Applies \a f to each element in the input sequence, with
    ///           additional arguments corresponding to the reductions and
    ///           inductions in the \a args parameter pack. The length of the
    ///           input sequence is last - first.
    ///
    /// The first element in the input sequence is specified by \a first. Each
    /// subsequent element is generated by incrementing the previous element.
    ///
    /// \note As described in the C++ standard, arithmetic on non-random-access
    ///       iterators is performed using advance and distance.
    ///
    /// \note The order of the elements of the input sequence is important for
    ///       determining ordinal position of an application of \a f, even
    ///       though the applications themselves may be unordered.
    ///
    /// Along with an element from the input sequence, for each member of the
    /// \a args parameter pack excluding \a f, an additional argument is passed
    /// to each application of \a f as follows:
    ///
    /// If the pack member is an object returned by a call to a reduction
    /// function, then the additional argument is a reference to a view of
    /// that reduction object.
    /// If the pack member is an object returned by a call to induction, then
    /// the additional argument is the induction value for that induction object
    /// corresponding to the position of the application of \a f in the input
    /// sequence.
    ///
    /// Complexity: Applies \a f exactly once for each element of the input
    ///             sequence.
    ///
    /// Remarks: If \a f returns a result, the result is ignored.
    ///
    /// \note \a first is declared as std::decay_t<I>, which is a non-deduced
    ///       context: \a I is deduced from \a last only, and \a first must be
    ///       implicitly convertible to that type.
    ///
    template <typename I, typename... Args>
    void for_loop(std::decay_t<I> first, I last, Args&&... args);

    /// The for_loop implements loop functionality over a range specified by
    /// integral or iterator bounds. For the iterator case, these algorithms
    /// resemble for_each from the Parallelism TS, but leave to the programmer
    /// when and if to dereference the iterator.
    ///
    /// \tparam ExPolicy    The type of the execution policy to use (deduced).
    ///                     It describes the manner in which the execution
    ///                     of the algorithm may be parallelized and the manner
    ///                     in which it applies user-provided function objects.
    /// \tparam I           The type of the iteration variable. This could be
    ///                     a (forward) iterator type or an integral type.
    /// \tparam Args        A parameter pack; its last element is a function
    ///                     object to be invoked for each iteration, the others
    ///                     have to be either conforming to the induction or
    ///                     reduction concept.
    ///
    /// \param policy       The execution policy to use for the scheduling of
    ///                     the iterations.
    /// \param first        Refers to the beginning of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param last         Refers to the end of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param args         The last element of this parameter pack is the
    ///                     function (object) to invoke, while the remaining
    ///                     elements of the parameter pack are instances of
    ///                     either induction or reduction objects.
    ///                     The function (or function object) which will be
    ///                     invoked for each of the elements in the sequence
    ///                     specified by [first, last) should expose a signature
    ///                     equivalent to:
    ///                     \code
    ///                     <ignored> pred(I const& a, ...);
    ///                     \endcode \n
    ///                     The signature does not need to have const&. It will
    ///                     receive the current value of the iteration variable
    ///                     and one argument for each of the induction or
    ///                     reduction objects passed to the algorithms,
    ///                     representing their current values.
    ///
    /// Requires: \a I shall be an integral type or meet the requirements
    ///           of an input iterator type. The \a args parameter pack shall
    ///           have at least one element, comprising objects returned by
    ///           invocations of \a reduction and/or \a induction function
    ///           templates followed by exactly one invocable
    ///           element-access function, \a f. \a f shall meet the
    ///           requirements of MoveConstructible.
    ///
    /// Effects:  Applies \a f to each element in the input sequence, with
    ///           additional arguments corresponding to the reductions and
    ///           inductions in the \a args parameter pack. The length of the
    ///           input sequence is last - first.
    ///
    /// The first element in the input sequence is specified by \a first. Each
    /// subsequent element is generated by incrementing the previous element.
    ///
    /// \note As described in the C++ standard, arithmetic on non-random-access
    ///       iterators is performed using advance and distance.
    ///
    /// \note The order of the elements of the input sequence is important for
    ///       determining ordinal position of an application of \a f, even
    ///       though the applications themselves may be unordered.
    ///
    /// Along with an element from the input sequence, for each member of the
    /// \a args parameter pack excluding \a f, an additional argument is passed
    /// to each application of \a f as follows:
    ///
    /// If the pack member is an object returned by a call to a reduction
    /// function, then the additional argument is a reference to a view of
    /// that reduction object.
    /// If the pack member is an object returned by a call to induction, then
    /// the additional argument is the induction value for that induction object
    /// corresponding to the position of the application of \a f in the input
    /// sequence.
    ///
    /// Complexity: Applies \a f exactly once for each element of the input
    ///             sequence.
    ///
    /// Remarks: If \a f returns a result, the result is ignored.
    ///
    /// \returns  The \a for_loop algorithm returns a
    ///           \a hpx::future<void> if the execution policy is of
    ///           type
    ///           \a hpx::execution::sequenced_task_policy or
    ///           \a hpx::execution::parallel_task_policy and returns \a void
    ///           otherwise.
    ///
    /// \note \a first is declared as std::decay_t<I>, which is a non-deduced
    ///       context: \a I is deduced from \a last only, and \a first must be
    ///       implicitly convertible to that type.
    ///
    template <typename ExPolicy, typename I, typename... Args>
    typename util::detail::algorithm_result<ExPolicy>::type for_loop(
        ExPolicy&& policy, std::decay_t<I> first, I last, Args&&... args);

    /// The for_loop_strided implements loop functionality over a range
    /// specified by integral or iterator bounds. For the iterator case, these
    /// algorithms resemble for_each from the Parallelism TS, but leave to the
    /// programmer when and if to dereference the iterator.
    ///
    /// The execution of for_loop_strided without specifying an execution
    /// policy is equivalent to specifying \a hpx::execution::seq as the
    /// execution policy.
    ///
    /// \tparam I           The type of the iteration variable. This could be
    ///                     a (forward) iterator type or an integral type.
    /// \tparam S           The type of the stride variable. This should be
    ///                     an integral type.
    /// \tparam Args        A parameter pack; its last element is a function
    ///                     object to be invoked for each iteration, the others
    ///                     have to be either conforming to the induction or
    ///                     reduction concept.
    ///
    /// \param first        Refers to the beginning of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param last         Refers to the end of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param stride       Refers to the stride of the iteration steps. This
    ///                     shall have non-zero value and shall be negative
    ///                     only if I has integral type or meets the requirements
    ///                     of a bidirectional iterator.
    /// \param args         The last element of this parameter pack is the
    ///                     function (object) to invoke, while the remaining
    ///                     elements of the parameter pack are instances of
    ///                     either induction or reduction objects.
    ///                     The function (or function object) which will be
    ///                     invoked for each of the elements in the sequence
    ///                     specified by [first, last) should expose a signature
    ///                     equivalent to:
    ///                     \code
    ///                     <ignored> pred(I const& a, ...);
    ///                     \endcode \n
    ///                     The signature does not need to have const&. It will
    ///                     receive the current value of the iteration variable
    ///                     and one argument for each of the induction or
    ///                     reduction objects passed to the algorithms,
    ///                     representing their current values.
    ///
    /// Requires: \a I shall be an integral type or meet the requirements
    ///           of an input iterator type. The \a args parameter pack shall
    ///           have at least one element, comprising objects returned by
    ///           invocations of \a reduction and/or \a induction function
    ///           templates followed by exactly one invocable
    ///           element-access function, \a f. \a f shall meet the
    ///           requirements of MoveConstructible.
    ///
    /// Effects:  Applies \a f to each element in the input sequence, with
    ///           additional arguments corresponding to the reductions and
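    ///           inductions in the \a args parameter pack. The length of the
    ///           input sequence is 1 + (last - first - 1) / stride if
    ///           \a stride is positive, and 1 + (first - last - 1) / -stride
    ///           otherwise.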
    ///
    /// The first element in the input sequence is specified by \a first. Each
    /// subsequent element is generated by adding \a stride to the previous
    /// element.
    ///
    /// \note As described in the C++ standard, arithmetic on non-random-access
    ///       iterators is performed using advance and distance.
    ///
    /// \note The order of the elements of the input sequence is important for
    ///       determining ordinal position of an application of \a f, even
    ///       though the applications themselves may be unordered.
    ///
    /// Along with an element from the input sequence, for each member of the
    /// \a args parameter pack excluding \a f, an additional argument is passed
    /// to each application of \a f as follows:
    ///
    /// If the pack member is an object returned by a call to a reduction
    /// function, then the additional argument is a reference to a view of
    /// that reduction object.
    /// If the pack member is an object returned by a call to induction, then
    /// the additional argument is the induction value for that induction object
    /// corresponding to the position of the application of \a f in the input
    /// sequence.
    ///
    /// Complexity: Applies \a f exactly once for each element of the input
    ///             sequence.
    ///
    /// Remarks: If \a f returns a result, the result is ignored.
    ///
    /// \note \a first is declared as std::decay_t<I>, which is a non-deduced
    ///       context: \a I is deduced from \a last only, and \a first must be
    ///       implicitly convertible to that type. \a S is deduced from
    ///       \a stride.
    ///
    template <typename I, typename S, typename... Args>
    void for_loop_strided(
        std::decay_t<I> first, I last, S stride, Args&&... args);

    /// The for_loop_strided implements loop functionality over a range
    /// specified by integral or iterator bounds. For the iterator case, these
    /// algorithms resemble for_each from the Parallelism TS, but leave to the
    /// programmer when and if to dereference the iterator.
    ///
    /// \tparam ExPolicy    The type of the execution policy to use (deduced).
    ///                     It describes the manner in which the execution
    ///                     of the algorithm may be parallelized and the manner
    ///                     in which it applies user-provided function objects.
    /// \tparam I           The type of the iteration variable. This could be
    ///                     a (forward) iterator type or an integral type.
    /// \tparam S           The type of the stride variable. This should be
    ///                     an integral type.
    /// \tparam Args        A parameter pack; its last element is a function
    ///                     object to be invoked for each iteration, the others
    ///                     have to be either conforming to the induction or
    ///                     reduction concept.
    ///
    /// \param policy       The execution policy to use for the scheduling of
    ///                     the iterations.
    /// \param first        Refers to the beginning of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param last         Refers to the end of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param stride       Refers to the stride of the iteration steps. This
    ///                     shall have non-zero value and shall be negative
    ///                     only if I has integral type or meets the requirements
    ///                     of a bidirectional iterator.
    /// \param args         The last element of this parameter pack is the
    ///                     function (object) to invoke, while the remaining
    ///                     elements of the parameter pack are instances of
    ///                     either induction or reduction objects.
    ///                     The function (or function object) which will be
    ///                     invoked for each of the elements in the sequence
    ///                     specified by [first, last) should expose a signature
    ///                     equivalent to:
    ///                     \code
    ///                     <ignored> pred(I const& a, ...);
    ///                     \endcode \n
    ///                     The signature does not need to have const&. It will
    ///                     receive the current value of the iteration variable
    ///                     and one argument for each of the induction or
    ///                     reduction objects passed to the algorithms,
    ///                     representing their current values.
    ///
    /// Requires: \a I shall be an integral type or meet the requirements
    ///           of an input iterator type. The \a args parameter pack shall
    ///           have at least one element, comprising objects returned by
    ///           invocations of \a reduction and/or \a induction function
    ///           templates followed by exactly one invocable
    ///           element-access function, \a f. \a f shall meet the
    ///           requirements of MoveConstructible.
    ///
    /// Effects:  Applies \a f to each element in the input sequence, with
    ///           additional arguments corresponding to the reductions and
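    ///           inductions in the \a args parameter pack. The length of the
    ///           input sequence is 1 + (last - first - 1) / stride if
    ///           \a stride is positive, and 1 + (first - last - 1) / -stride
    ///           otherwise.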
    ///
    /// The first element in the input sequence is specified by \a first. Each
    /// subsequent element is generated by adding \a stride to the previous
    /// element.
    ///
    /// \note As described in the C++ standard, arithmetic on non-random-access
    ///       iterators is performed using advance and distance.
    ///
    /// \note The order of the elements of the input sequence is important for
    ///       determining ordinal position of an application of \a f, even
    ///       though the applications themselves may be unordered.
    ///
    /// Along with an element from the input sequence, for each member of the
    /// \a args parameter pack excluding \a f, an additional argument is passed
    /// to each application of \a f as follows:
    ///
    /// If the pack member is an object returned by a call to a reduction
    /// function, then the additional argument is a reference to a view of
    /// that reduction object.
    /// If the pack member is an object returned by a call to induction, then
    /// the additional argument is the induction value for that induction object
    /// corresponding to the position of the application of \a f in the input
    /// sequence.
    ///
    /// Complexity: Applies \a f exactly once for each element of the input
    ///             sequence.
    ///
    /// Remarks: If \a f returns a result, the result is ignored.
    ///
    /// \returns  The \a for_loop_strided algorithm returns a
    ///           \a hpx::future<void> if the execution policy is of
    ///           type
    ///           \a hpx::execution::sequenced_task_policy or
    ///           \a hpx::execution::parallel_task_policy and returns \a void
    ///           otherwise.
    ///
    /// \note \a first is declared as std::decay_t<I>, which is a non-deduced
    ///       context: \a I is deduced from \a last only, and \a first must be
    ///       implicitly convertible to that type. \a S is deduced from
    ///       \a stride.
    ///
    template <typename ExPolicy, typename I, typename S, typename... Args>
    typename util::detail::algorithm_result<ExPolicy>::type for_loop_strided(
        ExPolicy&& policy, std::decay_t<I> first, I last, S stride,
        Args&&... args);

    /// The for_loop_n implements loop functionality over a range specified by
    /// integral or iterator bounds. For the iterator case, these algorithms
    /// resemble for_each from the Parallelism TS, but leave to the programmer
    /// when and if to dereference the iterator.
    ///
    /// The execution of for_loop_n without specifying an execution policy is
    /// equivalent to specifying \a hpx::execution::seq as the execution
    /// policy.
    ///
    /// \tparam I           The type of the iteration variable. This could be
    ///                     a (forward) iterator type or an integral type.
    /// \tparam Size        The type of a non-negative integral value specifying
    ///                     the number of items to iterate over.
    /// \tparam Args        A parameter pack; its last element is a function
    ///                     object to be invoked for each iteration, the others
    ///                     have to be either conforming to the induction or
    ///                     reduction concept.
    ///
    /// \param first        Refers to the beginning of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param size         Refers to the number of items the algorithm will be
    ///                     applied to.
    /// \param args         The last element of this parameter pack is the
    ///                     function (object) to invoke, while the remaining
    ///                     elements of the parameter pack are instances of
    ///                     either induction or reduction objects.
    ///                     The function (or function object) which will be
    ///                     invoked for each of the elements in the sequence
    ///                     specified by [first, first + size) should expose a
    ///                     signature equivalent to:
    ///                     \code
    ///                     <ignored> pred(I const& a, ...);
    ///                     \endcode \n
    ///                     The signature does not need to have const&. It will
    ///                     receive the current value of the iteration variable
    ///                     and one argument for each of the induction or
    ///                     reduction objects passed to the algorithms,
    ///                     representing their current values.
    ///
    /// Requires: \a I shall be an integral type or meet the requirements
    ///           of an input iterator type. The \a args parameter pack shall
    ///           have at least one element, comprising objects returned by
    ///           invocations of \a reduction and/or \a induction function
    ///           templates followed by exactly one invocable
    ///           element-access function, \a f. \a f shall meet the
    ///           requirements of MoveConstructible.
    ///
    /// Effects:  Applies \a f to each element in the input sequence, with
    ///           additional arguments corresponding to the reductions and
    ///           inductions in the \a args parameter pack. The length of the
    ///           input sequence is \a size.
    ///
    /// The first element in the input sequence is specified by \a first. Each
    /// subsequent element is generated by incrementing the previous element.
    ///
    /// \note As described in the C++ standard, arithmetic on non-random-access
    ///       iterators is performed using advance and distance.
    ///
    /// \note The order of the elements of the input sequence is important for
    ///       determining ordinal position of an application of \a f, even
    ///       though the applications themselves may be unordered.
    ///
    /// Along with an element from the input sequence, for each member of the
    /// \a args parameter pack excluding \a f, an additional argument is passed
    /// to each application of \a f as follows:
    ///
    /// If the pack member is an object returned by a call to a reduction
    /// function, then the additional argument is a reference to a view of
    /// that reduction object.
    /// If the pack member is an object returned by a call to induction, then
    /// the additional argument is the induction value for that induction object
    /// corresponding to the position of the application of \a f in the input
    /// sequence.
    ///
    /// Complexity: Applies \a f exactly once for each element of the input
    ///             sequence.
    ///
    /// Remarks: If \a f returns a result, the result is ignored.
    ///
    /// \note \a first and \a size are taken by value, so \a I and \a Size are
    ///       deduced as the decayed types of the corresponding arguments.
    ///
    template <typename I, typename Size, typename... Args>
    void for_loop_n(I first, Size size, Args&&... args);

    /// The for_loop_n implements loop functionality over a range specified by
    /// integral or iterator bounds. For the iterator case, these algorithms
    /// resemble for_each from the Parallelism TS, but leave to the programmer
    /// when and if to dereference the iterator.
    ///
    /// \tparam ExPolicy    The type of the execution policy to use (deduced).
    ///                     It describes the manner in which the execution
    ///                     of the algorithm may be parallelized and the manner
    ///                     in which it applies user-provided function objects.
    /// \tparam I           The type of the iteration variable. This could be
    ///                     a (forward) iterator type or an integral type.
    /// \tparam Size        The type of a non-negative integral value specifying
    ///                     the number of items to iterate over.
    /// \tparam Args        A parameter pack; its last element is a function
    ///                     object to be invoked for each iteration, the others
    ///                     have to be either conforming to the induction or
    ///                     reduction concept.
    ///
    /// \param policy       The execution policy to use for the scheduling of
    ///                     the iterations.
    /// \param first        Refers to the beginning of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param size         Refers to the number of items the algorithm will be
    ///                     applied to.
    /// \param args         The last element of this parameter pack is the
    ///                     function (object) to invoke, while the remaining
    ///                     elements of the parameter pack are instances of
    ///                     either induction or reduction objects.
    ///                     The function (or function object) which will be
    ///                     invoked for each of the elements in the sequence
    ///                     specified by [first, first + size) should expose a
    ///                     signature equivalent to:
    ///                     \code
    ///                     <ignored> pred(I const& a, ...);
    ///                     \endcode \n
    ///                     The signature does not need to have const&. It will
    ///                     receive the current value of the iteration variable
    ///                     and one argument for each of the induction or
    ///                     reduction objects passed to the algorithms,
    ///                     representing their current values.
    ///
    /// Requires: \a I shall be an integral type or meet the requirements
    ///           of an input iterator type. The \a args parameter pack shall
    ///           have at least one element, comprising objects returned by
    ///           invocations of \a reduction and/or \a induction function
    ///           templates followed by exactly one invocable
    ///           element-access function, \a f. \a f shall meet the
    ///           requirements of MoveConstructible.
    ///
    /// Effects:  Applies \a f to each element in the input sequence, with
    ///           additional arguments corresponding to the reductions and
    ///           inductions in the \a args parameter pack. The length of the
    ///           input sequence is \a size.
    ///
    /// The first element in the input sequence is specified by \a first. Each
    /// subsequent element is generated by incrementing the previous element.
    ///
    /// \note As described in the C++ standard, arithmetic on non-random-access
    ///       iterators is performed using advance and distance.
    ///
    /// \note The order of the elements of the input sequence is important for
    ///       determining ordinal position of an application of \a f, even
    ///       though the applications themselves may be unordered.
    ///
    /// Along with an element from the input sequence, for each member of the
    /// \a args parameter pack excluding \a f, an additional argument is passed
    /// to each application of \a f as follows:
    ///
    /// If the pack member is an object returned by a call to a reduction
    /// function, then the additional argument is a reference to a view of
    /// that reduction object.
    /// If the pack member is an object returned by a call to induction, then
    /// the additional argument is the induction value for that induction object
    /// corresponding to the position of the application of \a f in the input
    /// sequence.
    ///
    /// Complexity: Applies \a f exactly once for each element of the input
    ///             sequence.
    ///
    /// Remarks: If \a f returns a result, the result is ignored.
    ///
    /// \returns  The \a for_loop_n algorithm returns a
    ///           \a hpx::future<void> if the execution policy is of
    ///           type
    ///           \a hpx::execution::sequenced_task_policy or
    ///           \a hpx::execution::parallel_task_policy and returns \a void
    ///           otherwise.
    ///
    /// \note \a first and \a size are taken by value, so \a I and \a Size are
    ///       deduced as the decayed types of the corresponding arguments.
    ///
    template <typename ExPolicy, typename I, typename Size, typename... Args>
    typename util::detail::algorithm_result<ExPolicy>::type for_loop_n(
        ExPolicy&& policy, I first, Size size, Args&&... args);

    /// The for_loop_n_strided implements loop functionality over a range
    /// specified by integral or iterator bounds. For the iterator case, these
    /// algorithms resemble for_each from the Parallelism TS, but leave to the
    /// programmer when and if to dereference the iterator.
    ///
    /// The execution of for_loop_n_strided without specifying an execution
    /// policy is equivalent to specifying \a hpx::execution::seq as the
    /// execution policy.
    ///
    /// \tparam I           The type of the iteration variable. This could be
    ///                     an (forward) iterator type or an integral type.
    /// \tparam Size        The type of a non-negative integral value specifying
    ///                     the number of items to iterate over.
    /// \tparam S           The type of the stride variable. This should be
    ///                     an integral type.
    /// \tparam Args        A parameter pack; its last element is a function
    ///                     object to be invoked for each iteration, the others
    ///                     have to conform to either the induction or the
    ///                     reduction concept.
    ///
    /// \param first        Refers to the beginning of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param size         Refers to the number of items the algorithm will be
    ///                     applied to.
    /// \param stride       Refers to the stride of the iteration steps. This
    ///                     shall have non-zero value and shall be negative
    ///                     only if I has integral type or meets the requirements
    ///                     of a bidirectional iterator.
    /// \param args         The last element of this parameter pack is the
    ///                     function (object) to invoke, while the remaining
    ///                     elements of the parameter pack are instances of
    ///                     either induction or reduction objects.
    ///                     The function (or function object) which will be
    ///                     invoked for each visited element of the sequence
    ///                     described by \a first, \a size and \a stride should
    ///                     expose a signature equivalent to:
    ///                     \code
    ///                     <ignored> pred(I const& a, ...);
    ///                     \endcode \n
    ///                     The signature does not need to have const&. It will
    ///                     receive the current value of the iteration variable
    ///                     and one argument for each of the induction or
    ///                     reduction objects passed to the algorithms,
    ///                     representing their current values.
    ///
    /// Requires: \a I shall be an integral type or meet the requirements
    ///           of an input iterator type. The \a args parameter pack shall
    ///           have at least one element, comprising objects returned by
    ///           invocations of \a reduction and/or \a induction function
    ///           templates followed by exactly one invocable element-access
    ///           function, \a f. \a f shall meet the requirements of
    ///           MoveConstructible.
    ///
    /// Effects:  Applies \a f to each element in the input sequence, with
    ///           additional arguments corresponding to the reductions and
    ///           inductions in the \a args parameter pack. The input sequence
    ///           is determined by \a first, \a size and \a stride.
    ///
    /// The first element in the input sequence is specified by \a first. Each
    /// subsequent element is generated by advancing the previous element by
    /// \a stride.
    ///
    /// \note As described in the C++ standard, arithmetic on non-random-access
    ///       iterators is performed using advance and distance.
    ///
    /// \note The order of the elements of the input sequence is important for
    ///       determining ordinal position of an application of \a f, even
    ///       though the applications themselves may be unordered.
    ///
    /// Along with an element from the input sequence, for each member of the
    /// \a args parameter pack excluding \a f, an additional argument is passed
    /// to each application of \a f as follows:
    ///
    /// If the pack member is an object returned by a call to a reduction
    /// function, then the additional argument is a reference to a view of that
    /// reduction object.
    /// If the pack member is an object returned by a call to induction, then
    /// the additional argument is the induction value for that induction object
    /// corresponding to the position of the application of \a f in the input
    /// sequence.
    ///
    /// Complexity: Applies \a f exactly once for each element of the input
    ///             sequence.
    ///
    /// Remarks: If \a f returns a result, the result is ignored.
    ///
    template <typename I, typename Size, typename S, typename... Args>
    void for_loop_n_strided(I first, Size size, S stride, Args&&... args);

    /// The for_loop_n_strided implements loop functionality over a range
    /// specified by integral or iterator bounds. For the iterator case, these
    /// algorithms resemble for_each from the Parallelism TS, but leave to the
    /// programmer when and if to dereference the iterator.
    ///
    /// \tparam ExPolicy    The type of the execution policy to use (deduced).
    ///                     It describes the manner in which the execution
    ///                     of the algorithm may be parallelized and the manner
    ///                     in which it applies user-provided function objects.
    /// \tparam I           The type of the iteration variable. This could be
    ///                     an (forward) iterator type or an integral type.
    /// \tparam Size        The type of a non-negative integral value specifying
    ///                     the number of items to iterate over.
    /// \tparam S           The type of the stride variable. This should be
    ///                     an integral type.
    /// \tparam Args        A parameter pack; its last element is a function
    ///                     object to be invoked for each iteration, the others
    ///                     have to conform to either the induction or the
    ///                     reduction concept.
    ///
    /// \param policy       The execution policy to use for the scheduling of
    ///                     the iterations.
    /// \param first        Refers to the beginning of the sequence of elements
    ///                     the algorithm will be applied to.
    /// \param size         Refers to the number of items the algorithm will be
    ///                     applied to.
    /// \param stride       Refers to the stride of the iteration steps. This
    ///                     shall have non-zero value and shall be negative
    ///                     only if I has integral type or meets the requirements
    ///                     of a bidirectional iterator.
    /// \param args         The last element of this parameter pack is the
    ///                     function (object) to invoke, while the remaining
    ///                     elements of the parameter pack are instances of
    ///                     either induction or reduction objects.
    ///                     The function (or function object) which will be
    ///                     invoked for each visited element of the sequence
    ///                     described by \a first, \a size and \a stride should
    ///                     expose a signature equivalent to:
    ///                     \code
    ///                     <ignored> pred(I const& a, ...);
    ///                     \endcode \n
    ///                     The signature does not need to have const&. It will
    ///                     receive the current value of the iteration variable
    ///                     and one argument for each of the induction or
    ///                     reduction objects passed to the algorithms,
    ///                     representing their current values.
    ///
    /// Requires: \a I shall be an integral type or meet the requirements
    ///           of an input iterator type. The \a args parameter pack shall
    ///           have at least one element, comprising objects returned by
    ///           invocations of \a reduction and/or \a induction function
    ///           templates followed by exactly one invocable element-access
    ///           function, \a f. \a f shall meet the requirements of
    ///           MoveConstructible.
    ///
    /// Effects:  Applies \a f to each element in the input sequence, with
    ///           additional arguments corresponding to the reductions and
    ///           inductions in the \a args parameter pack. The input sequence
    ///           is determined by \a first, \a size and \a stride.
    ///
    /// The first element in the input sequence is specified by \a first. Each
    /// subsequent element is generated by advancing the previous element by
    /// \a stride.
    ///
    /// \note As described in the C++ standard, arithmetic on non-random-access
    ///       iterators is performed using advance and distance.
    ///
    /// \note The order of the elements of the input sequence is important for
    ///       determining ordinal position of an application of \a f, even
    ///       though the applications themselves may be unordered.
    ///
    /// Along with an element from the input sequence, for each member of the
    /// \a args parameter pack excluding \a f, an additional argument is passed
    /// to each application of \a f as follows:
    ///
    /// If the pack member is an object returned by a call to a reduction
    /// function, then the additional argument is a reference to a view of that
    /// reduction object.
    /// If the pack member is an object returned by a call to induction, then
    /// the additional argument is the induction value for that induction object
    /// corresponding to the position of the application of \a f in the input
    /// sequence.
    ///
    /// Complexity: Applies \a f exactly once for each element of the input
    ///             sequence.
    ///
    /// Remarks: If \a f returns a result, the result is ignored.
    ///
    /// \returns  The \a for_loop_n_strided algorithm returns a
    ///           \a hpx::future<void> if the execution policy is of
    ///           type
    ///           \a hpx::execution::sequenced_task_policy or
    ///           \a hpx::execution::parallel_task_policy and returns \a void
    ///           otherwise.
    ///
    template <typename ExPolicy, typename I, typename Size, typename S,
        typename... Args>
    typename util::detail::algorithm_result<ExPolicy>::type for_loop_n_strided(
        ExPolicy&& policy, I first, Size size, S stride, Args&&... args);
}    // namespace hpx

#else

#include <hpx/config.hpp>
#include <hpx/assert.hpp>
#include <hpx/concepts/concepts.hpp>
#include <hpx/datastructures/tuple.hpp>
#include <hpx/execution/algorithms/detail/predicates.hpp>
#include <hpx/functional/detail/invoke.hpp>
#include <hpx/functional/tag_fallback_dispatch.hpp>
#include <hpx/iterator_support/traits/is_iterator.hpp>
#include <hpx/modules/executors.hpp>
#include <hpx/modules/threading_base.hpp>
#include <hpx/type_support/pack.hpp>
#include <hpx/type_support/unused.hpp>

#include <hpx/parallel/algorithms/detail/dispatch.hpp>
#include <hpx/parallel/algorithms/for_loop_induction.hpp>
#include <hpx/parallel/algorithms/for_loop_reduction.hpp>
#include <hpx/parallel/util/detail/algorithm_result.hpp>
#include <hpx/parallel/util/loop.hpp>
#include <hpx/parallel/util/partitioner.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <type_traits>
#include <utility>
#include <vector>

namespace hpx {
    namespace parallel { inline namespace v2 {
        // for_loop
        namespace detail {
            /// \cond NOINTERNAL

            ///////////////////////////////////////////////////////////////////////
            // Calls init_iteration(part_index) on every element of \a args
            // selected by the index pack, in the order the indices are given.
            template <typename... Ts, std::size_t... Is>
            HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void init_iteration(
                hpx::tuple<Ts...>& args, hpx::util::index_pack<Is...>,
                std::size_t part_index)
            {
                // The pack is expanded inside a braced initializer list, which
                // evaluates its elements left to right; the leading 0 keeps the
                // array non-empty for an empty pack.
                int const expansion[] = {0,
                    (hpx::get<Is>(args).init_iteration(part_index), 0)...};
                (void) expansion;
            }

            // Performs a single loop-body invocation: \a f is called with the
            // current iteration variable \a part_begin as its first argument,
            // followed by the result of iteration_value() for every element of
            // \a args selected by the index pack.
            template <typename... Ts, std::size_t... Is, typename F, typename B>
            HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void invoke_iteration(
                hpx::tuple<Ts...>& args, hpx::util::index_pack<Is...>, F&& f,
                B part_begin)
            {
                HPX_INVOKE(std::forward<F>(f), part_begin,
                    hpx::get<Is>(args).iteration_value()...);
            }

            // Calls next_iteration() on every element of \a args selected by
            // the index pack, in the order the indices are given.
            template <typename... Ts, std::size_t... Is>
            HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void next_iteration(
                hpx::tuple<Ts...>& args, hpx::util::index_pack<Is...>)
            {
                // Braced-list pack expansion; the leading 0 keeps the array
                // non-empty for an empty pack.
                int const expansion[] = {
                    0, (hpx::get<Is>(args).next_iteration(), 0)...};
                (void) expansion;
            }

            // Calls exit_iteration(size) on every element of \a args selected
            // by the index pack, in the order the indices are given.
            template <typename... Ts, std::size_t... Is>
            HPX_HOST_DEVICE HPX_FORCEINLINE constexpr void exit_iteration(
                hpx::tuple<Ts...>& args, hpx::util::index_pack<Is...>,
                std::size_t size)
            {
                // Braced-list pack expansion; the leading 0 keeps the array
                // non-empty for an empty pack.
                int const expansion[] = {
                    0, (hpx::get<Is>(args).exit_iteration(size), 0)...};
                (void) expansion;
            }

            ///////////////////////////////////////////////////////////////////////
            // Primary template, declared only. It is specialized on the tuple
            // type that carries the additional (non-function) loop arguments.
            template <typename ExPolicy, typename F, typename S, typename Tuple>
            struct part_iterations;

            // Function object that runs one partition of the loop when
            // additional arguments (the elements of hpx::tuple<Ts...>) were
            // passed to the algorithm. It holds the loop body (stored as
            // std::decay_t<F>), the stride and its own copy of the argument
            // tuple.
            template <typename ExPolicy, typename F, typename S, typename... Ts>
            struct part_iterations<ExPolicy, F, S, hpx::tuple<Ts...>>
            {
                using fun_type = std::decay_t<F>;

                fun_type f_;                // loop body
                S stride_;                  // step between visited positions
                hpx::tuple<Ts...> args_;    // additional loop arguments

                // Forwards the function object, the stride and the argument
                // tuple into the corresponding members.
                template <typename F_, typename S_, typename Args>
                part_iterations(F_&& f, S_&& stride, Args&& args)
                  : f_(std::forward<F_>(f))
                  , stride_(std::forward<S_>(stride))
                  , args_(std::forward<Args>(args))
                {
                }

                // Runs the partition that starts at \a part_begin and spans
                // \a part_steps positions.
                //
                // init_iteration(part_index) is called once on every element of
                // args_ before the first invocation. Each invocation of f_ is
                // followed by next_iteration() on every element of args_.
                template <typename B>
                HPX_HOST_DEVICE constexpr void execute(B part_begin,
                    std::size_t part_steps, std::size_t part_index)
                {
                    auto pack = typename hpx::util::make_index_pack<sizeof...(
                        Ts)>::type();
                    detail::init_iteration(args_, pack, part_index);

                    if (stride_ == 1)
                    {
                        // unit stride: visit every position exactly once
                        while (part_steps-- != 0)
                        {
                            // post-increment: f_ sees the current position,
                            // part_begin then moves on to the next one
                            detail::invoke_iteration(
                                args_, pack, f_, part_begin++);
                            detail::next_iteration(args_, pack);
                        }
                    }
                    else if (stride_ > 0)
                    {
                        // forward stride: invoke, then skip stride_ positions
                        // as long as at least stride_ positions remain
                        while (part_steps >= std::size_t(stride_))
                        {
                            detail::invoke_iteration(
                                args_, pack, f_, part_begin);

                            part_begin =
                                parallel::v1::detail::next(part_begin, stride_);
                            part_steps -= stride_;

                            detail::next_iteration(args_, pack);
                        }

                        // fewer than stride_ (but more than zero) positions
                        // are left: one final invocation without advancing
                        if (part_steps != 0)
                        {
                            detail::invoke_iteration(
                                args_, pack, f_, part_begin);
                            detail::next_iteration(args_, pack);
                        }
                    }
                    else
                    {
                        // backward stride: same scheme using the magnitude
                        // -stride_ as the step size
                        while (part_steps >= std::size_t(-stride_))
                        {
                            detail::invoke_iteration(
                                args_, pack, f_, part_begin);

                            part_begin =
                                parallel::v1::detail::next(part_begin, stride_);
                            // stride_ is negative here, so adding it reduces
                            // the number of remaining positions
                            part_steps += stride_;

                            detail::next_iteration(args_, pack);
                        }

                        if (part_steps != 0)
                        {
                            detail::invoke_iteration(
                                args_, pack, f_, part_begin);
                            detail::next_iteration(args_, pack);
                        }
                    }
                }

                // Call operator: creates an hpx::util::annotate_function
                // object for f_ that lives for the duration of the call, then
                // runs the partition via execute().
                // NOTE(review): presumably the annotation labels the running
                // task for profiling/tracing - confirm against
                // hpx::util::annotate_function.
                template <typename B>
                HPX_HOST_DEVICE HPX_FORCEINLINE void operator()(B part_begin,
                    std::size_t part_steps, std::size_t part_index)
                {
                    hpx::util::annotate_function annotate(f_);
                    execute(part_begin, part_steps, part_index);
                }
            };

            // Function object that runs one partition of the loop when no
            // additional arguments were passed (empty argument tuple). Only
            // the loop body (stored as std::decay_t<F>) and the stride are
            // kept; the loop body is invoked with the iteration variable
            // alone.
            template <typename ExPolicy, typename F, typename S>
            struct part_iterations<ExPolicy, F, S, hpx::tuple<>>
            {
                using fun_type = std::decay_t<F>;

                fun_type f_;    // loop body
                S stride_;      // step between visited positions

                // Forwards the function object and the stride into the
                // members; the third argument (the empty tuple) is ignored.
                template <typename F_, typename S_, typename Args>
                part_iterations(F_&& f, S_&& stride, Args&&)
                  : f_(std::forward<F_>(f))
                  , stride_(std::forward<S_>(stride))
                {
                }

                // Runs the partition that starts at \a part_begin and spans
                // \a part_steps positions.
                template <typename B>
                HPX_HOST_DEVICE constexpr void execute(
                    B part_begin, std::size_t part_steps)
                {
                    if (stride_ == 1)
                    {
                        // unit stride: delegate to the generic loop helper
                        parallel::util::loop_n<std::decay_t<ExPolicy>>(
                            part_begin, part_steps, f_);
                    }
                    else if (stride_ > 0)
                    {
                        // forward stride: invoke, then skip stride_ positions
                        // as long as at least stride_ positions remain
                        while (part_steps >= std::size_t(stride_))
                        {
                            HPX_INVOKE(f_, part_begin);

                            part_begin =
                                parallel::v1::detail::next(part_begin, stride_);
                            part_steps -= stride_;
                        }

                        // fewer than stride_ (but more than zero) positions
                        // are left: one final invocation
                        if (part_steps != 0)
                        {
                            HPX_INVOKE(f_, part_begin);
                        }
                    }
                    else
                    {
                        // backward stride: same scheme using the magnitude
                        // -stride_ as the step size
                        while (part_steps >= std::size_t(-stride_))
                        {
                            HPX_INVOKE(f_, part_begin);

                            part_begin =
                                parallel::v1::detail::next(part_begin, stride_);
                            // stride_ is negative here, so adding it reduces
                            // the number of remaining positions
                            part_steps += stride_;
                        }

                        if (part_steps != 0)
                        {
                            HPX_INVOKE(f_, part_begin);
                        }
                    }
                }

                // Call operator: creates an hpx::util::annotate_function
                // object for f_ that lives for the duration of the call, then
                // runs the partition via execute(). The third argument (the
                // partition index) is unused.
                template <typename B>
                HPX_HOST_DEVICE HPX_FORCEINLINE void operator()(
                    B part_begin, std::size_t part_steps, std::size_t)
                {
                    hpx::util::annotate_function annotate(f_);
                    execute(part_begin, part_steps);
                }
            };

            ///////////////////////////////////////////////////////////////////////
            // Algorithm object behind all for_loop variants. It derives from
            // v1::detail::algorithm<for_loop_algo> and provides the sequential
            // and parallel implementations used by that base class.
            struct for_loop_algo : public v1::detail::algorithm<for_loop_algo>
            {
                // Passes the name "for_loop_algo" to the base class.
                for_loop_algo()
                  : for_loop_algo::algorithm("for_loop_algo")
                {
                }

                // Sequential loop without additional arguments: invokes \a f
                // with the iteration variable only.
                //
                // \a count is the number of positions spanned starting at
                // \a first; with a non-unit stride \a f is invoked at every
                // stride-th position, including one final invocation if fewer
                // than |stride| (but more than zero) positions remain.
                template <typename ExPolicy, typename InIter, typename S,
                    typename F>
                HPX_HOST_DEVICE static hpx::util::unused_type sequential(
                    ExPolicy&&, InIter first, std::size_t count, S stride,
                    F&& f)
                {
                    if (stride == 1)
                    {
                        // unit stride: delegate to the generic loop helper
                        parallel::util::loop_n<std::decay_t<ExPolicy>>(
                            first, count, std::forward<F>(f));
                    }
                    else if (stride > 0)
                    {
                        while (count >= std::size_t(stride))
                        {
                            HPX_INVOKE(f, first);

                            first = parallel::v1::detail::next(first, stride);
                            count -= stride;
                        }

                        if (count != 0)
                        {
                            HPX_INVOKE(f, first);
                        }
                    }
                    else
                    {
                        // negative stride: step size is the magnitude -stride
                        while (count >= std::size_t(-stride))
                        {
                            HPX_INVOKE(f, first);

                            first = parallel::v1::detail::next(first, stride);
                            // stride is negative, so this decreases count
                            count += stride;
                        }

                        if (count != 0)
                        {
                            HPX_INVOKE(f, first);
                        }
                    }

                    return hpx::util::unused_type();
                }

                // Sequential loop with at least one additional argument.
                //
                // Every additional argument gets init_iteration(0) before the
                // loop, contributes iteration_value() to each invocation of
                // \a f, gets next_iteration() after each invocation made
                // inside the stride loops, and finally exit_iteration(size).
                // A stride of 1 is handled by the generic positive-stride
                // branch.
                template <typename ExPolicy, typename InIter, typename Size,
                    typename S, typename F, typename Arg, typename... Args>
                HPX_HOST_DEVICE static hpx::util::unused_type sequential(
                    ExPolicy&&, InIter first, Size size, S stride, F&& f,
                    Arg&& arg, Args&&... args)
                {
                    int const init_sequencer[] = {(arg.init_iteration(0), 0),
                        (args.init_iteration(0), 0)...};
                    (void) init_sequencer;

                    std::size_t count = size;
                    if (stride > 0)
                    {
                        while (count >= std::size_t(stride))
                        {
                            HPX_INVOKE(f, first, arg.iteration_value(),
                                args.iteration_value()...);

                            first = parallel::v1::detail::next(first, stride);
                            count -= stride;

                            int const next_sequencer[] = {
                                (arg.next_iteration(), 0),
                                (args.next_iteration(), 0)...};
                            (void) next_sequencer;
                        }
                    }
                    else
                    {
                        // negative stride: step size is the magnitude -stride
                        while (count >= std::size_t(-stride))
                        {
                            HPX_INVOKE(f, first, arg.iteration_value(),
                                args.iteration_value()...);

                            first = parallel::v1::detail::next(first, stride);
                            // stride is negative, so this decreases count
                            count += stride;

                            int const next_sequencer[] = {
                                (arg.next_iteration(), 0),
                                (args.next_iteration(), 0)...};
                            (void) next_sequencer;
                        }
                    }

                    // fewer than |stride| (but more than zero) positions are
                    // left: one final invocation; next_iteration() is not
                    // called afterwards
                    if (count != 0)
                    {
                        HPX_INVOKE(f, first, arg.iteration_value(),
                            args.iteration_value()...);
                    }

                    // make sure live-out variables are properly set on return
                    int const exit_sequencer[] = {(arg.exit_iteration(size), 0),
                        (args.exit_iteration(size), 0)...};
                    (void) exit_sequencer;

                    return hpx::util::unused_type();
                }

                // Parallel loop: hands the range to
                // util::partitioner<ExPolicy>::call_with_index together with a
                // part_iterations object that executes each partition, and a
                // second callable that calls exit_iteration(size) on the
                // additional arguments.
                template <typename ExPolicy, typename B, typename Size,
                    typename S, typename F, typename... Ts>
                static typename util::detail::algorithm_result<ExPolicy>::type
                parallel(ExPolicy&& policy, B first, Size size, S stride, F&& f,
                    Ts&&... ts)
                {
                    // NOTE(review): for an empty range this returns before
                    // exit_iteration() is ever called, whereas the sequential
                    // overload above still calls exit_iteration(size) when
                    // size is 0 - confirm this difference is intended.
                    if (size == 0)
                        return util::detail::algorithm_result<ExPolicy>::get();

                    // we need to decay copy here to properly transport everything
                    // to a GPU device
                    using args_type = hpx::tuple<std::decay_t<Ts>...>;

                    args_type args =
                        hpx::forward_as_tuple(std::forward<Ts>(ts)...);

                    // part_iterations receives its own copy of args, and the
                    // lambda below captures another copy (by-value capture).
                    // NOTE(review): presumably the induction/reduction
                    // objects share their state across copies - verify
                    // against for_loop_induction.hpp/for_loop_reduction.hpp.
                    return util::partitioner<ExPolicy>::call_with_index(policy,
                        first, size, stride,
                        part_iterations<ExPolicy, F, S, args_type>{
                            std::forward<F>(f), stride, args},
                        [=](std::vector<hpx::future<void>>&&) mutable -> void {
                            auto pack =
                                typename hpx::util::make_index_pack<sizeof...(
                                    Ts)>::type();
                            // make sure live-out variables are properly set on
                            // return
                            detail::exit_iteration(args, pack, size);
                        });
                }
            };

            // reshuffle arguments, last argument is function object, will go first
            // Common entry point for the (first, last) overloads. Checks the
            // stride, computes the number of positions between \a first and
            // \a last and forwards everything to for_loop_algo.
            //
            // The function object is the last element of \a args; it is passed
            // to the algorithm ahead of the remaining elements, which are
            // selected by the index pack \a Is.
            template <typename ExPolicy, typename B, typename E, typename S,
                std::size_t... Is, typename... Args>
            typename util::detail::algorithm_result<ExPolicy>::type for_loop(
                ExPolicy&& policy, B first, E last, S stride,
                hpx::util::index_pack<Is...>, Args&&... args)
            {
                // loop boundaries have to be integral values or at least
                // forward iterators
                static_assert((std::is_integral<B>::value ||
                                  hpx::traits::is_forward_iterator<B>::value),
                    "Requires at least forward iterator or integral loop "
                    "boundaries.");

                // a zero stride is not allowed
                HPX_ASSERT(stride != 0);

                // a negative stride requires E to be an integral type or at
                // least a bidirectional iterator
                if (stride < 0)
                {
                    HPX_ASSERT(std::is_integral<E>::value ||
                        hpx::traits::is_bidirectional_iterator<E>::value);
                }

                // position of the function object inside the parameter pack
                constexpr std::size_t fun_index = sizeof...(Args) - 1;

                std::size_t count = parallel::v1::detail::distance(first, last);
                auto&& packed =
                    hpx::forward_as_tuple(std::forward<Args>(args)...);

                return for_loop_algo().call(std::forward<ExPolicy>(policy),
                    first, count, stride, hpx::get<fun_index>(packed),
                    hpx::get<Is>(packed)...);
            }

            // reshuffle arguments, last argument is function object, will go first
            // Common entry point for the (first, size) overloads. Checks the
            // stride and forwards everything to for_loop_algo.
            //
            // The function object is the last element of \a args; it is passed
            // to the algorithm ahead of the remaining elements, which are
            // selected by the index pack \a Is.
            template <typename ExPolicy, typename B, typename Size, typename S,
                std::size_t... Is, typename... Args>
            typename util::detail::algorithm_result<ExPolicy>::type for_loop_n(
                ExPolicy&& policy, B first, Size size, S stride,
                hpx::util::index_pack<Is...>, Args&&... args)
            {
                // the loop start has to be an integral value or at least a
                // forward iterator
                static_assert((std::is_integral<B>::value ||
                                  hpx::traits::is_forward_iterator<B>::value),
                    "Requires at least forward iterator or integral loop "
                    "boundaries.");

                // a zero stride is not allowed
                HPX_ASSERT(stride != 0);

                // a negative stride requires B to be an integral type or at
                // least a bidirectional iterator
                if (stride < 0)
                {
                    HPX_ASSERT(std::is_integral<B>::value ||
                        hpx::traits::is_bidirectional_iterator<B>::value);
                }

                // position of the function object inside the parameter pack
                constexpr std::size_t fun_index = sizeof...(Args) - 1;

                auto&& packed =
                    hpx::forward_as_tuple(std::forward<Args>(args)...);

                return for_loop_algo().call(std::forward<ExPolicy>(policy),
                    first, size, stride, hpx::get<fun_index>(packed),
                    hpx::get<Is>(packed)...);
            }
            /// \endcond
        }    // namespace detail

        // Deprecated spelling of hpx::for_loop taking an execution policy.
        // Iterates from \a first to \a last with a stride of 1; the last
        // element of \a args is the function object, any preceding elements
        // are forwarded as additional loop arguments.
        template <typename ExPolicy, typename I, typename... Args,
            HPX_CONCEPT_REQUIRES_(hpx::is_execution_policy<ExPolicy>::value &&
                (hpx::traits::is_iterator<I>::value ||
                    std::is_integral<I>::value))>
        HPX_DEPRECATED_V(1, 6,
            "hpx::parallel::for_loop is deprecated, use hpx::for_loop instead")
        typename util::detail::algorithm_result<ExPolicy>::type for_loop(
            ExPolicy&& policy, std::decay_t<I> first, I last, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop must be called with at least a function object");

            // indices of all pack elements except the trailing function object
            using leading_indices =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return detail::for_loop(std::forward<ExPolicy>(policy), first, last,
                1, leading_indices(), std::forward<Args>(args)...);
        }

        // Deprecated spelling of hpx::for_loop without an execution policy;
        // runs the policy-taking overload with hpx::execution::seq.
        template <typename I, typename... Args,
            HPX_CONCEPT_REQUIRES_(hpx::traits::is_iterator<I>::value ||
                std::is_integral<I>::value)>
        HPX_DEPRECATED_V(1, 6,
            "hpx::parallel::for_loop is deprecated, use hpx::for_loop instead")
        void for_loop(std::decay_t<I> first, I last, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop must be called with at least a function object");

            for_loop(
                hpx::execution::seq, first, last, std::forward<Args>(args)...);
        }

        // Deprecated spelling of hpx::for_loop_strided taking an execution
        // policy. Iterates from \a first to \a last in steps of \a stride; the
        // last element of \a args is the function object, any preceding
        // elements are forwarded as additional loop arguments.
        //
        // The deprecation message names for_loop_strided (it previously
        // pointed users at for_loop, which has a different signature).
        template <typename ExPolicy, typename I, typename S, typename... Args,
            HPX_CONCEPT_REQUIRES_(hpx::is_execution_policy<ExPolicy>::value &&
                std::is_integral<S>::value &&
                (hpx::traits::is_iterator<I>::value ||
                    std::is_integral<I>::value))>
        HPX_DEPRECATED_V(1, 6,
            "hpx::parallel::for_loop_strided is deprecated, use "
            "hpx::for_loop_strided instead")
        typename util::detail::algorithm_result<ExPolicy>::type
            for_loop_strided(ExPolicy&& policy, std::decay_t<I> first, I last,
                S stride, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_strided must be called with at least a function "
                "object");

            // index pack selecting every pack element except the trailing
            // function object
            using hpx::util::make_index_pack;
            return detail::for_loop(std::forward<ExPolicy>(policy), first, last,
                stride, typename make_index_pack<sizeof...(Args) - 1>::type(),
                std::forward<Args>(args)...);
        }

        // Deprecated spelling of hpx::for_loop_strided without an execution
        // policy; runs the policy-taking overload with hpx::execution::seq.
        //
        // The deprecation message names for_loop_strided (it previously
        // pointed users at for_loop, which has a different signature).
        template <typename I, typename S, typename... Args,
            HPX_CONCEPT_REQUIRES_(std::is_integral<S>::value &&
                (hpx::traits::is_iterator<I>::value ||
                    std::is_integral<I>::value))>
        HPX_DEPRECATED_V(1, 6,
            "hpx::parallel::for_loop_strided is deprecated, use "
            "hpx::for_loop_strided instead")
        void for_loop_strided(
            std::decay_t<I> first, I last, S stride, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_strided must be called with at least a function "
                "object");

            return for_loop_strided(hpx::execution::seq, first, last, stride,
                std::forward<Args>(args)...);
        }

        // Deprecated spelling of hpx::for_loop_n taking an execution policy.
        // Iterates over \a size positions starting at \a first with a stride
        // of 1; the last element of \a args is the function object, any
        // preceding elements are forwarded as additional loop arguments.
        //
        // The deprecation message names for_loop_n (it previously pointed
        // users at for_loop, which has a different signature).
        template <typename ExPolicy, typename I, typename Size,
            typename... Args,
            HPX_CONCEPT_REQUIRES_(hpx::is_execution_policy<ExPolicy>::value &&
                std::is_integral<Size>::value &&
                (hpx::traits::is_iterator<I>::value ||
                    std::is_integral<I>::value))>
        HPX_DEPRECATED_V(1, 6,
            "hpx::parallel::for_loop_n is deprecated, use hpx::for_loop_n "
            "instead")
        typename util::detail::algorithm_result<ExPolicy>::type
            for_loop_n(ExPolicy&& policy, I first, Size size, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_n must be called with at least a function object");

            // index pack selecting every pack element except the trailing
            // function object
            using hpx::util::make_index_pack;
            return detail::for_loop_n(std::forward<ExPolicy>(policy), first,
                size, 1, typename make_index_pack<sizeof...(Args) - 1>::type(),
                std::forward<Args>(args)...);
        }

        /// Deprecated sequential overload of for_loop_n.
        ///
        /// Forwards all arguments to the execution-policy overload of
        /// for_loop_n, passing \a hpx::execution::seq as the policy.
        ///
        /// \param first    Refers to the beginning of the sequence.
        /// \param size     Integral count passed on as the sequence length.
        /// \param args     Parameter pack holding at least one element
        ///                 (enforced by the static_assert below).
        ///
        /// \note The deprecation text names this function and its
        ///       replacement, hpx::for_loop_n; hpx::for_loop takes a
        ///       (first, last) pair and is not a drop-in substitute.
        template <typename I, typename Size, typename... Args,
            HPX_CONCEPT_REQUIRES_(std::is_integral<Size>::value &&
                (hpx::traits::is_iterator<I>::value ||
                    std::is_integral<I>::value))>
        HPX_DEPRECATED_V(1, 6,
            "hpx::parallel::for_loop_n is deprecated, use hpx::for_loop_n "
            "instead")
        void for_loop_n(I first, Size size, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_n must be called with at least a function object");

            // Delegate to the policy-taking overload, running sequentially.
            return for_loop_n(
                hpx::execution::seq, first, size, std::forward<Args>(args)...);
        }

        /// Deprecated execution-policy overload of for_loop_n_strided.
        ///
        /// Hands the arguments, including the caller-supplied stride, to
        /// detail::for_loop_n.
        ///
        /// \param policy   The execution policy, perfectly forwarded.
        /// \param first    Refers to the beginning of the sequence.
        /// \param size     Integral count passed on as the sequence length.
        /// \param stride   Integral stride, forwarded unchanged.
        /// \param args     Parameter pack holding at least one element
        ///                 (enforced by the static_assert below).
        ///
        /// \returns Whatever detail::for_loop_n returns, declared as
        ///          algorithm_result<ExPolicy>::type.
        ///
        /// \note The deprecation text names this function and its
        ///       replacement, hpx::for_loop_n_strided.
        // clang-format off
        template <typename ExPolicy, typename I, typename Size, typename S,
            typename... Args,
            HPX_CONCEPT_REQUIRES_(
                hpx::is_execution_policy<ExPolicy>::value &&
                std::is_integral<Size>::value &&
                std::is_integral<S>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        HPX_DEPRECATED_V(1, 6,
            "hpx::parallel::for_loop_n_strided is deprecated, use "
            "hpx::for_loop_n_strided instead")
        typename util::detail::algorithm_result<ExPolicy>::type
            for_loop_n_strided(
                ExPolicy&& policy, I first, Size size, S stride, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_n_strided must be called with at least a function "
                "object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return detail::for_loop_n(std::forward<ExPolicy>(policy), first,
                size, stride, leading_args_pack(),
                std::forward<Args>(args)...);
        }

        /// Deprecated sequential overload of for_loop_n_strided.
        ///
        /// Forwards all arguments to the execution-policy overload of
        /// for_loop_n_strided, passing \a hpx::execution::seq as the policy.
        ///
        /// \param first    Refers to the beginning of the sequence.
        /// \param size     Integral count passed on as the sequence length.
        /// \param stride   Integral stride, forwarded unchanged.
        /// \param args     Parameter pack holding at least one element
        ///                 (enforced by the static_assert below).
        ///
        /// \note The deprecation text names this function and its
        ///       replacement, hpx::for_loop_n_strided.
        // clang-format off
        template <typename I, typename Size, typename S, typename... Args,
            HPX_CONCEPT_REQUIRES_(
                std::is_integral<Size>::value &&
                std::is_integral<S>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        HPX_DEPRECATED_V(1, 6,
            "hpx::parallel::for_loop_n_strided is deprecated, use "
            "hpx::for_loop_n_strided instead")
        void for_loop_n_strided(I first, Size size, S stride, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_n_strided must be called with at least a function "
                "object");

            // Delegate to the policy-taking overload of for_loop_n_strided.
            // The call is a dependent name, so a misspelled callee would
            // only be diagnosed once this overload is instantiated.
            return for_loop_n_strided(hpx::execution::seq, first, size, stride,
                std::forward<Args>(args)...);
        }
    }}    // namespace parallel::v2

    ///////////////////////////////////////////////////////////////////////////
    // Function object type behind hpx::for_loop. It derives from
    // tag_fallback and supplies two tag_fallback_dispatch overloads: one
    // taking an execution policy and one that runs with hpx::execution::seq.
    // Both pass 1 in the stride position of detail::for_loop.
    HPX_INLINE_CONSTEXPR_VARIABLE struct for_loop_t final
      : hpx::functional::tag_fallback<for_loop_t>
    {
    private:
        // Overload with an explicit execution policy.
        // clang-format off
        template <typename ExPolicy, typename I, typename... Args,
            HPX_CONCEPT_REQUIRES_(
                hpx::is_execution_policy<ExPolicy>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        friend typename parallel::util::detail::algorithm_result<ExPolicy>::type
        tag_fallback_dispatch(hpx::for_loop_t, ExPolicy&& policy,
            std::decay_t<I> first, I last, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop must be called with at least a function object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return parallel::v2::detail::for_loop(
                std::forward<ExPolicy>(policy), first, last, 1,
                leading_args_pack(), std::forward<Args>(args)...);
        }

        // Overload without an execution policy; uses hpx::execution::seq.
        // clang-format off
        template <typename I, typename... Args,
            HPX_CONCEPT_REQUIRES_(
                hpx::traits::is_iterator<I>::value ||
                std::is_integral<I>::value
            )>
        // clang-format on
        friend void tag_fallback_dispatch(
            hpx::for_loop_t, std::decay_t<I> first, I last, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop must be called with at least a function object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return parallel::v2::detail::for_loop(hpx::execution::seq, first,
                last, 1, leading_args_pack(), std::forward<Args>(args)...);
        }
    } for_loop{};

    ///////////////////////////////////////////////////////////////////////////
    // Function object type behind hpx::for_loop_strided. It derives from
    // tag_fallback and supplies two tag_fallback_dispatch overloads: one
    // taking an execution policy and one that runs with hpx::execution::seq.
    // Both hand the caller's stride to detail::for_loop.
    HPX_INLINE_CONSTEXPR_VARIABLE struct for_loop_strided_t final
      : hpx::functional::tag_fallback<for_loop_strided_t>
    {
    private:
        // Overload with an explicit execution policy.
        // clang-format off
        template <typename ExPolicy, typename I, typename S, typename... Args,
            HPX_CONCEPT_REQUIRES_(
                hpx::is_execution_policy<ExPolicy>::value &&
                std::is_integral<S>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        friend typename parallel::util::detail::algorithm_result<ExPolicy>::type
        tag_fallback_dispatch(hpx::for_loop_strided_t, ExPolicy&& policy,
            std::decay_t<I> first, I last, S stride, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_strided must be called with at least a function "
                "object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return parallel::v2::detail::for_loop(
                std::forward<ExPolicy>(policy), first, last, stride,
                leading_args_pack(), std::forward<Args>(args)...);
        }

        // Overload without an execution policy; uses hpx::execution::seq.
        // clang-format off
        template <typename I, typename S, typename... Args,
            HPX_CONCEPT_REQUIRES_(
                std::is_integral<S>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        friend void tag_fallback_dispatch(hpx::for_loop_strided_t,
            std::decay_t<I> first, I last, S stride, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_strided must be called with at least a function "
                "object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return parallel::v2::detail::for_loop(hpx::execution::seq, first,
                last, stride, leading_args_pack(),
                std::forward<Args>(args)...);
        }
    } for_loop_strided{};

    ///////////////////////////////////////////////////////////////////////////
    // Function object type behind hpx::for_loop_n. It derives from
    // tag_fallback and supplies two tag_fallback_dispatch overloads: one
    // taking an execution policy and one that runs with hpx::execution::seq.
    // Both pass 1 in the stride position of detail::for_loop_n.
    HPX_INLINE_CONSTEXPR_VARIABLE struct for_loop_n_t final
      : hpx::functional::tag_fallback<for_loop_n_t>
    {
    private:
        // Overload with an explicit execution policy.
        // clang-format off
        template <typename ExPolicy, typename I, typename Size,
            typename... Args,
            HPX_CONCEPT_REQUIRES_(
                hpx::is_execution_policy<ExPolicy>::value &&
                std::is_integral<Size>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        friend typename parallel::util::detail::algorithm_result<ExPolicy>::type
        tag_fallback_dispatch(hpx::for_loop_n_t, ExPolicy&& policy, I first,
            Size size, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_n must be called with at least a function object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return parallel::v2::detail::for_loop_n(
                std::forward<ExPolicy>(policy), first, size, 1,
                leading_args_pack(), std::forward<Args>(args)...);
        }

        // Overload without an execution policy; uses hpx::execution::seq.
        // clang-format off
        template <typename I, typename Size, typename... Args,
            HPX_CONCEPT_REQUIRES_(
                std::is_integral<Size>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        friend void tag_fallback_dispatch(
            hpx::for_loop_n_t, I first, Size size, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_n must be called with at least a function object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return parallel::v2::detail::for_loop_n(hpx::execution::seq, first,
                size, 1, leading_args_pack(), std::forward<Args>(args)...);
        }
    } for_loop_n{};

    ///////////////////////////////////////////////////////////////////////////
    // Function object type behind hpx::for_loop_n_strided. It derives from
    // tag_fallback and supplies two tag_fallback_dispatch overloads: one
    // taking an execution policy and one that runs with hpx::execution::seq.
    // Both hand the caller's stride to detail::for_loop_n.
    HPX_INLINE_CONSTEXPR_VARIABLE struct for_loop_n_strided_t final
      : hpx::functional::tag_fallback<for_loop_n_strided_t>
    {
    private:
        // Overload with an explicit execution policy.
        // clang-format off
        template <typename ExPolicy, typename I, typename Size, typename S,
            typename... Args,
            HPX_CONCEPT_REQUIRES_(
                hpx::is_execution_policy<ExPolicy>::value &&
                std::is_integral<Size>::value &&
                std::is_integral<S>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        friend typename parallel::util::detail::algorithm_result<ExPolicy>::type
        tag_fallback_dispatch(hpx::for_loop_n_strided_t, ExPolicy&& policy,
            I first, Size size, S stride, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_n_strided must be called with at least a function "
                "object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return parallel::v2::detail::for_loop_n(
                std::forward<ExPolicy>(policy), first, size, stride,
                leading_args_pack(), std::forward<Args>(args)...);
        }

        // Overload without an execution policy; uses hpx::execution::seq.
        // clang-format off
        template <typename I, typename Size, typename S, typename... Args,
            HPX_CONCEPT_REQUIRES_(
                std::is_integral<Size>::value &&
                std::is_integral<S>::value &&
                (hpx::traits::is_iterator<I>::value ||
                 std::is_integral<I>::value)
            )>
        // clang-format on
        friend void tag_fallback_dispatch(hpx::for_loop_n_strided_t, I first,
            Size size, S stride, Args&&... args)
        {
            static_assert(sizeof...(Args) >= 1,
                "for_loop_n_strided must be called with at least a function "
                "object");

            // Index pack sized for every argument except the last one.
            using leading_args_pack =
                typename hpx::util::make_index_pack<sizeof...(Args) - 1>::type;

            return parallel::v2::detail::for_loop_n(hpx::execution::seq, first,
                size, stride, leading_args_pack(),
                std::forward<Args>(args)...);
        }
    } for_loop_n_strided{};
}    // namespace hpx

#if defined(HPX_HAVE_THREAD_DESCRIPTION)
namespace hpx { namespace traits {
    // get_function_address for a part_iterations wrapper: the query is
    // answered by the trait of the decayed function type F, applied to the
    // wrapper's f_ member.
    template <typename ExPolicy, typename F, typename S, typename Tuple>
    struct get_function_address<
        parallel::v2::detail::part_iterations<ExPolicy, F, S, Tuple>>
    {
        static constexpr std::size_t call(
            parallel::v2::detail::part_iterations<ExPolicy, F, S, Tuple> const&
                f) noexcept
        {
            using wrapped_function_type = std::decay_t<F>;
            return get_function_address<wrapped_function_type>::call(f.f_);
        }
    };

    // get_function_annotation for a part_iterations wrapper: the query is
    // answered by the trait of the decayed function type F, applied to the
    // wrapper's f_ member.
    template <typename ExPolicy, typename F, typename S, typename Tuple>
    struct get_function_annotation<
        parallel::v2::detail::part_iterations<ExPolicy, F, S, Tuple>>
    {
        static constexpr char const* call(
            parallel::v2::detail::part_iterations<ExPolicy, F, S, Tuple> const&
                f) noexcept
        {
            using wrapped_function_type = std::decay_t<F>;
            return get_function_annotation<wrapped_function_type>::call(f.f_);
        }
    };

#if HPX_HAVE_ITTNOTIFY != 0 && !defined(HPX_HAVE_APEX)
    // get_function_annotation_itt for a part_iterations wrapper: the query
    // is answered by the trait of the decayed function type F, applied to
    // the wrapper's f_ member.
    template <typename ExPolicy, typename F, typename S, typename Tuple>
    struct get_function_annotation_itt<
        parallel::v2::detail::part_iterations<ExPolicy, F, S, Tuple>>
    {
        static util::itt::string_handle call(
            parallel::v2::detail::part_iterations<ExPolicy, F, S, Tuple> const&
                f) noexcept
        {
            using wrapped_function_type = std::decay_t<F>;
            return get_function_annotation_itt<wrapped_function_type>::call(
                f.f_);
        }
    };
#endif
}}    // namespace hpx::traits
#endif
#endif
