Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_allReduceView.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_ALLREDUCEVIEW_HPP
11#define TPETRA_DETAILS_ALLREDUCEVIEW_HPP
12
15#include "Kokkos_Core.hpp"
16#include "Teuchos_CommHelpers.hpp"
17#include "Tpetra_Details_temporaryViewUtils.hpp"
18#include <limits>
19#include <type_traits>
20
23
24namespace Tpetra {
25namespace Details {
26
27template <typename InputViewType, typename OutputViewType>
28static void
29allReduceRawContiguous(const OutputViewType& output,
30 const InputViewType& input,
31 const Teuchos::Comm<int>& comm) {
32 using Teuchos::outArg;
33 using Teuchos::REDUCE_SUM;
34 using Teuchos::reduceAll;
35 using ValueType = typename InputViewType::non_const_value_type;
36 size_t count = input.span();
37 TEUCHOS_ASSERT(count <= size_t(INT_MAX));
38 if (isInterComm(comm) && input.data() == output.data()) {
39 // Can't do in-place collective on an intercomm,
40 // so use a separate copy as the input.
41 typename InputViewType::array_layout layout(input.extent(0), input.extent(1), input.extent(2), input.extent(3), input.extent(4), input.extent(5), input.extent(6), input.extent(7));
42 Kokkos::View<typename InputViewType::non_const_data_type, typename InputViewType::array_layout, typename InputViewType::device_type>
43 tempInput(Kokkos::ViewAllocateWithoutInitializing("tempInput"), layout);
44 // DEEP_COPY REVIEW - This could be either DEVICE-TO-DEVICE or HOST-TO-HOST
45 // Either way, MPI is called right afterwards, meaning we'd need a sync on device
46 Kokkos::deep_copy(tempInput, input);
47 reduceAll<int, ValueType>(comm, REDUCE_SUM, static_cast<int>(count),
48 tempInput.data(), output.data());
49 } else
50 reduceAll<int, ValueType>(comm, REDUCE_SUM, static_cast<int>(count),
51 input.data(), output.data());
52}
53
57template <class InputViewType, class OutputViewType>
58static void
60 const InputViewType& input,
61 const Teuchos::Comm<int>& comm) {
62 // using execution_space = typename OutputViewType::execution_space;
63 const bool viewsAlias = output.data() == input.data();
64 if (comm.getSize() == 1) {
65 if (!viewsAlias) {
66 // InputViewType and OutputViewType can't be AnonymousSpace
67 // Views, because deep_copy needs to know their memory spaces.
68 // DEEP_COPY REVIEW - NOT TESTED
69 Kokkos::deep_copy(output, input);
70 }
71 return;
72 }
73
74 // we must ensure MPI can handle the pointers we pass it
75 // if GPUAware, we are done
76 // otherwise, if the views use GPUs, then we should copy them
77 using Layout = typename TempView::UnifiedContiguousLayout<InputViewType, OutputViewType>::type;
78 // if one or both is already in the correct layout, toLayout returns the same view
79 auto inputContig = TempView::toLayout<InputViewType, Layout>(input);
80 auto outputContig = TempView::toLayout<InputViewType, Layout>(output);
82 allReduceRawContiguous(outputContig, inputContig, comm);
83
84 } else {
85 auto inputMPI = TempView::toMPISafe<decltype(inputContig), false>(inputContig);
86 auto outputMPI = TempView::toMPISafe<decltype(outputContig), false>(outputContig);
87 allReduceRawContiguous(outputMPI, inputMPI, comm);
88 // DEEP_COPY REVIEW - Could be either
89 Kokkos::deep_copy(outputContig, outputMPI);
90 }
91 // DEEP_COPY REVIEW - Could be either
92 Kokkos::deep_copy(output, outputContig);
93}
94
95} // namespace Details
96} // namespace Tpetra
97
98#endif // TPETRA_DETAILS_ALLREDUCEVIEW_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::isInterComm.
Struct that holds views of the contents of a CrsMatrix.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
Implementation details of Tpetra.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator.
Namespace Tpetra contains the class and methods constituting the Tpetra library.