Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_copyConvert.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_COPYCONVERT_HPP
11#define TPETRA_DETAILS_COPYCONVERT_HPP
12
17
18#include "TpetraCore_config.h"
19#include "Kokkos_Core.hpp"
20#include "KokkosKernels_ArithTraits.hpp"
21#include <sstream>
22#include <stdexcept>
23#include <type_traits>
24
25namespace Tpetra {
26namespace Details {
27
28//
29// Implementation details for copyConvert (see below).
30// Users should skip over this anonymous namespace.
31//
32namespace { // (anonymous)
33
34// We need separate implementations for both (T,complex) and
35// (complex,T), but we can't just overload for both cases, because
36// that would be ambiguous (e.g., (complex,complex)).
37template <class OutputValueType,
38 class InputValueType,
39 const bool outputIsComplex =
40 KokkosKernels::ArithTraits<OutputValueType>::is_complex,
41 const bool inputIsComplex =
42 KokkosKernels::ArithTraits<InputValueType>::is_complex>
43struct ConvertValue {
44 static KOKKOS_INLINE_FUNCTION void
45 convert(OutputValueType& dst, const InputValueType& src) {
46 // This looks trivial, but it actually invokes OutputValueType's
47 // constructor, so that needs to be marked as a __host__
48 // __device__ function (e.g., via the KOKKOS_FUNCTION or
49 // KOKKOS_INLINE_FUNCTION macros).
50 dst = OutputValueType(src);
51 }
52};
53
54template <class OutputRealType, class InputComplexType>
55struct ConvertValue<OutputRealType, InputComplexType, false, true> {
56 static KOKKOS_INLINE_FUNCTION void
57 convert(OutputRealType& dst,
58 const InputComplexType& src) {
59 // OutputRealType's constructor needs to be marked with either
60 // KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
61 using KAI = KokkosKernels::ArithTraits<InputComplexType>;
62 dst = OutputRealType(KAI::real(src));
63 }
64};
65
66template <class OutputComplexType, class InputRealType>
67struct ConvertValue<OutputComplexType, InputRealType, true, false> {
68 static KOKKOS_INLINE_FUNCTION void
69 convert(OutputComplexType& dst,
70 const InputRealType& src) {
71 // OutputComplexType's constructor needs to be marked with
72 // either KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
73 using output_mag_type =
74 typename KokkosKernels::ArithTraits<OutputComplexType>::mag_type;
75 using KAM = KokkosKernels::ArithTraits<output_mag_type>;
76 dst = OutputComplexType(src, KAM::zero());
77 }
78};
79
80template <class OutputValueType,
81 class InputValueType>
82KOKKOS_INLINE_FUNCTION void
83convertValue(OutputValueType& dst, const InputValueType& src) {
84 ConvertValue<OutputValueType, InputValueType>::convert(dst, src);
85}
86
/// Functor that implements copyConvert element by element.
///
/// The primary template is intentionally empty; the specializations
/// on the third parameter (the rank of the output View) that follow
/// do the actual work.
template <class OutputViewType,
          class InputViewType,
          const int rank = static_cast<int>(OutputViewType::rank)>
class CopyConvertFunctor {
};
95
96template <class OutputViewType,
97 class InputViewType>
98class CopyConvertFunctor<OutputViewType, InputViewType, 1> {
99 private:
100 static_assert(static_cast<int>(OutputViewType::rank) == 1 &&
101 static_cast<int>(InputViewType::rank) == 1,
102 "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
103 "OutputViewType and InputViewType must both have rank 1.");
104 OutputViewType dst_;
105 InputViewType src_;
106
107 public:
108 using index_type = typename OutputViewType::size_type;
109
110 CopyConvertFunctor(const OutputViewType& dst,
111 const InputViewType& src)
112 : dst_(dst)
113 , src_(src) {}
114
115 KOKKOS_INLINE_FUNCTION void
116 operator()(const index_type i) const {
117 convertValue(dst_(i), src_(i));
118 }
119};
120
121template <class OutputViewType,
122 class InputViewType>
123class CopyConvertFunctor<OutputViewType, InputViewType, 2> {
124 public:
125 using index_type = typename OutputViewType::size_type;
126
127 private:
128 static_assert(static_cast<int>(OutputViewType::rank) == 2 &&
129 static_cast<int>(InputViewType::rank) == 2,
130 "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
131 "OutputViewType and InputViewType must both have rank 2.");
132 OutputViewType dst_;
133 InputViewType src_;
134 index_type numCols_;
135
136 public:
137 CopyConvertFunctor(const OutputViewType& dst,
138 const InputViewType& src)
139 : dst_(dst)
140 , src_(src)
141 , numCols_(dst.extent(1)) {}
142
143 KOKKOS_INLINE_FUNCTION void
144 operator()(const index_type i) const {
145 const index_type numCols = numCols_;
146 for (index_type j = 0; j < numCols; ++j) {
147 convertValue(dst_(i, j), src_(i, j));
148 }
149 }
150};
151
/// Compile-time predicate: may copyConvert hand the copy straight to
/// Kokkos::deep_copy instead of running a conversion kernel?
///
/// \c value is true exactly when the two View types have the same
/// (non-const) value type AND share either their memory space or
/// their array layout.
template <class OutputViewType, class InputViewType>
class CanUseKokkosDeepCopy {
 private:
  using output_value_type = typename OutputViewType::non_const_value_type;
  using input_value_type  = typename InputViewType::non_const_value_type;

  // No conversion is needed only if the element types agree.
  static constexpr bool sameValueType =
      std::is_same<output_value_type, input_value_type>::value;
  static constexpr bool sameLayout =
      std::is_same<typename OutputViewType::array_layout,
                   typename InputViewType::array_layout>::value;
  static constexpr bool sameMemorySpace =
      std::is_same<typename OutputViewType::memory_space,
                   typename InputViewType::memory_space>::value;

 public:
  static constexpr bool value =
      sameValueType && (sameMemorySpace || sameLayout);
};
170
189template <class OutputViewType,
190 class InputViewType,
191 const bool canUseKokkosDeepCopy =
192 CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value,
193 const bool outputExecSpaceCanAccessInputMemSpace =
194 Kokkos::SpaceAccessibility<
195 typename OutputViewType::memory_space,
196 typename InputViewType::memory_space>::accessible>
197struct CopyConvertImpl {
198 static void
199 run(const OutputViewType& dst,
200 const InputViewType& src);
201};
202
204template <class OutputViewType,
205 class InputViewType,
206 const bool outputExecSpaceCanAccessInputMemSpace>
207struct CopyConvertImpl<OutputViewType, InputViewType,
208 true, outputExecSpaceCanAccessInputMemSpace> {
209 static void
210 run(const OutputViewType& dst,
211 const InputViewType& src) {
212 // NOTE: It's important to do the addition _inside_ the
213 // reinterpret-cast. If you reinterpret_cast the separate
214 // results, you may get the wrong answer (e.g., because
215 // ptrdiff_t is signed, and pointers may have arbitrary 64-bit
216 // virtual addresses). I'm speaking from experience here.
217 const ptrdiff_t dst_beg = reinterpret_cast<ptrdiff_t>(dst.data());
218 const ptrdiff_t dst_end =
219 reinterpret_cast<ptrdiff_t>(dst.data() + dst.span());
220 const ptrdiff_t src_beg = reinterpret_cast<ptrdiff_t>(src.data());
221 const ptrdiff_t src_end =
222 reinterpret_cast<ptrdiff_t>(src.data() + src.span());
223
224 if (dst_end > src_beg && src_end > dst_beg) {
225 // dst and src alias each other, so we can't call
226 // Kokkos::deep_copy(dst,src) directly (Kokkos detects this
227 // and throws, at least in debug mode). Instead, we make
228 // temporary host storage (create_mirror always makes a new
229 // allocation, unlike create_mirror_view). Use host because
230 // it's cheaper to allocate. Hopefully users aren't doing
231 // aliased copies in a tight loop.
232 auto src_copy = Kokkos::create_mirror(Kokkos::HostSpace(), src);
233 // DEEP_COPY REVIEW - NOT TESTED
234 Kokkos::deep_copy(src_copy, src);
235 // DEEP_COPY REVIEW - NOT TESTED
236 Kokkos::deep_copy(dst, src_copy);
237 } else { // no aliasing
238 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
239 using execution_space = typename OutputViewType::execution_space;
240 Kokkos::deep_copy(execution_space(), dst, src);
241 }
242 }
243};
244
247template <class OutputViewType,
248 class InputViewType>
249struct CopyConvertImpl<OutputViewType,
250 InputViewType,
251 false,
252 true> {
253 static void
254 run(const OutputViewType& dst,
255 const InputViewType& src) {
256 using functor_type = CopyConvertFunctor<OutputViewType, InputViewType>;
257 using execution_space = typename OutputViewType::execution_space;
258 using index_type = typename OutputViewType::size_type;
259 using range_type = Kokkos::RangePolicy<execution_space, index_type>;
260 Kokkos::parallel_for("Tpetra::Details::copyConvert",
261 range_type(0, dst.extent(0)),
262 functor_type(dst, src));
263 }
264};
265
272template <class OutputViewType,
273 class InputViewType>
274struct CopyConvertImpl<OutputViewType, InputViewType, false, false> {
275 static void
276 run(const OutputViewType& dst,
277 const InputViewType& src) {
278 using output_memory_space = typename OutputViewType::memory_space;
279 using output_execution_space = typename OutputViewType::execution_space;
280 auto src_outputSpaceCopy =
281 Kokkos::create_mirror_view(output_memory_space(), src);
282 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
283 Kokkos::deep_copy(output_execution_space(), src_outputSpaceCopy, src);
284
285 // The output View's execution space can access
286 // outputSpaceCopy's data, so we can run the functor now.
287 using output_space_copy_type = decltype(src_outputSpaceCopy);
288 using functor_type =
289 CopyConvertFunctor<OutputViewType, output_space_copy_type>;
290 using execution_space = typename OutputViewType::execution_space;
291 using index_type = typename OutputViewType::size_type;
292 using range_type = Kokkos::RangePolicy<execution_space, index_type>;
293 Kokkos::parallel_for("Tpetra::Details::copyConvert",
294 range_type(0, dst.extent(0)),
295 functor_type(dst, src_outputSpaceCopy));
296 }
297};
298} // namespace
299
308template <class OutputViewType,
309 class InputViewType>
311 const InputViewType& src) {
312 static_assert(Kokkos::is_view<OutputViewType>::value,
313 "OutputViewType must be a Kokkos::View.");
314 static_assert(Kokkos::is_view<InputViewType>::value,
315 "InputViewType must be a Kokkos::View.");
316 static_assert(std::is_same<typename OutputViewType::value_type,
317 typename OutputViewType::non_const_value_type>::value,
318 "OutputViewType must be a nonconst Kokkos::View.");
319 static_assert(static_cast<int>(OutputViewType::rank) ==
320 static_cast<int>(InputViewType::rank),
321 "src and dst must have the same rank.");
322
323 if (dst.extent(0) != src.extent(0)) {
324 std::ostringstream os;
325 os << "Tpetra::Details::copyConvert: "
326 << "dst.extent(0) = " << dst.extent(0)
327 << " != src.extent(0) = " << src.extent(0)
328 << ".";
329 throw std::invalid_argument(os.str());
330 }
331 if (static_cast<int>(OutputViewType::rank) > 1 &&
332 dst.extent(1) != src.extent(1)) {
333 std::ostringstream os;
334 os << "Tpetra::Details::copyConvert: "
335 << "dst.extent(1) = " << dst.extent(1)
336 << " != src.extent(1) = " << src.extent(1)
337 << ".";
338 throw std::invalid_argument(os.str());
339 }
340
341 // Canonicalize the View types in order to avoid redundant instantiations.
342 using output_view_type =
343 Kokkos::View<typename OutputViewType::non_const_data_type,
344 typename OutputViewType::array_layout,
345 typename OutputViewType::device_type>;
346 using input_view_type =
347 Kokkos::View<typename InputViewType::const_data_type,
348 typename InputViewType::array_layout,
349 typename InputViewType::device_type>;
350 CopyConvertImpl<output_view_type, input_view_type>::run(dst, src);
351}
352
353} // namespace Details
354} // namespace Tpetra
355
356#endif // TPETRA_DETAILS_COPYCONVERT_HPP
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length....
Namespace Tpetra contains the class and methods constituting the Tpetra library.