Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_copyConvert.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_COPYCONVERT_HPP
11#define TPETRA_DETAILS_COPYCONVERT_HPP
12
17
18#include "TpetraCore_config.h"
19#include "Kokkos_Core.hpp"
20#if KOKKOS_VERSION >= 40799
21#include "KokkosKernels_ArithTraits.hpp"
22#else
23#include "Kokkos_ArithTraits.hpp"
24#endif
25#include <sstream>
26#include <stdexcept>
27#include <type_traits>
28
29namespace Tpetra {
30namespace Details {
31
32//
33// Implementation details for copyConvert (see below).
34// Users should skip over this anonymous namespace.
35//
36namespace { // (anonymous)
37
38// We need separate implementations for both (T,complex) and
39// (complex,T), but we can't just overload for both cases, because
40// that would be ambiguous (e.g., (complex,complex)).
41template <class OutputValueType,
42 class InputValueType,
43 const bool outputIsComplex =
44#if KOKKOS_VERSION >= 40799
45 KokkosKernels::ArithTraits<OutputValueType>::is_complex,
46#else
47 Kokkos::ArithTraits<OutputValueType>::is_complex,
48#endif
49 const bool inputIsComplex =
50#if KOKKOS_VERSION >= 40799
51 KokkosKernels::ArithTraits<InputValueType>::is_complex>
52#else
53 Kokkos::ArithTraits<InputValueType>::is_complex>
54#endif
55struct ConvertValue {
56 static KOKKOS_INLINE_FUNCTION void
57 convert(OutputValueType& dst, const InputValueType& src) {
58 // This looks trivial, but it actually invokes OutputValueType's
59 // constructor, so that needs to be marked as a __host__
60 // __device__ function (e.g., via the KOKKOS_FUNCTION or
61 // KOKKOS_INLINE_FUNCTION macros).
62 dst = OutputValueType(src);
63 }
64};
65
66template <class OutputRealType, class InputComplexType>
67struct ConvertValue<OutputRealType, InputComplexType, false, true> {
68 static KOKKOS_INLINE_FUNCTION void
69 convert(OutputRealType& dst,
70 const InputComplexType& src) {
71 // OutputRealType's constructor needs to be marked with either
72 // KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
73#if KOKKOS_VERSION >= 40799
74 using KAI = KokkosKernels::ArithTraits<InputComplexType>;
75#else
76 using KAI = Kokkos::ArithTraits<InputComplexType>;
77#endif
78 dst = OutputRealType(KAI::real(src));
79 }
80};
81
82template <class OutputComplexType, class InputRealType>
83struct ConvertValue<OutputComplexType, InputRealType, true, false> {
84 static KOKKOS_INLINE_FUNCTION void
85 convert(OutputComplexType& dst,
86 const InputRealType& src) {
87 // OutputComplexType's constructor needs to be marked with
88 // either KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
89 using output_mag_type =
90#if KOKKOS_VERSION >= 40799
91 typename KokkosKernels::ArithTraits<OutputComplexType>::mag_type;
92#else
93 typename Kokkos::ArithTraits<OutputComplexType>::mag_type;
94#endif
95#if KOKKOS_VERSION >= 40799
96 using KAM = KokkosKernels::ArithTraits<output_mag_type>;
97#else
98 using KAM = Kokkos::ArithTraits<output_mag_type>;
99#endif
100 dst = OutputComplexType(src, KAM::zero());
101 }
102};
103
104template <class OutputValueType,
105 class InputValueType>
106KOKKOS_INLINE_FUNCTION void
107convertValue(OutputValueType& dst, const InputValueType& src) {
108 ConvertValue<OutputValueType, InputValueType>::convert(dst, src);
109}
110
// Functor that performs the element-wise converting copy.
//
// The primary template is intentionally empty: it has no operator(),
// so only the rank-specific partial specializations can be used as
// parallel_for bodies.  The rank defaults to that of OutputViewType.
template <class OutputViewType,
          class InputViewType,
          const int rank = static_cast<int>(OutputViewType::rank)>
class CopyConvertFunctor {
};
119
120template <class OutputViewType,
121 class InputViewType>
122class CopyConvertFunctor<OutputViewType, InputViewType, 1> {
123 private:
124 static_assert(static_cast<int>(OutputViewType::rank) == 1 &&
125 static_cast<int>(InputViewType::rank) == 1,
126 "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
127 "OutputViewType and InputViewType must both have rank 1.");
128 OutputViewType dst_;
129 InputViewType src_;
130
131 public:
132 using index_type = typename OutputViewType::size_type;
133
134 CopyConvertFunctor(const OutputViewType& dst,
135 const InputViewType& src)
136 : dst_(dst)
137 , src_(src) {}
138
139 KOKKOS_INLINE_FUNCTION void
140 operator()(const index_type i) const {
141 convertValue(dst_(i), src_(i));
142 }
143};
144
145template <class OutputViewType,
146 class InputViewType>
147class CopyConvertFunctor<OutputViewType, InputViewType, 2> {
148 public:
149 using index_type = typename OutputViewType::size_type;
150
151 private:
152 static_assert(static_cast<int>(OutputViewType::rank) == 2 &&
153 static_cast<int>(InputViewType::rank) == 2,
154 "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
155 "OutputViewType and InputViewType must both have rank 2.");
156 OutputViewType dst_;
157 InputViewType src_;
158 index_type numCols_;
159
160 public:
161 CopyConvertFunctor(const OutputViewType& dst,
162 const InputViewType& src)
163 : dst_(dst)
164 , src_(src)
165 , numCols_(dst.extent(1)) {}
166
167 KOKKOS_INLINE_FUNCTION void
168 operator()(const index_type i) const {
169 const index_type numCols = numCols_;
170 for (index_type j = 0; j < numCols; ++j) {
171 convertValue(dst_(i, j), src_(i, j));
172 }
173 }
174};
175
// Compile-time predicate: may copyConvert hand this (output, input)
// pair of View types straight to Kokkos::deep_copy?
//
// `value` is true when both Views have the same non-const value type
// and, in addition, share the memory space or the array layout (or
// both).
template <class OutputViewType, class InputViewType>
class CanUseKokkosDeepCopy {
 private:
  using out_value_type = typename OutputViewType::non_const_value_type;
  using in_value_type  = typename InputViewType::non_const_value_type;
  using out_mem_space  = typename OutputViewType::memory_space;
  using in_mem_space   = typename InputViewType::memory_space;
  using out_layout     = typename OutputViewType::array_layout;
  using in_layout      = typename InputViewType::array_layout;

  static constexpr bool sameValueType =
      std::is_same<out_value_type, in_value_type>::value;
  static constexpr bool sameMemorySpace =
      std::is_same<out_mem_space, in_mem_space>::value;
  static constexpr bool sameLayout =
      std::is_same<out_layout, in_layout>::value;

 public:
  static constexpr bool value =
      sameValueType && (sameMemorySpace || sameLayout);
};
194
213template <class OutputViewType,
214 class InputViewType,
215 const bool canUseKokkosDeepCopy =
216 CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value,
217 const bool outputExecSpaceCanAccessInputMemSpace =
218 Kokkos::SpaceAccessibility<
219 typename OutputViewType::memory_space,
220 typename InputViewType::memory_space>::accessible>
221struct CopyConvertImpl {
222 static void
223 run(const OutputViewType& dst,
224 const InputViewType& src);
225};
226
228template <class OutputViewType,
229 class InputViewType,
230 const bool outputExecSpaceCanAccessInputMemSpace>
231struct CopyConvertImpl<OutputViewType, InputViewType,
232 true, outputExecSpaceCanAccessInputMemSpace> {
233 static void
234 run(const OutputViewType& dst,
235 const InputViewType& src) {
236 // NOTE: It's important to do the addition _inside_ the
237 // reinterpret-cast. If you reinterpret_cast the separate
238 // results, you may get the wrong answer (e.g., because
239 // ptrdiff_t is signed, and pointers may have arbitrary 64-bit
240 // virtual addresses). I'm speaking from experience here.
241 const ptrdiff_t dst_beg = reinterpret_cast<ptrdiff_t>(dst.data());
242 const ptrdiff_t dst_end =
243 reinterpret_cast<ptrdiff_t>(dst.data() + dst.span());
244 const ptrdiff_t src_beg = reinterpret_cast<ptrdiff_t>(src.data());
245 const ptrdiff_t src_end =
246 reinterpret_cast<ptrdiff_t>(src.data() + src.span());
247
248 if (dst_end > src_beg && src_end > dst_beg) {
249 // dst and src alias each other, so we can't call
250 // Kokkos::deep_copy(dst,src) directly (Kokkos detects this
251 // and throws, at least in debug mode). Instead, we make
252 // temporary host storage (create_mirror always makes a new
253 // allocation, unlike create_mirror_view). Use host because
254 // it's cheaper to allocate. Hopefully users aren't doing
255 // aliased copies in a tight loop.
256 auto src_copy = Kokkos::create_mirror(Kokkos::HostSpace(), src);
257 // DEEP_COPY REVIEW - NOT TESTED
258 Kokkos::deep_copy(src_copy, src);
259 // DEEP_COPY REVIEW - NOT TESTED
260 Kokkos::deep_copy(dst, src_copy);
261 } else { // no aliasing
262 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
263 using execution_space = typename OutputViewType::execution_space;
264 Kokkos::deep_copy(execution_space(), dst, src);
265 }
266 }
267};
268
271template <class OutputViewType,
272 class InputViewType>
273struct CopyConvertImpl<OutputViewType,
274 InputViewType,
275 false,
276 true> {
277 static void
278 run(const OutputViewType& dst,
279 const InputViewType& src) {
280 using functor_type = CopyConvertFunctor<OutputViewType, InputViewType>;
281 using execution_space = typename OutputViewType::execution_space;
282 using index_type = typename OutputViewType::size_type;
283 using range_type = Kokkos::RangePolicy<execution_space, index_type>;
284 Kokkos::parallel_for("Tpetra::Details::copyConvert",
285 range_type(0, dst.extent(0)),
286 functor_type(dst, src));
287 }
288};
289
296template <class OutputViewType,
297 class InputViewType>
298struct CopyConvertImpl<OutputViewType, InputViewType, false, false> {
299 static void
300 run(const OutputViewType& dst,
301 const InputViewType& src) {
302 using output_memory_space = typename OutputViewType::memory_space;
303 using output_execution_space = typename OutputViewType::execution_space;
304 auto src_outputSpaceCopy =
305 Kokkos::create_mirror_view(output_memory_space(), src);
306 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
307 Kokkos::deep_copy(output_execution_space(), src_outputSpaceCopy, src);
308
309 // The output View's execution space can access
310 // outputSpaceCopy's data, so we can run the functor now.
311 using output_space_copy_type = decltype(src_outputSpaceCopy);
312 using functor_type =
313 CopyConvertFunctor<OutputViewType, output_space_copy_type>;
314 using execution_space = typename OutputViewType::execution_space;
315 using index_type = typename OutputViewType::size_type;
316 using range_type = Kokkos::RangePolicy<execution_space, index_type>;
317 Kokkos::parallel_for("Tpetra::Details::copyConvert",
318 range_type(0, dst.extent(0)),
319 functor_type(dst, src_outputSpaceCopy));
320 }
321};
322} // namespace
323
332template <class OutputViewType,
333 class InputViewType>
335 const InputViewType& src) {
336 static_assert(Kokkos::is_view<OutputViewType>::value,
337 "OutputViewType must be a Kokkos::View.");
338 static_assert(Kokkos::is_view<InputViewType>::value,
339 "InputViewType must be a Kokkos::View.");
340 static_assert(std::is_same<typename OutputViewType::value_type,
341 typename OutputViewType::non_const_value_type>::value,
342 "OutputViewType must be a nonconst Kokkos::View.");
343 static_assert(static_cast<int>(OutputViewType::rank) ==
344 static_cast<int>(InputViewType::rank),
345 "src and dst must have the same rank.");
346
347 if (dst.extent(0) != src.extent(0)) {
348 std::ostringstream os;
349 os << "Tpetra::Details::copyConvert: "
350 << "dst.extent(0) = " << dst.extent(0)
351 << " != src.extent(0) = " << src.extent(0)
352 << ".";
353 throw std::invalid_argument(os.str());
354 }
355 if (static_cast<int>(OutputViewType::rank) > 1 &&
356 dst.extent(1) != src.extent(1)) {
357 std::ostringstream os;
358 os << "Tpetra::Details::copyConvert: "
359 << "dst.extent(1) = " << dst.extent(1)
360 << " != src.extent(1) = " << src.extent(1)
361 << ".";
362 throw std::invalid_argument(os.str());
363 }
364
365 // Canonicalize the View types in order to avoid redundant instantiations.
366 using output_view_type =
367 Kokkos::View<typename OutputViewType::non_const_data_type,
368 typename OutputViewType::array_layout,
369 typename OutputViewType::device_type>;
370 using input_view_type =
371 Kokkos::View<typename InputViewType::const_data_type,
372 typename InputViewType::array_layout,
373 typename InputViewType::device_type>;
374 CopyConvertImpl<output_view_type, input_view_type>::run(dst, src);
375}
376
377} // namespace Details
378} // namespace Tpetra
379
380#endif // TPETRA_DETAILS_COPYCONVERT_HPP
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length....
Namespace Tpetra contains the class and methods constituting the Tpetra library.