Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
11#define TPETRA_DETAILS_COPYOFFSETS_HPP
12
17
18#include "TpetraCore_config.h"
20#include "Kokkos_Core.hpp"
21#include <limits>
22#include <type_traits>
23
24namespace Tpetra {
25namespace Details {
26
27//
28// Implementation details for copyOffsets (see below).
29// Users should skip over this anonymous namespace.
30//
31namespace { // (anonymous)
32
// Implementation detail of copyOffsets (see below).  Compile-time
// predicate: value is true if and only if every value of InputType
// is representable as an OutputType, so that assigning an InputType
// to an OutputType can never overflow.
//
// Implicit here is the assumption that both input and output types
// are integers.
//
// Three kinds of conversion can lose information.  Each one makes
// value false:
//
// 1. Signed to unsigned (of any size): negative inputs wrap around.
// 2. Larger to smaller: the input's high bits get dropped.
// 3. Same size, unsigned to signed: large inputs turn negative.
//
// This is exactly the complement of OverflowChecker::could_overflow,
// so the two always agree on which type pairs need a run-time check.
template <class OutputType, class InputType>
struct OutputCanFitInput {
 private:
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed  = std::is_signed<InputType>::value;

  // Case 1: a negative input has no unsigned representation.
  static constexpr bool negative_input_wraps =
      input_signed && !output_signed;

  // Case 2: the output has fewer bits than the input.
  static constexpr bool output_is_narrower =
      sizeof(OutputType) < sizeof(InputType);

  // Case 3: same width, but the output spends one bit on the sign.
  static constexpr bool sign_bit_costs_range =
      sizeof(OutputType) == sizeof(InputType) &&
      output_signed && !input_signed;

 public:
  static constexpr bool value =
      !(negative_input_wraps || output_is_narrower || sign_bit_costs_range);
};
50
51// Avoid warnings for "unsigned integer < 0" comparisons.
52template <class InputType,
53 bool input_signed = std::is_signed<InputType>::value>
54struct Negative {};
55
56template <class InputType>
57struct Negative<InputType, true> {
58 static KOKKOS_INLINE_FUNCTION bool
59 negative(const InputType src) {
60 return src < InputType(0);
61 }
62};
63
64template <class InputType>
65struct Negative<InputType, false> {
66 static KOKKOS_INLINE_FUNCTION bool
67 negative(const InputType /* src */) {
68 return false;
69 }
70};
71
72template <class InputType>
73KOKKOS_INLINE_FUNCTION bool negative(const InputType src) {
74 return Negative<InputType>::negative(src);
75}
76
77template <class OutputType, class InputType>
78struct OverflowChecker {
79 private:
80 static constexpr bool output_signed = std::is_signed<OutputType>::value;
81 static constexpr bool input_signed = std::is_signed<InputType>::value;
82
83 public:
84 // 1. Signed to unsigned could overflow due to negative numbers.
85 // 2. Larger to smaller could overflow.
86 // 3. Same size but unsigned to signed could overflow.
87 static constexpr bool could_overflow =
88 (!output_signed && input_signed) ||
89 (sizeof(OutputType) < sizeof(InputType)) ||
90 (sizeof(OutputType) == sizeof(InputType) &&
91 output_signed && !input_signed);
92
93 KOKKOS_INLINE_FUNCTION bool
94 overflows(const InputType src) const {
95 if (!could_overflow) {
96 return false;
97 } else {
98 // Signed to unsigned could overflow due to negative numbers.
99 if (!output_signed && input_signed) {
100 return negative(src);
101 }
102 // We're only comparing InputType with InputType here, so this
103 // should not emit warnings.
104 return src < minDstVal_ || src > maxDstVal_;
105 }
106 }
107
108 private:
109 // If InputType is unsigned and OutputType is signed, casting max
110 // OutputType to InputType could overflow. See #5548.
111 InputType minDstVal_ = input_signed ? std::numeric_limits<OutputType>::min() : OutputType(0);
112 InputType maxDstVal_ = std::numeric_limits<OutputType>::max();
113};
114
115template <class OutputViewType, class InputViewType>
116void errorIfOverflow(const OutputViewType& dst,
117 const InputViewType& src,
118 const size_t overflowCount) {
119 if (overflowCount == 0) {
120 return;
121 }
122
123 std::ostringstream os;
124 const bool plural = overflowCount != size_t(1);
125 os << "copyOffsets: " << overflowCount << " value" << (plural ? "s" : "") << " in src were too big (in the "
126 "sense of integer overflow) to fit in dst.";
127
128 const bool verbose = Details::Behavior::verbose();
129 if (verbose) {
130 const size_t maxNumToPrint =
132 const size_t srcLen(src.extent(0));
133 if (srcLen <= maxNumToPrint) {
134 auto dst_h = Kokkos::create_mirror_view(dst);
135 auto src_h = Kokkos::create_mirror_view(src);
136 // DEEP_COPY REVIEW - NOT TESTED
137 Kokkos::deep_copy(src_h, src);
138 // DEEP_COPY REVIEW - NOT TESTED
139 Kokkos::deep_copy(dst_h, dst);
140
141 os << " src: [";
142 for (size_t k = 0; k < srcLen; ++k) {
143 os << src_h[k];
144 if (k + size_t(1) < srcLen) {
145 os << ", ";
146 }
147 }
148 os << "], ";
149
150 os << " dst: [";
151 for (size_t k = 0; k < srcLen; ++k) {
152 os << dst_h[k];
153 if (k + size_t(1) < srcLen) {
154 os << ", ";
155 }
156 }
157 os << "].";
158 } else {
159 os << " src.extent(0) > " << maxNumToPrint << ", Tpetra's "
160 "verbose print count threshold. To increase this, set the "
161 "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
162 "to the desired threshold and rerun. You do NOT need to "
163 "rebuild Trilinos.";
164 }
165 }
166 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
167}
168
169// Implementation detail of copyOffsets (see below).
170//
171// Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
172// Tpetra::Details::FixedHashTable uses this in its "copy"
173// constructor for converting between different Device types. All
174// the action happens in the partial specializations for different
175// values of outputCanFitInput. "Output can fit input" means that
176// casting the input's value type to the output's value type will
177// never result in integer overflow.
178template <class OutputViewType,
179 class InputViewType,
180 const bool outputCanFitInput =
181 OutputCanFitInput<typename OutputViewType::non_const_value_type,
182 typename InputViewType::non_const_value_type>::value>
183class CopyOffsetsFunctor {};
184
185// Specialization for when overflow is possible.
186template <class OutputViewType, class InputViewType>
187class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
188 public:
189 using execution_space = typename OutputViewType::execution_space;
190 using size_type = typename OutputViewType::size_type;
191 using value_type = size_t;
192
193 using input_value_type = typename InputViewType::non_const_value_type;
194 using output_value_type = typename OutputViewType::non_const_value_type;
195
196 CopyOffsetsFunctor(const OutputViewType& dst, const InputViewType& src)
197 : dst_(dst)
198 , src_(src) {
199 static_assert(Kokkos::SpaceAccessibility<
200 typename OutputViewType::memory_space,
201 typename InputViewType::memory_space>::accessible,
202 "CopyOffsetsFunctor (implements copyOffsets): Output "
203 "View's space must be able to access the input View's "
204 "memory space.");
205 }
206
207 KOKKOS_INLINE_FUNCTION void
208 operator()(const size_type i, value_type& overflowCount) const {
209 const input_value_type src_i = src_(i);
210 if (checker_.overflows(src_i)) {
211 ++overflowCount;
212 }
213 dst_(i) = static_cast<output_value_type>(src_i);
214 }
215
216 KOKKOS_INLINE_FUNCTION void
217 operator()(const size_type i) const {
218 const input_value_type src_i = src_(i);
219 dst_(i) = static_cast<output_value_type>(src_i);
220 }
221
222 KOKKOS_INLINE_FUNCTION void init(value_type& overflowCount) const {
223 overflowCount = 0;
224 }
225
226 KOKKOS_INLINE_FUNCTION void
227 join(value_type& result,
228 const value_type& current) const {
229 result += current;
230 }
231
232 private:
233 OutputViewType dst_;
234 InputViewType src_;
235 OverflowChecker<output_value_type, input_value_type> checker_;
236};
237
238// Specialization for when overflow is impossible.
239template <class OutputViewType, class InputViewType>
240class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
241 public:
242 using execution_space = typename OutputViewType::execution_space;
243 using size_type = typename OutputViewType::size_type;
244 using value_type = size_t;
245
246 CopyOffsetsFunctor(const OutputViewType& dst, const InputViewType& src)
247 : dst_(dst)
248 , src_(src) {
249 static_assert(Kokkos::SpaceAccessibility<
250 typename OutputViewType::memory_space,
251 typename InputViewType::memory_space>::accessible,
252 "CopyOffsetsFunctor (implements copyOffsets): Output "
253 "View's space must be able to access the input View's "
254 "memory space.");
255 }
256
257 KOKKOS_INLINE_FUNCTION void
258 operator()(const size_type i, value_type& /* overflowCount */) const {
259 // Overflow is impossible in this case, so there's no need to check.
260 dst_(i) = src_(i);
261 }
262
263 KOKKOS_INLINE_FUNCTION void
264 operator()(const size_type i) const {
265 dst_(i) = src_(i);
266 }
267
268 KOKKOS_INLINE_FUNCTION void init(value_type& overflowCount) const {
269 overflowCount = 0;
270 }
271
272 KOKKOS_INLINE_FUNCTION void
273 join(value_type& /* result */,
274 const value_type& /* current */) const {}
275
276 private:
277 OutputViewType dst_;
278 InputViewType src_;
279};
280
281// Implementation detail of copyOffsets (see below).
282//
283// We specialize copyOffsets on two different conditions:
284//
285// 1. Are the two Views' layouts the same, and do the input and
286// output Views have the same value type?
287// 2. Can the output View's execution space access the input View's
288// memory space?
289//
290// If (1) is true, that makes the implementation simple: just call
291// Kokkos::deep_copy (FixedHashTable always uses the same layout, no
292// matter the device type). Otherwise, we need a custom copy
293// functor. If (2) is true, then we can use CopyOffsetsFunctor
294// directly. Otherwise, we have to copy the input View into the
295// output View's memory space, before we can use the functor.
296//
297template <class OutputViewType,
298 class InputViewType,
299 const bool sameLayoutsSameOffsetTypes =
300 std::is_same<typename OutputViewType::array_layout,
301 typename InputViewType::array_layout>::value&&
302 std::is_same<typename OutputViewType::non_const_value_type,
303 typename InputViewType::non_const_value_type>::value,
304 const bool outputExecSpaceCanAccessInputMemSpace =
305 Kokkos::SpaceAccessibility<
306 typename OutputViewType::memory_space,
307 typename InputViewType::memory_space>::accessible>
308struct CopyOffsetsImpl {
309 static void run(const OutputViewType& dst, const InputViewType& src);
310};
311
312// Specialization for sameLayoutsSameOffsetTypes = true:
313//
314// If both input and output Views have the same layout, and both
315// input and output use the same type for offsets, then we don't
316// need to check for overflow, and we can use Kokkos::deep_copy
317// directly. It doesn't matter whether the output execution space
318// can access the input memory space: Kokkos::deep_copy takes care
319// of the details.
320template <class OutputViewType,
321 class InputViewType,
322 const bool outputExecSpaceCanAccessInputMemSpace>
323struct CopyOffsetsImpl<OutputViewType, InputViewType,
324 true, outputExecSpaceCanAccessInputMemSpace> {
325 static void run(const OutputViewType& dst, const InputViewType& src) {
326 static_assert(std::is_same<typename OutputViewType::non_const_value_type,
327 typename InputViewType::non_const_value_type>::value,
328 "CopyOffsetsImpl (implementation of copyOffsets): In order"
329 " to call this specialization, the input and output must "
330 "use the same offset type.");
331 static_assert(static_cast<int>(OutputViewType::rank) ==
332 static_cast<int>(InputViewType::rank),
333 "CopyOffsetsImpl (implementation of copyOffsets): In order"
334 " to call this specialization, src and dst must have the "
335 "same rank.");
336 static_assert(std::is_same<typename OutputViewType::array_layout,
337 typename InputViewType::array_layout>::value,
338 "CopyOffsetsImpl (implementation of copyOffsets): In order"
339 " to call this specialization, src and dst must have the "
340 "the same array_layout.");
341 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
342 using execution_space = typename OutputViewType::execution_space;
343 Kokkos::deep_copy(execution_space(), dst, src);
344 }
345};
346
347// Specializations for sameLayoutsSameOffsetTypes = false:
348//
349// If input and output don't have the same layout, or use different
350// types for offsets, then we can't use Kokkos::deep_copy directly,
351// and we may have to check for overflow.
352
353// Specialization for sameLayoutsSameOffsetTypes = false and
354// outputExecSpaceCanAccessInputMemSpace = true:
355//
356// If the output execution space can access the input memory space,
357// then we can use CopyOffsetsFunctor directly.
358template <class OutputViewType,
359 class InputViewType>
360struct CopyOffsetsImpl<OutputViewType, InputViewType,
361 false, true> {
362 static void run(const OutputViewType& dst, const InputViewType& src) {
363 static_assert(static_cast<int>(OutputViewType::rank) ==
364 static_cast<int>(InputViewType::rank),
365 "CopyOffsetsImpl (implementation of copyOffsets): "
366 "src and dst must have the same rank.");
367 constexpr bool sameLayoutsSameOffsetTypes =
368 std::is_same<typename OutputViewType::array_layout,
369 typename InputViewType::array_layout>::value &&
370 std::is_same<typename OutputViewType::non_const_value_type,
371 typename InputViewType::non_const_value_type>::value;
372 static_assert(!sameLayoutsSameOffsetTypes,
373 "CopyOffsetsImpl (implements copyOffsets): In order to "
374 "call this specialization, sameLayoutsSameOffsetTypes "
375 "must be false. That is, either the input and output "
376 "must have different array layouts, or their value types "
377 "must differ.");
378 static_assert(Kokkos::SpaceAccessibility<
379 typename OutputViewType::memory_space,
380 typename InputViewType::memory_space>::accessible,
381 "CopyOffsetsImpl (implements copyOffsets): In order to "
382 "call this specialization, the output View's space must "
383 "be able to access the input View's memory space.");
384 using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
385 using execution_space = typename OutputViewType::execution_space;
386 using size_type = typename OutputViewType::size_type;
387 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
388
389 const bool debug = Details::Behavior::debug();
390 if (debug) {
391 size_t overflowCount = 0; // output argument of the reduction
392 Kokkos::parallel_reduce("Tpetra::Details::copyOffsets",
393 range_type(0, dst.extent(0)),
394 functor_type(dst, src),
395 overflowCount);
396 errorIfOverflow(dst, src, overflowCount);
397 } else {
398 Kokkos::parallel_for("Tpetra::Details::copyOffsets",
399 range_type(0, dst.extent(0)),
400 functor_type(dst, src));
401 }
402 }
403};
404
405// Specialization for sameLayoutsSameOffsetTypes = false and
406// outputExecSpaceCanAccessInputMemSpace = false.
407//
408// If the output execution space canNOT access the input memory
409// space, then we can't use CopyOffsetsFunctor directly. Instead,
410// tell Kokkos to copy the input View's data into the output View's
411// memory space _first_. Since the offset types are different for
412// this specialization, we can't just call Kokkos::deep_copy
413// directly between the input and output Views of offsets; that
414// wouldn't compile.
415//
416// This case can and does come up in practice: If the output View's
417// execution space is Cuda, it cannot currently access host memory
418// (that's the opposite direction from what UVM allows).
419// Furthermore, that case specifically requires overflow checking,
420// since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
421// offset type than Kokkos' host spaces.
422template <class OutputViewType, class InputViewType>
423struct CopyOffsetsImpl<OutputViewType, InputViewType,
424 false, false> {
425 static void run(const OutputViewType& dst, const InputViewType& src) {
426 static_assert(static_cast<int>(OutputViewType::rank) ==
427 static_cast<int>(InputViewType::rank),
428 "CopyOffsetsImpl (implementation of copyOffsets): In order"
429 " to call this specialization, src and dst must have the "
430 "same rank.");
431 constexpr bool sameLayoutsSameOffsetTypes =
432 std::is_same<typename OutputViewType::array_layout,
433 typename InputViewType::array_layout>::value &&
434 std::is_same<typename OutputViewType::non_const_value_type,
435 typename InputViewType::non_const_value_type>::value;
436 static_assert(!sameLayoutsSameOffsetTypes,
437 "CopyOffsetsImpl (implements copyOffsets): In order to "
438 "call this specialization, sameLayoutsSameOffsetTypes "
439 "must be false. That is, either the input and output "
440 "must have different array layouts, or their value types "
441 "must differ.");
442 using output_space_copy_type =
443 Kokkos::View<typename InputViewType::non_const_value_type*,
444 Kokkos::LayoutLeft, typename OutputViewType::device_type>;
445 using Kokkos::view_alloc;
446 using Kokkos::WithoutInitializing;
447 using execution_space = typename OutputViewType::execution_space;
448 output_space_copy_type
449 outputSpaceCopy(view_alloc("outputSpace", WithoutInitializing),
450 src.extent(0));
451 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
452 Kokkos::deep_copy(execution_space(), outputSpaceCopy, src);
453
454 // The output View's execution space can access
455 // outputSpaceCopy's data, so we can run the functor now.
456 using functor_type =
457 CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
458 using size_type = typename OutputViewType::size_type;
459 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
460
461 const bool debug = Details::Behavior::debug();
462 if (debug) {
463 size_t overflowCount = 0;
464 Kokkos::parallel_reduce("Tpetra::Details::copyOffsets",
465 range_type(0, dst.extent(0)),
466 functor_type(dst, outputSpaceCopy),
467 overflowCount);
468 errorIfOverflow(dst, src, overflowCount);
469 } else {
470 Kokkos::parallel_for("Tpetra::Details::copyOffsets",
471 range_type(0, dst.extent(0)),
472 functor_type(dst, outputSpaceCopy));
473 }
474 }
475};
476} // namespace
477
489template <class OutputViewType, class InputViewType>
490void copyOffsets(const OutputViewType& dst, const InputViewType& src) {
491 static_assert(Kokkos::is_view<OutputViewType>::value,
492 "OutputViewType (the type of dst) must be a Kokkos::View.");
493 static_assert(Kokkos::is_view<InputViewType>::value,
494 "InputViewType (the type of src) must be a Kokkos::View.");
495 static_assert(std::is_same<typename OutputViewType::value_type,
496 typename OutputViewType::non_const_value_type>::value,
497 "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
498 static_assert(static_cast<int>(OutputViewType::rank) == 1,
499 "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
500 static_assert(static_cast<int>(InputViewType::rank) == 1,
501 "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
502 static_assert(std::is_integral<typename std::decay<decltype(dst(0))>::type>::value,
503 "The entries of dst must be built-in integers.");
504 static_assert(std::is_integral<typename std::decay<decltype(src(0))>::type>::value,
505 "The entries of src must be built-in integers.");
506
507 TEUCHOS_TEST_FOR_EXCEPTION(dst.extent(0) != src.extent(0), std::invalid_argument,
508 "copyOffsets: dst.extent(0) = " << dst.extent(0)
509 << " != src.extent(0) = " << src.extent(0) << ".");
510
511 CopyOffsetsImpl<OutputViewType, InputViewType>::run(dst, src);
512}
513
514} // namespace Details
515} // namespace Tpetra
516
517#endif // TPETRA_DETAILS_COPYOFFSETS_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Struct that holds views of the contents of a CrsMatrix.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Implementation details of Tpetra.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
Namespace Tpetra contains the class and methods constituting the Tpetra library.