Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_packCrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
11#define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
12
13#include "TpetraCore_config.h"
14#include "Teuchos_Array.hpp"
15#include "Teuchos_ArrayView.hpp"
23#include <memory>
24#include <string>
25
47
48namespace Tpetra {
49
50//
51// Users must never rely on anything in the Details namespace.
52//
53namespace Details {
54
55namespace PackCrsGraphImpl {
63template <class OutputOffsetsViewType,
64 class CountsViewType,
65 class InputOffsetsViewType,
66 class InputLocalRowIndicesViewType,
67 class InputLocalRowPidsViewType,
68 const bool debug =
69#ifdef HAVE_TPETRA_DEBUG
70 true
71#else
72 false
73#endif // HAVE_TPETRA_DEBUG
74 >
76 public:
77 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
78 typedef typename CountsViewType::non_const_value_type count_type;
79 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
80 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
81 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
82 // output Views drive where execution happens.
83 typedef typename OutputOffsetsViewType::device_type device_type;
84 static_assert(std::is_same<typename CountsViewType::device_type::execution_space,
85 typename device_type::execution_space>::value,
86 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
87 static_assert(Kokkos::is_view<OutputOffsetsViewType>::value,
88 "OutputOffsetsViewType must be a Kokkos::View.");
89 static_assert(std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
90 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
91 static_assert(std::is_integral<output_offset_type>::value,
92 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
93 static_assert(Kokkos::is_view<CountsViewType>::value,
94 "CountsViewType must be a Kokkos::View.");
95 static_assert(std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
96 "CountsViewType must be a nonconst Kokkos::View.");
97 static_assert(std::is_integral<count_type>::value,
98 "The type of each entry of CountsViewType must be a built-in integer type.");
99 static_assert(Kokkos::is_view<InputOffsetsViewType>::value,
100 "InputOffsetsViewType must be a Kokkos::View.");
101 static_assert(std::is_integral<input_offset_type>::value,
102 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
103 static_assert(Kokkos::is_view<InputLocalRowIndicesViewType>::value,
104 "InputLocalRowIndicesViewType must be a Kokkos::View.");
105 static_assert(std::is_integral<local_row_index_type>::value,
106 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
107
109 const CountsViewType& counts,
113 : outputOffsets_(outputOffsets)
114 , counts_(counts)
115 , rowOffsets_(rowOffsets)
116 , lclRowInds_(lclRowInds)
117 , lclRowPids_(lclRowPids)
118 , error_("error") // don't forget this, or you'll get segfaults!
119 {
120 if (debug) {
121 const size_t numRowsToPack = static_cast<size_t>(lclRowInds_.extent(0));
122
123 if (numRowsToPack != static_cast<size_t>(counts_.extent(0))) {
124 std::ostringstream os;
125 os << "lclRowInds.extent(0) = " << numRowsToPack
126 << " != counts.extent(0) = " << counts_.extent(0)
127 << ".";
128 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str());
129 }
130 if (static_cast<size_t>(numRowsToPack + 1) !=
131 static_cast<size_t>(outputOffsets_.extent(0))) {
132 std::ostringstream os;
133 os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
134 << " != outputOffsets.extent(0) = " << outputOffsets_.extent(0)
135 << ".";
136 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str());
137 }
138 }
139 }
140
142 operator()(const local_row_index_type& curInd,
143 output_offset_type& update,
144 const bool final) const {
145 if (debug) {
146 if (curInd < static_cast<local_row_index_type>(0)) {
147 error_() = 1;
148 return;
149 }
150 }
151
152 if (final) {
153 if (debug) {
154 if (curInd >= static_cast<local_row_index_type>(outputOffsets_.extent(0))) {
155 error_() = 2;
156 return;
157 }
158 }
159 outputOffsets_(curInd) = update;
160 }
161
162 if (curInd < static_cast<local_row_index_type>(counts_.extent(0))) {
163 const auto lclRow = lclRowInds_(curInd);
164 if (static_cast<size_t>(lclRow + 1) >= static_cast<size_t>(rowOffsets_.extent(0)) ||
165 static_cast<local_row_index_type>(lclRow) < static_cast<local_row_index_type>(0)) {
166 error_() = 3;
167 return;
168 }
169 // count_type could differ from the type of each row offset.
170 // For example, row offsets might each be 64 bits, but if their
171 // difference always fits in 32 bits, we may then safely use a
172 // 32-bit count_type.
173 const count_type count =
174 static_cast<count_type>(rowOffsets_(lclRow + 1) - rowOffsets_(lclRow));
175
176 // We pack first the global column indices and then pids (if any),
177 // However, if the number of entries in the row is zero, we pack nothing.
178 const count_type numEntToPack = (count == 0)
179 ? static_cast<count_type>(0)
180 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
181
182 if (final) {
183 counts_(curInd) = numEntToPack;
184 }
185 update += numEntToPack;
186 }
187 }
188
189 // mfh 31 May 2017: Don't need init or join. If you have join, MUST
190 // have join both with and without volatile! Otherwise intrawarp
191 // joins are really slow on GPUs.
192
194 int getError() const {
195 auto error_h = Kokkos::create_mirror_view(error_);
196 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
197 // Note: In the UVM case, this would otherwise be a no-op
198 // and thus not fence, so the value might not be correct on return
199 // In the non-UVM case, create_mirror_view will block for the allocation
200 Kokkos::deep_copy(error_h, error_);
201
202 return error_h();
203 }
204
205 private:
206 OutputOffsetsViewType outputOffsets_;
207 CountsViewType counts_;
208 typename InputOffsetsViewType::const_type rowOffsets_;
209 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
210 typename InputLocalRowPidsViewType::const_type lclRowPids_;
211 Kokkos::View<int, device_type> error_;
212};
213
223template <class OutputOffsetsViewType,
224 class CountsViewType,
228typename CountsViewType::non_const_value_type
230 const CountsViewType& counts,
235 CountsViewType, typename InputOffsetsViewType::const_type,
236 typename InputLocalRowIndicesViewType::const_type,
237 typename InputLocalRowPidsViewType::const_type>
239 typedef typename CountsViewType::non_const_value_type count_type;
240 typedef typename OutputOffsetsViewType::size_type size_type;
241 typedef typename OutputOffsetsViewType::execution_space execution_space;
242 typedef typename functor_type::local_row_index_type LO;
243 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
244 const char prefix[] = "computeNumPacketsAndOffsets: ";
245
246 count_type count = 0;
247 const count_type numRowsToPack = lclRowInds.extent(0);
248
249 if (numRowsToPack == 0) {
250 return count;
251 } else {
252 TEUCHOS_TEST_FOR_EXCEPTION(rowOffsets.extent(0) <= static_cast<size_type>(1),
253 std::invalid_argument, prefix << "There is at least one row to pack, "
254 "but the graph has no rows. lclRowInds.extent(0) = "
255 << numRowsToPack << ", but rowOffsets.extent(0) = " << rowOffsets.extent(0) << " <= 1.");
257 static_cast<size_type>(numRowsToPack + 1),
258 std::invalid_argument,
259 prefix << "Output dimension does not match number of rows to pack. "
260 << "outputOffsets.extent(0) = " << outputOffsets.extent(0)
261 << " != lclRowInds.extent(0) + 1 = "
262 << static_cast<size_type>(numRowsToPack + 1) << ".");
263 TEUCHOS_TEST_FOR_EXCEPTION(counts.extent(0) != numRowsToPack, std::invalid_argument,
264 prefix << "counts.extent(0) = " << counts.extent(0)
265 << " != numRowsToPack = " << numRowsToPack << ".");
266
268 Kokkos::parallel_scan("Tpetra::Details::computeNumPacketsAndOffsets::scan", range_type(0, numRowsToPack + 1), f);
269
270 // At least in debug mode, this functor checks for errors.
271 const int errCode = f.getError();
272 TEUCHOS_TEST_FOR_EXCEPTION(errCode != 0, std::runtime_error, prefix << "parallel_scan error code " << errCode << " != 0.");
273
274#if 0
275 size_t total = 0;
276 for (LO k = 0; k < numRowsToPack; ++k) {
277 total += counts[k];
278 }
280 if (errStr.get () == NULL) {
281 errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
282 }
283 std::ostringstream& os = *errStr;
284 os << prefix
285 << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
286 << outputOffsets(numRowsToPack) << " != sum of counts = "
287 << total << "." << std::endl;
288 if (numRowsToPack != 0) {
289 // Only print the array if it's not too long.
290 if (numRowsToPack < static_cast<LO> (10)) {
291 os << "outputOffsets: [";
292 for (LO i = 0; i <= numRowsToPack; ++i) {
293 os << outputOffsets(i);
294 if (static_cast<LO> (i + 1) <= numRowsToPack) {
295 os << ",";
296 }
297 }
298 os << "]" << std::endl;
299 os << "counts: [";
300 for (LO i = 0; i < numRowsToPack; ++i) {
301 os << counts(i);
302 if (static_cast<LO> (i + 1) < numRowsToPack) {
303 os << ",";
304 }
305 }
306 os << "]" << std::endl;
307 }
308 else {
309 os << "outputOffsets(" << (numRowsToPack-1) << ") = "
310 << outputOffsets(numRowsToPack-1) << "." << std::endl;
311 }
312 }
314 return {false, errStr};
315 }
316#endif // HAVE_TPETRA_DEBUG
317
318 // Get last entry of outputOffsets, which is the sum of the entries
319 // of counts. Don't assume UVM.
320 using Tpetra::Details::getEntryOnHost;
321 return static_cast<count_type>(getEntryOnHost(outputOffsets,
323 }
324}
325
336template <class Packet,
337 class LocalMapType,
338 class BufferDeviceType,
339 class InputLidsType,
340 class InputPidsType>
342 size_t
344 const Kokkos::View<Packet*, BufferDeviceType>& exports,
345 const InputLidsType& lids_in,
346 const InputPidsType& pids_in,
347 const size_t offset,
348 const size_t num_ent,
349 const bool pack_pids) {
350 using LO = typename LocalMapType::local_ordinal_type;
351 using GO = typename LocalMapType::global_ordinal_type;
352
353 if (num_ent == 0) {
354 // Empty rows always take zero bytes, to ensure sparsity.
355 return static_cast<size_t>(0);
356 }
357
358 size_t num_ent_packed = num_ent;
359 if (pack_pids) {
361 }
362
363 // Copy column indices one at a time, so that we don't need
364 // temporary storage.
365 for (size_t k = 0; k < num_ent; ++k) {
366 const LO lid = lids_in[k];
367 const GO gid = col_map.getGlobalElement(lid);
368 exports(offset + k) = gid;
369 }
370 // Copy PIDs one at a time, so that we don't need temporary storage.
371 if (pack_pids) {
372 for (size_t k = 0; k < num_ent; ++k) {
373 const LO lid = lids_in[k];
374 const int pid = pids_in[lid];
375 exports(offset + num_ent + k) = static_cast<GO>(pid);
376 }
377 }
378
379 return num_ent_packed;
380}
381
382template <class Packet,
383 class LocalGraph,
384 class LocalMap,
385 class BufferDeviceType>
386struct PackCrsGraphFunctor {
387 using local_graph_type = LocalGraph;
388 using local_map_type = LocalMap;
389 using LO = typename local_map_type::local_ordinal_type;
390 using GO = typename local_map_type::global_ordinal_type;
391
392 using num_packets_per_lid_view_type =
393 Kokkos::View<const size_t*, BufferDeviceType>;
394 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
395 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
396 using export_lids_view_type =
398 using source_pids_view_type =
400
401 using count_type =
402 typename num_packets_per_lid_view_type::non_const_value_type;
403 using offset_type = typename offsets_view_type::non_const_value_type;
404 using value_type = Kokkos::pair<int, LO>;
405
406 static_assert(std::is_same<LO, typename local_graph_type::data_type>::value,
407 "local_map_type::local_ordinal_type and "
408 "local_graph_type::data_type must be the same.");
409
410 local_graph_type local_graph;
411 local_map_type local_col_map;
412 exports_view_type exports;
413 num_packets_per_lid_view_type num_packets_per_lid;
414 export_lids_view_type export_lids;
415 source_pids_view_type source_pids;
416 offsets_view_type offsets;
417 bool pack_pids;
418
419 PackCrsGraphFunctor(const local_graph_type& local_graph_in,
420 const local_map_type& local_col_map_in,
421 const exports_view_type& exports_in,
422 const num_packets_per_lid_view_type& num_packets_per_lid_in,
423 const export_lids_view_type& export_lids_in,
424 const source_pids_view_type& source_pids_in,
425 const offsets_view_type& offsets_in,
426 const bool pack_pids_in)
427 : local_graph(local_graph_in)
428 , local_col_map(local_col_map_in)
429 , exports(exports_in)
430 , num_packets_per_lid(num_packets_per_lid_in)
431 , export_lids(export_lids_in)
432 , source_pids(source_pids_in)
433 , offsets(offsets_in)
434 , pack_pids(pack_pids_in) {
435 const LO numRows = local_graph_in.numRows();
436 const LO rowMapDim =
437 static_cast<LO>(local_graph.row_map.extent(0));
438 TEUCHOS_TEST_FOR_EXCEPTION(numRows != 0 && rowMapDim != numRows + static_cast<LO>(1),
439 std::logic_error, "local_graph.row_map.extent(0) = " << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
440 }
441
442 KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
443 using ::Tpetra::Details::OrdinalTraits;
444 dst = Kokkos::make_pair(0, OrdinalTraits<LO>::invalid());
445 }
446
447 KOKKOS_INLINE_FUNCTION void
448 join(value_type& dst, const value_type& src) const {
449 // `dst` should reflect the first (least) bad index and all other
450 // associated error codes and data, so prefer keeping it.
451 if (src.first != 0 && dst.first == 0) {
452 dst = src;
453 }
454 }
455
456 KOKKOS_INLINE_FUNCTION
457 void operator()(const LO i, value_type& dst) const {
458 const size_t offset = offsets[i];
459 const LO export_lid = export_lids[i];
460 const size_t buf_size = exports.size();
461 const size_t num_packets_this_lid = num_packets_per_lid(i);
462 const size_t num_ent =
463 static_cast<size_t>(local_graph.row_map[export_lid + 1] - local_graph.row_map[export_lid]);
464
465 // Only pack this row's data if it has a nonzero number of
466 // entries. We can do this because receiving processes get the
467 // number of packets, and will know that zero packets means zero
468 // entries.
469 if (num_ent == 0) {
470 return;
471 }
472
473 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
474 if (dst.first != 0) { // keep only the first error
475 dst = Kokkos::make_pair(1, i); // invalid row
476 }
477 return;
478 } else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
479 if (dst.first != 0) { // keep only the first error
480 dst = Kokkos::make_pair(2, i); // out of bounds
481 }
482 return;
483 }
484
485 // We can now pack this row
486
487 // Since the graph is locally indexed on the calling process, we
488 // have to use its column Map (which it _must_ have in this case)
489 // to convert to global indices.
490 const auto row_beg = local_graph.row_map[export_lid];
491 const auto row_end = local_graph.row_map[export_lid + 1];
492 auto lids_in = Kokkos::subview(local_graph.entries,
493 Kokkos::make_pair(row_beg, row_end));
494 size_t num_ent_packed_this_row =
495 packRow(local_col_map, exports, lids_in,
496 source_pids, offset, num_ent, pack_pids);
497 if (num_ent_packed_this_row != num_packets_this_lid) {
498 if (dst.first != 0) { // keep only the first error
499 dst = Kokkos::make_pair(3, i);
500 }
501 }
502 }
503};
504
512template <class Packet,
513 class LocalGraph,
514 class LocalMap,
515 class BufferDeviceType>
516void do_pack(const LocalGraph& local_graph,
517 const LocalMap& local_map,
518 const Kokkos::View<Packet*, BufferDeviceType>& exports,
519 const typename PackTraits<
520 size_t>::input_array_type& num_packets_per_lid,
521 const typename PackTraits<
522 typename LocalMap::local_ordinal_type>::input_array_type& export_lids,
523 const typename PackTraits<
524 int>::input_array_type& source_pids,
525 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
526 const bool pack_pids) {
527 using LO = typename LocalMap::local_ordinal_type;
528 using execution_space = typename LocalGraph::device_type::execution_space;
529 using range_type = Kokkos::RangePolicy<execution_space, LO>;
530 const char prefix[] = "Tpetra::Details::PackCrsGraphImpl::do_pack: ";
531
532 if (export_lids.extent(0) != 0) {
533 TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(offsets.extent(0)) !=
534 static_cast<size_t>(export_lids.extent(0) + 1),
535 std::invalid_argument, prefix << "offsets.extent(0) = " << offsets.extent(0) << " != export_lids.extent(0) (= " << export_lids.extent(0) << ") + 1.");
536 TEUCHOS_TEST_FOR_EXCEPTION(export_lids.extent(0) != num_packets_per_lid.extent(0),
537 std::invalid_argument, prefix << "export_lids.extent(0) = " << export_lids.extent(0) << " != num_packets_per_lid.extent(0) = " << num_packets_per_lid.extent(0) << ".");
538 // If exports has nonzero length at this point, then the graph
539 // has at least one entry to pack. Thus, if packing process
540 // ranks, we had better have at least one process rank to pack.
541 TEUCHOS_TEST_FOR_EXCEPTION(pack_pids && exports.extent(0) != 0 &&
542 source_pids.extent(0) == 0,
543 std::invalid_argument, prefix << "pack_pids is true, and exports.extent(0) = " << exports.extent(0) << " != 0, meaning that we need to pack at "
544 "least one graph entry, but source_pids.extent(0) = 0.");
545 }
546
547 using pack_functor_type =
548 PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
550 pack_functor_type f(local_graph, local_map, exports,
551 num_packets_per_lid, export_lids,
552 source_pids, offsets, pack_pids);
553
554 typename pack_functor_type::value_type result;
555 range_type range(0, num_packets_per_lid.extent(0));
556 Kokkos::parallel_reduce("Tpetra::Details::computeNumPacketsAndOffsets::reduce", range, f, result);
557
558 if (result.first != 0) {
559 // We can't deep_copy from AnonymousSpace Views, so we can't
560 // print out any information from them in case of error.
561 std::ostringstream os;
562 if (result.first == 1) { // invalid local row index
563 os << "invalid local row index";
564 } else if (result.first == 2) { // invalid offset
565 os << "invalid offset";
566 }
567 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, prefix << "PackCrsGraphFunctor "
568 "reported error code "
569 << result.first << " (" << os.str() << ") for the first bad row " << result.second << ".");
570 }
571}
572
599template <typename LO, typename GO, typename NT>
601 Kokkos::DualView<
604 const Kokkos::View<
605 size_t*,
606 typename CrsGraph<LO, GO, NT>::buffer_device_type>& num_packets_per_lid,
607 const Kokkos::View<
608 const LO*,
609 typename CrsGraph<LO, GO, NT>::buffer_device_type>& export_lids,
610 const Kokkos::View<
611 const int*,
613 size_t& constant_num_packets,
614 const bool pack_pids) {
615 using Kokkos::View;
616 using crs_graph_type = CrsGraph<LO, GO, NT>;
617 using packet_type = typename crs_graph_type::packet_type;
618 using buffer_device_type = typename crs_graph_type::buffer_device_type;
619 using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
620 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
621 using local_map_type = typename Tpetra::Map<LO, GO, NT>::local_map_type;
622 const char prefix[] = "Tpetra::Details::packCrsGraph: ";
623 constexpr bool debug = false;
624
625 local_graph_device_type local_graph = sourceGraph.getLocalGraphDevice();
626 local_map_type local_col_map = sourceGraph.getColMap()->getLocalMap();
627
628 // Setting this to zero tells the caller to expect a possibly
629 // different ("nonconstant") number of packets per local index
630 // (i.e., a possibly different number of entries per row).
632
633 const size_t num_export_lids(export_lids.extent(0));
634 TEUCHOS_TEST_FOR_EXCEPTION(num_export_lids != size_t(num_packets_per_lid.extent(0)),
635 std::invalid_argument, prefix << "num_export_lids.extent(0) = " << num_export_lids << " != num_packets_per_lid.extent(0) = " << num_packets_per_lid.extent(0) << ".");
636 if (num_export_lids != 0) {
637 TEUCHOS_TEST_FOR_EXCEPTION(num_packets_per_lid.data() == nullptr, std::invalid_argument,
638 prefix << "num_export_lids = " << num_export_lids << " != 0, but "
639 "num_packets_per_lid.data() = "
640 << num_packets_per_lid.data() << " == NULL.");
641 }
642
643 if (num_export_lids == 0) {
644 exports = exports_view_type("exports", 0);
645 return;
646 }
647
648 // Array of offsets into the pack buffer.
650
651 // Compute number of packets per LID (row to send), as well as
652 // corresponding offsets (the prefix sum of the packet counts).
653 const size_t count =
654 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
655 local_graph.row_map, export_lids, export_pids);
656
657 // Resize the output pack buffer if needed.
658 if (count > size_t(exports.extent(0))) {
659 exports = exports_view_type("exports", count);
660 if (debug) {
661 std::ostringstream os;
662 os << "*** exports resized to " << count << std::endl;
663 std::cerr << os.str();
664 }
665 }
666 if (debug) {
667 std::ostringstream os;
668 os << "*** count: " << count << ", exports.extent(0): "
669 << exports.extent(0) << std::endl;
670 std::cerr << os.str();
671 }
672
673 // If exports has nonzero length at this point, then the graph has
674 // at least one entry to pack. Thus, if packing process ranks, we
675 // had better have at least one process rank to pack.
676 TEUCHOS_TEST_FOR_EXCEPTION(pack_pids && exports.extent(0) != 0 &&
677 export_pids.extent(0) == 0,
678 std::invalid_argument, prefix << "pack_pids is true, and exports.extent(0) = " << exports.extent(0) << " != 0, meaning that we need to pack at least "
679 "one graph entry, but export_pids.extent(0) = 0.");
680
681 exports.modify_device();
682 auto exports_d = exports.view_device();
684 export_lids, export_pids, offsets, pack_pids);
685 // If we got this far, we succeeded.
686}
687
688} // namespace PackCrsGraphImpl
689
690template <typename LO, typename GO, typename NT>
692 Teuchos::Array<typename CrsGraph<LO, GO, NT>::packet_type>& exports,
693 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
694 const Teuchos::ArrayView<const LO>& exportLIDs,
695 size_t& constantNumPackets) {
696 using Kokkos::HostSpace;
697 using Kokkos::MemoryUnmanaged;
698 using Kokkos::View;
699 using crs_graph_type = CrsGraph<LO, GO, NT>;
700 using packet_type = typename crs_graph_type::packet_type;
701 using BDT = typename crs_graph_type::buffer_device_type;
702
703 // Convert all Teuchos::Array to Kokkos::View
704
705 // This is an output array, so we don't have to copy to device here.
706 // However, we'll have to remember to copy back to host when done.
710 numPacketsPerLID.getRawPtr(),
711 numPacketsPerLID.size(), false,
712 "num_packets_per_lid");
713 // This is an input array, so we have to copy to device here.
714 // However, we never need to copy it back to host.
717 exportLIDs.getRawPtr(),
718 exportLIDs.size(), true,
719 "export_lids");
721 Kokkos::DualView<packet_type*, BDT> exports_dv;
722 constexpr bool pack_pids = false;
723
724 static_assert(std::is_same<
725 typename decltype(num_packets_per_lid_d)::non_const_value_type,
726 size_t>::value,
727 "num_packets_per_lid_d's non_const_value_type should be size_t.");
728 static_assert(std::is_same<
729 typename decltype(num_packets_per_lid_d)::device_type,
730 BDT>::value,
731 "num_packets_per_lid_d's BDT should be size_t.");
732 static_assert(std::is_same<
733 typename decltype(export_lids_d)::device_type,
734 BDT>::value,
735 "export_lids_d's device_type should be BDT.");
736 static_assert(std::is_same<
737 typename decltype(export_pids_d)::non_const_value_type,
738 int>::value,
739 "export_pids_d's non_const_value_type should be int.");
740 static_assert(std::is_same<
741 typename decltype(export_pids_d)::device_type,
742 BDT>::value,
743 "export_pids_d's device_type should be BDT.");
744
745 PackCrsGraphImpl::packCrsGraph(sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
747
748 // The counts are an output of packCrsGraph, so we have to copy
749 // them back to host.
752 numPacketsPerLID.size());
753
754 // DEEP_COPY REVIEW - DEVICE-TO-HOST
755 using execution_space = typename BDT::execution_space;
756 Kokkos::deep_copy(execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
757
758 // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
759 // exports_dv above, then we have two host copies for exports_h.
760
761 // The exports are an output of packCrsGraph, so we have to
762 // copy them back to host.
763 if (static_cast<size_t>(exports.size()) !=
764 static_cast<size_t>(exports_dv.extent(0))) {
765 exports.resize(exports_dv.extent(0));
766 }
768 exports_h(exports.getRawPtr(), exports.size());
769 // DEEP_COPY REVIEW - DEVICE-TO-HOST
770 Kokkos::deep_copy(execution_space(), exports_h, exports_dv.view_device());
771 execution_space().fence();
772}
773
776template <typename LO, typename GO, typename NT>
778 const Kokkos::DualView<
779 const LO*,
780 typename CrsGraph<LO, GO, NT>::buffer_device_type>& export_lids,
781 const Kokkos::DualView<
782 const int*,
784 Kokkos::DualView<
787 Kokkos::DualView<
788 size_t*,
790 num_packets_per_lid,
791 size_t& constant_num_packets,
792 const bool pack_pids) {
793 using Kokkos::View;
794 using crs_graph_type = CrsGraph<LO, GO, NT>;
795 using BDT = typename crs_graph_type::buffer_device_type;
796 using PT = typename crs_graph_type::packet_type;
797 using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
798 using LGT = typename crs_graph_type::local_graph_device_type;
799 using LMT = typename crs_graph_type::map_type::local_map_type;
800 const char prefix[] = "Tpetra::Details::packCrsGraphNew: ";
801
802 const LGT local_graph = sourceGraph.getLocalGraphDevice();
803 const LMT local_col_map = sourceGraph.getColMap()->getLocalMap();
804
805 // Setting this to zero tells the caller to expect a possibly
806 // different ("nonconstant") number of packets per local index
807 // (i.e., a possibly different number of entries per row).
809
810 const size_t num_export_lids =
811 static_cast<size_t>(export_lids.extent(0));
813 static_cast<size_t>(num_packets_per_lid.extent(0)),
814 std::invalid_argument, prefix << "num_export_lids.extent(0) = " << num_export_lids << " != num_packets_per_lid.extent(0) = " << num_packets_per_lid.extent(0) << ".");
816 num_packets_per_lid.view_device().data() == nullptr,
817 std::invalid_argument, prefix << "num_export_lids = " << num_export_lids << " != 0, but num_packets_per_lid.view_device().data() = nullptr.");
818
819 if (num_export_lids == 0) {
820 exports = exports_dual_view_type();
821 return;
822 }
823
824 // Array of offsets into the pack buffer.
825 using offsets_type = Kokkos::View<size_t*, BDT>;
826 offsets_type offsets("offsets", num_export_lids + 1);
827
828 // Compute number of packets per LID (row to send), as well as
829 // corresponding offsets (the prefix sum of the packet counts).
830 num_packets_per_lid.clear_sync_state();
831 num_packets_per_lid.modify_device();
832 using PackCrsGraphImpl::computeNumPacketsAndOffsets;
833 const size_t count =
834 computeNumPacketsAndOffsets(offsets, num_packets_per_lid.view_device(),
835 local_graph.row_map,
836 export_lids.view_device(),
837 export_pids.view_device());
838
839 // Resize the output pack buffer if needed.
840 if (count > static_cast<size_t>(exports.extent(0))) {
841 exports = exports_dual_view_type("exports", count);
842 }
843
844 // If exports has nonzero length at this point, then the graph has
845 // at least one entry to pack. Thus, if packing process ranks, we
846 // had better have at least one process rank to pack.
847 TEUCHOS_TEST_FOR_EXCEPTION(pack_pids && exports.extent(0) != 0 &&
848 export_pids.extent(0) == 0,
849 std::invalid_argument, prefix << "pack_pids is true, and exports.extent(0) = " << exports.extent(0) << " != 0, meaning that we need to pack at least "
850 "one graph entry, but export_pids.extent(0) = 0.");
851
852 exports.modify_device();
853 using PackCrsGraphImpl::do_pack;
854 do_pack<PT, LGT, LMT, BDT>(local_graph, local_col_map,
855 exports.view_device(),
856 num_packets_per_lid.view_device(),
857 export_lids.view_device(),
858 export_pids.view_device(),
859 offsets, pack_pids);
860}
861
862template <typename LO, typename GO, typename NT>
864 Kokkos::DualView<
867 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
868 const Teuchos::ArrayView<const LO>& exportLIDs,
869 const Teuchos::ArrayView<const int>& sourcePIDs,
870 size_t& constantNumPackets) {
871 using Kokkos::HostSpace;
872 using Kokkos::MemoryUnmanaged;
873 using Kokkos::View;
874 using crs_graph_type = CrsGraph<LO, GO, NT>;
875 using buffer_device_type = typename crs_graph_type::buffer_device_type;
876
877 // Convert all Teuchos::Array to Kokkos::View
878
879 // This is an output array, so we don't have to copy to device here.
880 // However, we'll have to remember to copy back to host when done.
882 create_mirror_view_from_raw_host_array(buffer_device_type(),
883 numPacketsPerLID.getRawPtr(),
884 numPacketsPerLID.size(), false,
885 "num_packets_per_lid");
886
887 // This is an input array, so we have to copy to device here.
888 // However, we never need to copy it back to host.
890 create_mirror_view_from_raw_host_array(buffer_device_type(),
891 exportLIDs.getRawPtr(),
892 exportLIDs.size(), true,
893 "export_lids");
894 // This is an input array, so we have to copy to device here.
895 // However, we never need to copy it back to host.
897 create_mirror_view_from_raw_host_array(buffer_device_type(),
898 sourcePIDs.getRawPtr(),
899 sourcePIDs.size(), true,
900 "export_pids");
901 constexpr bool pack_pids = true;
902 PackCrsGraphImpl::packCrsGraph(sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
904
905 // The counts are an output of packCrsGraph, so we
906 // have to copy them back to host.
908 // DEEP_COPY REVIEW - DEVICE-TO-HOST
909 using execution_space = typename buffer_device_type::execution_space;
910 Kokkos::deep_copy(execution_space(),
912 execution_space().fence();
913}
914
915} // namespace Details
916} // namespace Tpetra
917
// Explicit instantiation macro.  For one (LO, GO, NT) combination it
// instantiates Details::packCrsGraph, Details::packCrsGraphNew, and
// Details::packCrsGraphWithOwningPIDs.  `Details` and `CrsGraph` are
// named without qualification, so the macro has to be expanded where
// both are visible (presumably inside namespace Tpetra).
#define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT(LO, GO, NT) \
  template void Details::packCrsGraph<LO, GO, NT>( \
      const CrsGraph<LO, GO, NT>&, \
      Teuchos::Array<CrsGraph<LO, GO, NT>::packet_type>&, \
      const Teuchos::ArrayView<size_t>&, \
      const Teuchos::ArrayView<const LO>&, \
      size_t&); \
  template void Details::packCrsGraphNew<LO, GO, NT>( \
      const CrsGraph<LO, GO, NT>&, \
      const Kokkos::DualView<const LO*, CrsGraph<LO, GO, NT>::buffer_device_type>&, \
      const Kokkos::DualView<const int*, CrsGraph<LO, GO, NT>::buffer_device_type>&, \
      Kokkos::DualView<CrsGraph<LO, GO, NT>::packet_type*, \
                       CrsGraph<LO, GO, NT>::buffer_device_type>&, \
      Kokkos::DualView<size_t*, CrsGraph<LO, GO, NT>::buffer_device_type>, \
      size_t&, \
      const bool); \
  template void Details::packCrsGraphWithOwningPIDs<LO, GO, NT>( \
      const CrsGraph<LO, GO, NT>&, \
      Kokkos::DualView<CrsGraph<LO, GO, NT>::packet_type*, \
                       CrsGraph<LO, GO, NT>::buffer_device_type>&, \
      const Teuchos::ArrayView<size_t>&, \
      const Teuchos::ArrayView<const LO>&, \
      const Teuchos::ArrayView<const int>&, \
      size_t&);
951
952#endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
Declaration of the Tpetra::CrsGraph class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
CountsViewType::non_const_value_type computeNumPacketsAndOffsets(const OutputOffsetsViewType &outputOffsets, const CountsViewType &counts, const InputOffsetsViewType &rowOffsets, const InputLocalRowIndicesViewType &lclRowInds, const InputLocalRowPidsViewType &lclRowPids)
Compute the number of packets and offsets for the pack procedure.
void do_pack(const LocalGraph &local_graph, const LocalMap &local_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const typename PackTraits< size_t >::input_array_type &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type &export_lids, const typename PackTraits< int >::input_array_type &source_pids, const Kokkos::View< const size_t *, BufferDeviceType > &offsets, const bool pack_pids)
Perform the pack operation for the graph.
KOKKOS_FUNCTION size_t packRow(const LocalMapType &col_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const InputLidsType &lids_in, const InputPidsType &pids_in, const size_t offset, const size_t num_ent, const bool pack_pids)
Packs a single row of the CrsGraph.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
Struct that holds views of the contents of a CrsMatrix.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
Implementation details of Tpetra.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for the "new" DistObject interface.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.