Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_CRSGRAPH_DEF_HPP
11#define TPETRA_CRSGRAPH_DEF_HPP
12
15
16#include <memory>
18#ifdef KOKKOS_ENABLE_SYCL
19#include <sycl/sycl.hpp>
20#endif
21
26#include "Tpetra_Details_getGraphDiagOffsets.hpp"
27#include "Tpetra_Details_getGraphOffRankOffsets.hpp"
28#include "Tpetra_Details_makeColMap.hpp"
32#include "Tpetra_Distributor.hpp"
33#include "Teuchos_SerialDenseMatrix.hpp"
34#include "Tpetra_Vector.hpp"
37#include "Tpetra_Details_packCrsGraph.hpp"
38#include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
39#include "Tpetra_Details_CrsPadding.hpp"
40#include "Tpetra_Util.hpp"
41#include <algorithm>
42#include <limits>
43#include <map>
44#include <sstream>
45#include <string>
46#include <type_traits>
47#include <utility>
48#include <vector>
49
50namespace Tpetra {
51namespace Details {
52namespace Impl {
53
54template <class MapIter>
55void verbosePrintMap(std::ostream& out,
56 MapIter beg,
57 MapIter end,
58 const size_t numEnt,
59 const char mapName[]) {
60 using ::Tpetra::Details::Behavior;
61 using ::Tpetra::Details::verbosePrintArray;
62
63 out << mapName << ": {";
64 const size_t maxNumToPrint =
66 if (maxNumToPrint == 0) {
67 if (numEnt != 0) {
68 out << "...";
69 }
70 } else {
71 const size_t numToPrint = numEnt > maxNumToPrint ? maxNumToPrint : numEnt;
72 size_t count = 0;
73 for (MapIter it = beg; it != end; ++it) {
74 out << "(" << (*it).first << ", ";
75 verbosePrintArray(out, (*it).second, "gblColInds",
76 maxNumToPrint);
77 out << ")";
78 if (count + size_t(1) < numToPrint) {
79 out << ", ";
80 }
81 ++count;
82 }
83 if (count < numEnt) {
84 out << ", ...";
85 }
86 }
87 out << "}";
88}
89
90template <class LO, class GO, class Node>
91Teuchos::ArrayView<GO>
92getRowGraphGlobalRow(
93 std::vector<GO>& gblColIndsStorage,
94 const RowGraph<LO, GO, Node>& graph,
95 const GO gblRowInd) {
96 size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
97 if (gblColIndsStorage.size() < origNumEnt) {
98 gblColIndsStorage.resize(origNumEnt);
99 }
100 typename CrsGraph<LO, GO, Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
101 origNumEnt);
102 graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
103 Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(), origNumEnt);
104 return retval;
105}
106
107template <class LO, class GO, class DT, class OffsetType, class NumEntType>
108class ConvertColumnIndicesFromGlobalToLocal {
109 public:
110 ConvertColumnIndicesFromGlobalToLocal(const ::Kokkos::View<LO*, DT>& lclColInds,
111 const ::Kokkos::View<const GO*, DT>& gblColInds,
112 const ::Kokkos::View<const OffsetType*, DT>& ptr,
113 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
114 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
115 : lclColInds_(lclColInds)
116 , gblColInds_(gblColInds)
117 , ptr_(ptr)
118 , lclColMap_(lclColMap)
119 , numRowEnt_(numRowEnt) {}
120
121 KOKKOS_FUNCTION void
122 operator()(const LO& lclRow, OffsetType& curNumBad) const {
123 const OffsetType offset = ptr_(lclRow);
124 // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
125 // of entries in a row to LO, as long as the row doesn't have
126 // too many duplicate entries.
127 const LO numEnt = static_cast<LO>(numRowEnt_(lclRow));
128 for (LO j = 0; j < numEnt; ++j) {
129 const GO gid = gblColInds_(offset + j);
130 const LO lid = lclColMap_.getLocalElement(gid);
131 lclColInds_(offset + j) = lid;
132 if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid()) {
133 ++curNumBad;
134 }
135 }
136 }
137
138 static OffsetType
139 run(const ::Kokkos::View<LO*, DT>& lclColInds,
140 const ::Kokkos::View<const GO*, DT>& gblColInds,
141 const ::Kokkos::View<const OffsetType*, DT>& ptr,
142 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
143 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) {
144 typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
145 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
146
147 const LO lclNumRows = ptr.extent(0) == 0 ? static_cast<LO>(0) : static_cast<LO>(ptr.extent(0) - 1);
148 OffsetType numBad = 0;
149 // Count of "bad" column indices is a reduction over rows.
150 ::Kokkos::parallel_reduce(range_type(0, lclNumRows),
151 functor_type(lclColInds, gblColInds, ptr,
152 lclColMap, numRowEnt),
153 numBad);
154 return numBad;
155 }
156
157 private:
158 ::Kokkos::View<LO*, DT> lclColInds_;
159 ::Kokkos::View<const GO*, DT> gblColInds_;
160 ::Kokkos::View<const OffsetType*, DT> ptr_;
162 ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
163};
164
165} // namespace Impl
166
181template <class LO, class GO, class DT, class OffsetType, class NumEntType>
182OffsetType
184 const Kokkos::View<const GO*, DT>& gblColInds,
185 const Kokkos::View<const OffsetType*, DT>& ptr,
187 const Kokkos::View<const NumEntType*, DT>& numRowEnt) {
188 using Impl::ConvertColumnIndicesFromGlobalToLocal;
190 return impl_type::run(lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
191}
192
193template <class ViewType, class LO>
194class MaxDifference {
195 public:
196 MaxDifference(const ViewType& ptr)
197 : ptr_(ptr) {}
198
199 KOKKOS_INLINE_FUNCTION void init(LO& dst) const {
200 dst = 0;
201 }
202
203 KOKKOS_INLINE_FUNCTION void
204 join(LO& dst, const LO& src) const {
205 dst = (src > dst) ? src : dst;
206 }
207
208 KOKKOS_INLINE_FUNCTION void
209 operator()(const LO lclRow, LO& maxNumEnt) const {
210 const LO numEnt = static_cast<LO>(ptr_(lclRow + 1) - ptr_(lclRow));
211 maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
212 }
213
214 private:
215 typename ViewType::const_type ptr_;
216};
217
218template <class ViewType, class LO>
219typename ViewType::non_const_value_type
220maxDifference(const char kernelLabel[],
221 const ViewType& ptr,
222 const LO lclNumRows) {
223 if (lclNumRows == 0) {
224 // mfh 07 May 2018: Weirdly, I need this special case,
225 // otherwise I get the wrong answer.
226 return static_cast<LO>(0);
227 } else {
228 using execution_space = typename ViewType::execution_space;
229 using range_type = Kokkos::RangePolicy<execution_space, LO>;
230 LO theMaxNumEnt{0};
231 Kokkos::parallel_reduce(kernelLabel,
232 range_type(0, lclNumRows),
233 MaxDifference<ViewType, LO>(ptr),
234 theMaxNumEnt);
235 return theMaxNumEnt;
236 }
237}
238
239} // namespace Details
240
241template <class LocalOrdinal, class GlobalOrdinal, class Node>
242bool CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
243 getDebug() {
244 return Details::Behavior::debug("CrsGraph");
245}
246
247template <class LocalOrdinal, class GlobalOrdinal, class Node>
248bool CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
249 getVerbose() {
250 return Details::Behavior::verbose("CrsGraph");
251}
252
253template <class LocalOrdinal, class GlobalOrdinal, class Node>
255 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
256 const size_t maxNumEntriesPerRow,
257 const Teuchos::RCP<Teuchos::ParameterList>& params)
258 : dist_object_type(rowMap)
259 , rowMap_(rowMap)
260 , numAllocForAllRows_(maxNumEntriesPerRow) {
261 const char tfecfFuncName[] =
262 "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
263 staticAssertions();
264 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid(),
265 std::invalid_argument,
266 "The allocation hint maxNumEntriesPerRow must be "
267 "a valid size_t value, which in this case means it must not be "
268 "Teuchos::OrdinalTraits<size_t>::invalid().");
271}
272
273template <class LocalOrdinal, class GlobalOrdinal, class Node>
275 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
276 const Teuchos::RCP<const map_type>& colMap,
277 const size_t maxNumEntriesPerRow,
278 const Teuchos::RCP<Teuchos::ParameterList>& params)
279 : dist_object_type(rowMap)
280 , rowMap_(rowMap)
281 , colMap_(colMap)
282 , numAllocForAllRows_(maxNumEntriesPerRow) {
283 const char tfecfFuncName[] =
284 "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
285 staticAssertions();
287 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid(),
288 std::invalid_argument,
289 "The allocation hint maxNumEntriesPerRow must be "
290 "a valid size_t value, which in this case means it must not be "
291 "Teuchos::OrdinalTraits<size_t>::invalid().");
294}
295
296template <class LocalOrdinal, class GlobalOrdinal, class Node>
298 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
299 const Teuchos::ArrayView<const size_t>& numEntPerRow,
300 const Teuchos::RCP<Teuchos::ParameterList>& params)
301 : dist_object_type(rowMap)
302 , rowMap_(rowMap)
303 , numAllocForAllRows_(0) {
304 const char tfecfFuncName[] =
305 "CrsGraph(rowMap,numEntPerRow,params): ";
306 staticAssertions();
307
308 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
310 static_cast<size_t>(numEntPerRow.size()) != lclNumRows,
311 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size() << " != the local number of rows " << lclNumRows << " as specified by "
312 "the input row Map.");
313
314 if (debug_) {
315 for (size_t r = 0; r < lclNumRows; ++r) {
316 const size_t curRowCount = numEntPerRow[r];
317 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
318 std::invalid_argument, "numEntPerRow(" << r << ") "
319 "specifies an invalid number of entries "
320 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
321 }
322 }
323
324 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
325 // The latter is a const View, so we have to copy into a nonconst
326 // View first, then assign.
327 typedef decltype(k_numAllocPerRow_) out_view_type;
328 typedef typename out_view_type::non_const_type nc_view_type;
329 typedef Kokkos::View<const size_t*,
330 typename nc_view_type::array_layout,
331 Kokkos::HostSpace,
332 Kokkos::MemoryUnmanaged>
335 nc_view_type numAllocPerRowOut("Tpetra::CrsGraph::numAllocPerRow",
336 lclNumRows);
337 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
338 using exec_space = typename nc_view_type::execution_space;
339 Kokkos::deep_copy(exec_space(), numAllocPerRowOut, numAllocPerRowIn);
341
344}
346template <class LocalOrdinal, class GlobalOrdinal, class Node>
348 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
349 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
350 const Teuchos::RCP<Teuchos::ParameterList>& params)
351 : dist_object_type(rowMap)
352 , rowMap_(rowMap)
353 , k_numAllocPerRow_(numEntPerRow.view_host())
354 , numAllocForAllRows_(0) {
355 const char tfecfFuncName[] =
356 "CrsGraph(rowMap,numEntPerRow,params): ";
357 staticAssertions();
358
359 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
361 static_cast<size_t>(numEntPerRow.extent(0)) != lclNumRows,
362 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.extent(0) << " != the local number of rows " << lclNumRows << " as specified by "
363 "the input row Map.");
365 if (debug_) {
366 for (size_t r = 0; r < lclNumRows; ++r) {
367 const size_t curRowCount = numEntPerRow.view_host()(r);
368 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
369 std::invalid_argument, "numEntPerRow(" << r << ") "
370 "specifies an invalid number of entries "
371 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
372 }
373 }
374
377}
378
379template <class LocalOrdinal, class GlobalOrdinal, class Node>
381 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
382 const Teuchos::RCP<const map_type>& colMap,
383 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
384 const Teuchos::RCP<Teuchos::ParameterList>& params)
385 : dist_object_type(rowMap)
386 , rowMap_(rowMap)
387 , colMap_(colMap)
388 , k_numAllocPerRow_(numEntPerRow.view_host())
389 , numAllocForAllRows_(0) {
390 const char tfecfFuncName[] =
391 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
392 staticAssertions();
393
394 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
396 static_cast<size_t>(numEntPerRow.extent(0)) != lclNumRows,
397 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.extent(0) << " != the local number of rows " << lclNumRows << " as specified by "
398 "the input row Map.");
399
400 if (debug_) {
401 for (size_t r = 0; r < lclNumRows; ++r) {
402 const size_t curRowCount = numEntPerRow.view_host()(r);
403 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
404 std::invalid_argument, "numEntPerRow(" << r << ") "
405 "specifies an invalid number of entries "
406 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
407 }
408 }
409
410 resumeFill(params);
411 checkInternalState();
412}
413
414template <class LocalOrdinal, class GlobalOrdinal, class Node>
416 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
417 const Teuchos::RCP<const map_type>& colMap,
418 const Teuchos::ArrayView<const size_t>& numEntPerRow,
419 const Teuchos::RCP<Teuchos::ParameterList>& params)
420 : dist_object_type(rowMap)
421 , rowMap_(rowMap)
422 , colMap_(colMap)
423 , numAllocForAllRows_(0) {
424 const char tfecfFuncName[] =
425 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
426 staticAssertions();
428 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
430 static_cast<size_t>(numEntPerRow.size()) != lclNumRows,
431 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size() << " != the local number of rows " << lclNumRows << " as specified by "
432 "the input row Map.");
433
434 if (debug_) {
435 for (size_t r = 0; r < lclNumRows; ++r) {
436 const size_t curRowCount = numEntPerRow[r];
437 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
438 std::invalid_argument, "numEntPerRow(" << r << ") "
439 "specifies an invalid number of entries "
440 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
441 }
442 }
443
444 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
445 // The latter is a const View, so we have to copy into a nonconst
446 // View first, then assign.
447 typedef decltype(k_numAllocPerRow_) out_view_type;
448 typedef typename out_view_type::non_const_type nc_view_type;
449 typedef Kokkos::View<const size_t*,
450 typename nc_view_type::array_layout,
451 Kokkos::HostSpace,
452 Kokkos::MemoryUnmanaged>
453 in_view_type;
454 in_view_type numAllocPerRowIn(numEntPerRow.getRawPtr(), lclNumRows);
455 nc_view_type numAllocPerRowOut("Tpetra::CrsGraph::numAllocPerRow",
456 lclNumRows);
457 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
458 using exec_space = typename nc_view_type::execution_space;
459 Kokkos::deep_copy(exec_space(), numAllocPerRowOut, numAllocPerRowIn);
461
464}
465
466template <class LocalOrdinal, class GlobalOrdinal, class Node>
469 const Teuchos::RCP<const map_type>& rowMap,
470 const Teuchos::RCP<Teuchos::ParameterList>& params)
471 : dist_object_type(rowMap)
472 , rowMap_(rowMap)
473 , colMap_(originalGraph.colMap_)
474 , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
475 , storageStatus_(originalGraph.storageStatus_)
476 , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
477 , indicesAreLocal_(originalGraph.indicesAreLocal_)
478 , indicesAreSorted_(originalGraph.indicesAreSorted_) {
479 staticAssertions();
480
481 int numRows = rowMap->getLocalNumElements();
482 size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
483 auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows + 1);
484
485 this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
486 this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
487
488 if (indicesAreLocal_) {
491 } else {
492 gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
493 }
494
496}
497
498template <class LocalOrdinal, class GlobalOrdinal, class Node>
500 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
501 const Teuchos::RCP<const map_type>& colMap,
502 const typename local_graph_device_type::row_map_type& rowPointers,
503 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
504 const Teuchos::RCP<Teuchos::ParameterList>& params)
505 : dist_object_type(rowMap)
506 , rowMap_(rowMap)
507 , colMap_(colMap)
508 , numAllocForAllRows_(0)
509 , storageStatus_(Details::STORAGE_1D_PACKED)
510 , indicesAreAllocated_(true)
511 , indicesAreLocal_(true) {
512 staticAssertions();
513 if (!params.is_null() && params->isParameter("sorted") &&
514 !params->get<bool>("sorted")) {
515 indicesAreSorted_ = false;
516 } else {
517 indicesAreSorted_ = true;
518 }
519 setAllIndices(rowPointers, columnIndices);
521}
522
523template <class LocalOrdinal, class GlobalOrdinal, class Node>
525 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
526 const Teuchos::RCP<const map_type>& colMap,
527 const Teuchos::ArrayRCP<size_t>& rowPointers,
528 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices,
529 const Teuchos::RCP<Teuchos::ParameterList>& params)
530 : dist_object_type(rowMap)
531 , rowMap_(rowMap)
532 , colMap_(colMap)
533 , numAllocForAllRows_(0)
534 , storageStatus_(Details::STORAGE_1D_PACKED)
535 , indicesAreAllocated_(true)
536 , indicesAreLocal_(true) {
537 staticAssertions();
538 if (!params.is_null() && params->isParameter("sorted") &&
539 !params->get<bool>("sorted")) {
540 indicesAreSorted_ = false;
541 } else {
542 indicesAreSorted_ = true;
543 }
544 setAllIndices(rowPointers, columnIndices);
545 checkInternalState();
546}
547
548template <class LocalOrdinal, class GlobalOrdinal, class Node>
550 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
551 const Teuchos::RCP<const map_type>& colMap,
553 const Teuchos::RCP<Teuchos::ParameterList>& params)
555 rowMap,
556 colMap,
557 Teuchos::null,
558 Teuchos::null,
559 params) {}
560
561template <class LocalOrdinal, class GlobalOrdinal, class Node>
564 const Teuchos::RCP<const map_type>& rowMap,
565 const Teuchos::RCP<const map_type>& colMap,
566 const Teuchos::RCP<const map_type>& domainMap,
567 const Teuchos::RCP<const map_type>& rangeMap,
568 const Teuchos::RCP<Teuchos::ParameterList>& params)
570 , rowMap_(rowMap)
571 , colMap_(colMap)
572 , numAllocForAllRows_(0)
573 , storageStatus_(Details::STORAGE_1D_PACKED)
574 , indicesAreAllocated_(true)
575 , indicesAreLocal_(true) {
576 staticAssertions();
577 const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
578
580 colMap.is_null(), std::runtime_error,
581 ": The input column Map must be nonnull.");
583 k_local_graph_.numRows() != rowMap->getLocalNumElements(),
584 std::runtime_error,
585 ": The input row Map and the input local graph need to have the same "
586 "number of rows. The row Map claims "
587 << rowMap->getLocalNumElements()
588 << " row(s), but the local graph claims " << k_local_graph_.numRows()
589 << " row(s).");
590
591 // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
592 // rowMap_->getLocalNumElements(), but it doesn't have to.
593 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
594 // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
595 // ": The input row Map and the input local graph need to have the same "
596 // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
597 // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
599 lclIndsUnpacked_wdv.extent(0) != 0 || gblInds_wdv.extent(0) != 0, std::logic_error,
600 ": cannot have 1D data structures allocated.");
601
602 if (!params.is_null() && params->isParameter("sorted") &&
603 !params->get<bool>("sorted")) {
604 indicesAreSorted_ = false;
605 } else {
606 indicesAreSorted_ = true;
607 }
608
609 setDomainRangeMaps(domainMap.is_null() ? rowMap_ : domainMap,
610 rangeMap.is_null() ? rowMap_ : rangeMap);
611 Teuchos::Array<int> remotePIDs(0); // unused output argument
612 this->makeImportExport(remotePIDs, false);
613
614 lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
616 this->setRowPtrs(k_local_graph_.row_map);
617
618 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
619
620 const bool callComputeGlobalConstants = params.get() == nullptr ||
621 params->get("compute global constants", true);
622
623 if (callComputeGlobalConstants) {
625 }
626 this->fillComplete_ = true;
627 this->checkInternalState();
628}
629
630template <class LocalOrdinal, class GlobalOrdinal, class Node>
633 const Teuchos::RCP<const map_type>& rowMap,
634 const Teuchos::RCP<const map_type>& colMap,
635 const Teuchos::RCP<const map_type>& domainMap,
636 const Teuchos::RCP<const map_type>& rangeMap,
637 const Teuchos::RCP<const import_type>& importer,
638 const Teuchos::RCP<const export_type>& exporter,
639 const Teuchos::RCP<Teuchos::ParameterList>& params)
641 , rowMap_(rowMap)
642 , colMap_(colMap)
643 , rangeMap_(rangeMap.is_null() ? rowMap : rangeMap)
644 , domainMap_(domainMap.is_null() ? rowMap : domainMap)
645 , importer_(importer)
646 , exporter_(exporter)
647 , numAllocForAllRows_(0)
648 , storageStatus_(Details::STORAGE_1D_PACKED)
649 , indicesAreAllocated_(true)
650 , indicesAreLocal_(true) {
651 staticAssertions();
652 const char tfecfFuncName[] =
653 "Tpetra::CrsGraph(local_graph_device_type,"
654 "Map,Map,Map,Map,Import,Export,params): ";
655
656 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(colMap.is_null(), std::runtime_error,
657 "The input column Map must be nonnull.");
658
661 setRowPtrs(lclGraph.row_map);
662
663 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
664
665 if (!params.is_null() && params->isParameter("sorted") &&
666 !params->get<bool>("sorted")) {
667 indicesAreSorted_ = false;
668 } else {
669 indicesAreSorted_ = true;
670 }
671
673 params.get() == nullptr ||
674 params->get("compute global constants", true);
677 }
678 fillComplete_ = true;
680}
681
682template <class LocalOrdinal, class GlobalOrdinal, class Node>
684 CrsGraph(const row_ptrs_device_view_type& rowPointers,
686 const Teuchos::RCP<const map_type>& rowMap,
687 const Teuchos::RCP<const map_type>& colMap,
688 const Teuchos::RCP<const map_type>& domainMap,
689 const Teuchos::RCP<const map_type>& rangeMap,
690 const Teuchos::RCP<const import_type>& importer,
691 const Teuchos::RCP<const export_type>& exporter,
692 const Teuchos::RCP<Teuchos::ParameterList>& params)
694 , rowMap_(rowMap)
695 , colMap_(colMap)
696 , rangeMap_(rangeMap.is_null() ? rowMap : rangeMap)
697 , domainMap_(domainMap.is_null() ? rowMap : domainMap)
698 , importer_(importer)
699 , exporter_(exporter)
700 , numAllocForAllRows_(0)
701 , storageStatus_(Details::STORAGE_1D_PACKED)
702 , indicesAreAllocated_(true)
703 , indicesAreLocal_(true) {
704 staticAssertions();
705 const char tfecfFuncName[] =
706 "Tpetra::CrsGraph(row_ptrs_device_view_type,local_inds_wdv_type"
707 "Map,Map,Map,Map,Import,Export,params): ";
708
709 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(colMap.is_null(), std::runtime_error,
710 "The input column Map must be nonnull.");
711
712 lclIndsPacked_wdv = columnIndices;
713 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
714 setRowPtrs(rowPointers);
715
716 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
717
718 if (!params.is_null() && params->isParameter("sorted") &&
719 !params->get<bool>("sorted")) {
720 indicesAreSorted_ = false;
721 } else {
722 indicesAreSorted_ = true;
723 }
724
725 const bool callComputeGlobalConstants =
726 params.get() == nullptr ||
727 params->get("compute global constants", true);
728 if (callComputeGlobalConstants) {
729 this->computeGlobalConstants();
731 fillComplete_ = true;
732 checkInternalState();
733}
734
735template <class LocalOrdinal, class GlobalOrdinal, class Node>
736Teuchos::RCP<const Teuchos::ParameterList>
738 getValidParameters() const {
739 using Teuchos::ParameterList;
740 using Teuchos::parameterList;
741 using Teuchos::RCP;
742
743 RCP<ParameterList> params = parameterList("Tpetra::CrsGraph");
744
745 // Make a sublist for the Import.
747
748 // FIXME (mfh 02 Apr 2012) We should really have the Import and
749 // Export objects fill in these lists. However, we don't want to
750 // create an Import or Export unless we need them. For now, we
751 // know that the Import and Export just pass the list directly to
752 // their Distributor, so we can create a Distributor here
753 // (Distributor's constructor is a lightweight operation) and have
754 // it fill in the list.
755
756 // Fill in Distributor default parameters by creating a
757 // Distributor and asking it to do the work.
758 Distributor distributor(rowMap_->getComm(), importSublist);
759 params->set("Import", *importSublist, "How the Import performs communication.");
760
761 // Make a sublist for the Export. For now, it's a clone of the
762 // Import sublist. It's not a shallow copy, though, since we
763 // might like the Import to do communication differently than the
764 // Export.
765 params->set("Export", *importSublist, "How the Export performs communication.");
766
767 return params;
768}
769
770template <class LocalOrdinal, class GlobalOrdinal, class Node>
772 setParameterList(const Teuchos::RCP<Teuchos::ParameterList>& params) {
773 Teuchos::RCP<const Teuchos::ParameterList> validParams =
774 getValidParameters();
775 params->validateParametersAndSetDefaults(*validParams);
776 this->setMyParamList(params);
777}
778
779template <class LocalOrdinal, class GlobalOrdinal, class Node>
782 getGlobalNumRows() const {
783 return rowMap_->getGlobalNumElements();
784}
785
786template <class LocalOrdinal, class GlobalOrdinal, class Node>
789 getGlobalNumCols() const {
790 const char tfecfFuncName[] = "getGlobalNumCols: ";
792 !isFillComplete() || getDomainMap().is_null(), std::runtime_error,
793 "The graph does not have a domain Map. You may not call this method in "
794 "that case.");
795 return getDomainMap()->getGlobalNumElements();
796}
797
798template <class LocalOrdinal, class GlobalOrdinal, class Node>
799size_t
801 getLocalNumRows() const {
802 return this->rowMap_.is_null() ? static_cast<size_t>(0) : this->rowMap_->getLocalNumElements();
803}
804
805template <class LocalOrdinal, class GlobalOrdinal, class Node>
806size_t
808 getLocalNumCols() const {
809 const char tfecfFuncName[] = "getLocalNumCols: ";
811 !hasColMap(), std::runtime_error,
812 "The graph does not have a column Map. You may not call this method "
813 "unless the graph has a column Map. This requires either that a custom "
814 "column Map was given to the constructor, or that fillComplete() has "
815 "been called.");
816 return colMap_.is_null() ? static_cast<size_t>(0) : colMap_->getLocalNumElements();
817}
818
819template <class LocalOrdinal, class GlobalOrdinal, class Node>
820Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
822 getRowMap() const {
823 return rowMap_;
824}
825
826template <class LocalOrdinal, class GlobalOrdinal, class Node>
827Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
829 getColMap() const {
830 return colMap_;
831}
832
833template <class LocalOrdinal, class GlobalOrdinal, class Node>
834Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
836 getDomainMap() const {
837 return domainMap_;
838}
839
840template <class LocalOrdinal, class GlobalOrdinal, class Node>
841Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
843 getRangeMap() const {
844 return rangeMap_;
845}
846
847template <class LocalOrdinal, class GlobalOrdinal, class Node>
848Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
850 getImporter() const {
851 return importer_;
852}
853
854template <class LocalOrdinal, class GlobalOrdinal, class Node>
855Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
857 getExporter() const {
858 return exporter_;
859}
860
861template <class LocalOrdinal, class GlobalOrdinal, class Node>
863 hasColMap() const {
864 return !colMap_.is_null();
865}
866
867template <class LocalOrdinal, class GlobalOrdinal, class Node>
869 isStorageOptimized() const {
870 // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
871 // getLocalNumRows() is zero?
872
873 const bool isOpt = indicesAreAllocated_ &&
874 k_numRowEntries_.extent(0) == 0 &&
875 getLocalNumRows() > 0;
876
877 return isOpt;
878}
879
880template <class LocalOrdinal, class GlobalOrdinal, class Node>
883 getGlobalNumEntries() const {
884 const char tfecfFuncName[] = "getGlobalNumEntries: ";
885 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_, std::logic_error,
886 "The graph does not have global constants computed, "
887 "but the user has requested them.");
888
889 return globalNumEntries_;
890}
891
892template <class LocalOrdinal, class GlobalOrdinal, class Node>
893size_t
895 getLocalNumEntries() const {
896 const char tfecfFuncName[] = "getLocalNumEntries: ";
897 typedef LocalOrdinal LO;
898
899 Details::ProfilingRegion regionGLNE("Tpetra::CrsGraph::getLocalNumEntries");
900
901 if (this->indicesAreAllocated_) {
902 const LO lclNumRows = this->getLocalNumRows();
903 if (lclNumRows == 0) {
904 return static_cast<size_t>(0);
905 } else {
906 // Avoid the "*this capture" issue by creating a local Kokkos::View.
907 auto numEntPerRow = this->k_numRowEntries_;
908 const LO numNumEntPerRow = numEntPerRow.extent(0);
909 if (numNumEntPerRow == 0) {
910 if (static_cast<LO>(this->getRowPtrsPackedDevice().extent(0)) <
911 static_cast<LO>(lclNumRows + 1)) {
912 return static_cast<size_t>(0);
913 } else {
914 // indices are allocated and k_numRowEntries_ is not allocated,
915 // so we have packed storage and the length of lclIndsPacked_wdv
916 // must be the number of local entries.
917 if (debug_) {
918 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getRowPtrsPackedHost()(lclNumRows) != lclIndsPacked_wdv.extent(0), std::logic_error,
919 "Final entry of packed host rowptrs doesn't match the length of lclIndsPacked");
921 return lclIndsPacked_wdv.extent(0);
922 }
923 } else { // k_numRowEntries_ is populated
924 // k_numRowEntries_ is actually be a host View, so we run
925 // the sum in its native execution space. This also means
926 // that we can use explicit capture (which could perhaps
927 // improve build time) instead of KOKKOS_LAMBDA, and avoid
928 // any CUDA build issues with trying to run a __device__ -
929 // only function on host.
930 typedef typename num_row_entries_type::execution_space
931 host_exec_space;
932 typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
933
935 size_t nodeNumEnt = 0;
936 Kokkos::parallel_reduce(
937 "Tpetra::CrsGraph::getNumNodeEntries",
938 range_type(0, upperLoopBound),
939 [=](const LO& k, size_t& lclSum) {
941 },
943 return nodeNumEnt;
944 }
946 } else { // nothing allocated on this process, so no entries
947 return static_cast<size_t>(0);
948 }
949}
951template <class LocalOrdinal, class GlobalOrdinal, class Node>
955 const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_, std::logic_error,
957 "The graph does not have global constants computed, "
958 "but the user has requested them.");
959
960 return globalMaxNumRowEntries_;
962
963template <class LocalOrdinal, class GlobalOrdinal, class Node>
964size_t
967 return nodeMaxNumRowEntries_;
968}
969
970template <class LocalOrdinal, class GlobalOrdinal, class Node>
972 isFillComplete() const {
973 return fillComplete_;
974}
976template <class LocalOrdinal, class GlobalOrdinal, class Node>
978 isFillActive() const {
979 return !fillComplete_;
980}
981
982template <class LocalOrdinal, class GlobalOrdinal, class Node>
984 isLocallyIndexed() const {
985 return indicesAreLocal_;
986}
987
988template <class LocalOrdinal, class GlobalOrdinal, class Node>
990 isGloballyIndexed() const {
991 return indicesAreGlobal_;
992}
993
994template <class LocalOrdinal, class GlobalOrdinal, class Node>
995size_t
998 typedef LocalOrdinal LO;
999
1000 if (this->indicesAreAllocated_) {
1001 const LO lclNumRows = this->getLocalNumRows();
1002 if (lclNumRows == 0) {
1003 return static_cast<size_t>(0);
1004 } else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1005 if (static_cast<LO>(this->getRowPtrsPackedDevice().extent(0)) <
1006 static_cast<LO>(lclNumRows + 1)) {
1007 return static_cast<size_t>(0);
1008 } else {
1009 if (this->isLocallyIndexed())
1010 return lclIndsPacked_wdv.extent(0);
1011 else
1012 return gblInds_wdv.extent(0);
1013 }
1014 } else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1015 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1016 if (rowPtrsUnpacked_host.extent(0) == 0) {
1017 return static_cast<size_t>(0);
1018 } else {
1019 if (this->isLocallyIndexed())
1020 return lclIndsUnpacked_wdv.extent(0);
1021 else
1022 return gblInds_wdv.extent(0);
1023 }
1024 } else {
1025 return static_cast<size_t>(0);
1026 }
1027 } else {
1028 return Tpetra::Details::OrdinalTraits<size_t>::invalid();
1030}
1031
1032template <class LocalOrdinal, class GlobalOrdinal, class Node>
1033Teuchos::RCP<const Teuchos::Comm<int>>
1035 getComm() const {
1036 return this->rowMap_.is_null() ? Teuchos::null : this->rowMap_->getComm();
1037}
1038
1039template <class LocalOrdinal, class GlobalOrdinal, class Node>
1042 getIndexBase() const {
1043 return rowMap_->getIndexBase();
1044}
1045
1046template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048 indicesAreAllocated() const {
1049 return indicesAreAllocated_;
1050}
1051
1052template <class LocalOrdinal, class GlobalOrdinal, class Node>
1054 isSorted() const {
1055 return indicesAreSorted_;
1056}
1057
1058template <class LocalOrdinal, class GlobalOrdinal, class Node>
1060 isMerged() const {
1061 return noRedundancies_;
1062}
1063
1064template <class LocalOrdinal, class GlobalOrdinal, class Node>
1067 // FIXME (mfh 07 May 2013) How do we know that the change
1068 // introduced a redundancy, or even that it invalidated the sorted
1069 // order of indices? CrsGraph has always made this conservative
1070 // guess. It could be a bit costly to check at insertion time,
1071 // though.
1072 indicesAreSorted_ = false;
1073 noRedundancies_ = false;
1074
1075 // We've modified the graph, so we'll have to recompute local
1076 // constants like the number of diagonal entries on this process.
1077 haveLocalConstants_ = false;
1078}
1079
1080template <class LocalOrdinal, class GlobalOrdinal, class Node>
1082 allocateIndices(const ELocalGlobal lg, const bool verbose) {
1083 using std::endl;
1084 using Teuchos::arcp;
1085 using Teuchos::Array;
1086 using Teuchos::ArrayRCP;
1087 typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1088 typedef typename local_graph_device_type::row_map_type::non_const_type
1090 const char tfecfFuncName[] = "allocateIndices: ";
1091 const char suffix[] =
1092 " Please report this bug to the Tpetra developers.";
1093
1094 Details::ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1095
1096 std::unique_ptr<std::string> prefix;
1097 if (verbose) {
1098 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1099 std::ostringstream os;
1100 os << *prefix << "Start: lg="
1101 << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1102 << ", numRows: " << this->getLocalNumRows() << endl;
1103 std::cerr << os.str();
1104 }
1105
1106 // This is a protected function, only callable by us. If it was
1107 // called incorrectly, it is our fault. That's why the tests
1108 // below throw std::logic_error instead of std::invalid_argument.
1109 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isLocallyIndexed() && lg == GlobalIndices, std::logic_error,
1110 ": The graph is locally indexed, but Tpetra code is calling "
1111 "this method with lg=GlobalIndices."
1112 << suffix);
1113 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed() && lg == LocalIndices, std::logic_error,
1114 ": The graph is globally indexed, but Tpetra code is calling "
1115 "this method with lg=LocalIndices."
1116 << suffix);
1117 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreAllocated(), std::logic_error,
1118 ": The graph's "
1119 "indices are already allocated, but Tpetra is calling "
1120 "allocateIndices again."
1121 << suffix);
1122 const size_t numRows = this->getLocalNumRows();
1123
1124 //
1125 // STATIC ALLOCATION PROFILE
1126 //
1127 size_type numInds = 0;
1128 {
1129 if (verbose) {
1130 std::ostringstream os;
1131 os << *prefix << "Allocate k_rowPtrs: " << (numRows + 1) << endl;
1132 std::cerr << os.str();
1133 }
1134 non_const_row_map_type k_rowPtrs("Tpetra::CrsGraph::ptr", numRows + 1);
1136 if (this->k_numAllocPerRow_.extent(0) != 0) {
1137 // It's OK to throw std::invalid_argument here, because we
1138 // haven't incurred any side effects yet. Throwing that
1139 // exception (and not, say, std::logic_error) implies that the
1140 // instance can recover.
1141 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->k_numAllocPerRow_.extent(0) != numRows,
1142 std::invalid_argument,
1143 "k_numAllocPerRow_ is allocated, that is, "
1144 "has nonzero length "
1145 << this->k_numAllocPerRow_.extent(0)
1146 << ", but its length != numRows = " << numRows << ".");
1147
1148 // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1149 // we want to compute here) lives on device. That's OK;
1150 // computeOffsetsFromCounts can handle this case.
1152
1153 // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1154 // doesn't attempt to check its input for "invalid" flag
1155 // values. For now, we omit that feature of the sequential
1156 // code disabled below.
1157 numInds = computeOffsetsFromCounts(k_rowPtrs, k_numAllocPerRow_);
1158 } else {
1159 // It's OK to throw std::invalid_argument here, because we
1160 // haven't incurred any side effects yet. Throwing that
1161 // exception (and not, say, std::logic_error) implies that the
1162 // instance can recover.
1163 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->numAllocForAllRows_ ==
1164 Tpetra::Details::OrdinalTraits<size_t>::invalid(),
1165 std::invalid_argument,
1166 "numAllocForAllRows_ has an invalid value, "
1167 "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = "
1168 << Tpetra::Details::OrdinalTraits<size_t>::invalid() << ".");
1169
1171 numInds = computeOffsetsFromConstantCount(k_rowPtrs, this->numAllocForAllRows_);
1172 }
1173 // "Commit" the resulting row offsets.
1174 setRowPtrsUnpacked(k_rowPtrs);
1175 }
1176 if (debug_) {
1177 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numInds != size_type(this->getRowPtrsUnpackedHost()(numRows)), std::logic_error,
1178 ": Number of indices produced by computeOffsetsFrom[Constant]Counts "
1179 "does not match final entry of rowptrs unpacked");
1180 }
1181
1182 if (lg == LocalIndices) {
1183 if (verbose) {
1184 std::ostringstream os;
1185 os << *prefix << "Allocate local column indices "
1186 "lclIndsUnpacked_wdv: "
1187 << numInds << endl;
1188 std::cerr << os.str();
1189 }
1190 lclIndsUnpacked_wdv = local_inds_wdv_type(
1191 local_inds_dualv_type("Tpetra::CrsGraph::lclInd", numInds));
1192 } else {
1193 if (verbose) {
1194 std::ostringstream os;
1195 os << *prefix << "Allocate global column indices "
1196 "gblInds_wdv: "
1197 << numInds << endl;
1198 std::cerr << os.str();
1199 }
1200 gblInds_wdv = global_inds_wdv_type(
1201 global_inds_dualv_type("Tpetra::CrsGraph::gblInd", numInds));
1202 }
1203 storageStatus_ = Details::STORAGE_1D_UNPACKED;
1204
1205 this->indicesAreLocal_ = (lg == LocalIndices);
1206 this->indicesAreGlobal_ = (lg == GlobalIndices);
1207
1208 if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1209 using Kokkos::ViewAllocateWithoutInitializing;
1210 const char label[] = "Tpetra::CrsGraph::numRowEntries";
1211 if (verbose) {
1212 std::ostringstream os;
1213 os << *prefix << "Allocate k_numRowEntries_: " << numRows
1214 << endl;
1215 std::cerr << os.str();
1216 }
1217 num_row_entries_type numRowEnt(ViewAllocateWithoutInitializing(label), numRows);
1218 // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1219 Kokkos::deep_copy(execution_space(), numRowEnt, static_cast<size_t>(0)); // fill w/ 0s
1220 Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
1221 this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1222 }
1223
1224 // Once indices are allocated, CrsGraph needs to free this information.
1225 this->numAllocForAllRows_ = 0;
1226 this->k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
1227 this->indicesAreAllocated_ = true;
1228
1229 try {
1230 this->checkInternalState();
1231 } catch (std::logic_error& e) {
1232 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
1233 "At end of allocateIndices, "
1234 "checkInternalState threw std::logic_error: "
1235 << e.what());
1236 } catch (std::exception& e) {
1237 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1238 "At end of allocateIndices, "
1239 "checkInternalState threw std::exception: "
1240 << e.what());
1241 } catch (...) {
1242 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1243 "At end of allocateIndices, "
1244 "checkInternalState threw an exception "
1245 "not a subclass of std::exception.");
1246 }
1247
1248 if (verbose) {
1249 std::ostringstream os;
1250 os << *prefix << "Done" << endl;
1251 std::cerr << os.str();
1252 }
1253}
1254
1255template <class LocalOrdinal, class GlobalOrdinal, class Node>
1256typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1257 local_inds_dualv_type::t_host::const_type
1260 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1261 return typename local_inds_dualv_type::t_host::const_type();
1262 else
1263 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1264 rowinfo.allocSize,
1265 Access::ReadOnly);
1266}
1267
1268template <class LocalOrdinal, class GlobalOrdinal, class Node>
1270 local_inds_dualv_type::t_host
1273 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1274 return typename local_inds_dualv_type::t_host();
1275 else
1276 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1277 rowinfo.allocSize,
1278 Access::ReadWrite);
1279}
1280
1281template <class LocalOrdinal, class GlobalOrdinal, class Node>
1283 global_inds_dualv_type::t_host::const_type
1286 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1287 return typename global_inds_dualv_type::t_host::const_type();
1288 else
1289 return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1290 rowinfo.allocSize,
1291 Access::ReadOnly);
1292}
1293
1294template <class LocalOrdinal, class GlobalOrdinal, class Node>
1296 local_inds_dualv_type::t_dev::const_type
1299 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1300 return typename local_inds_dualv_type::t_dev::const_type();
1301 else
1302 return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1303 rowinfo.allocSize,
1304 Access::ReadOnly);
1305}
1306
1307template <class LocalOrdinal, class GlobalOrdinal, class Node>
1309 global_inds_dualv_type::t_dev::const_type
1312 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1313 return typename global_inds_dualv_type::t_dev::const_type();
1314 else
1315 return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1316 rowinfo.allocSize,
1317 Access::ReadOnly);
1318}
1319
1320template <class LocalOrdinal, class GlobalOrdinal, class Node>
1321RowInfo
1323 getRowInfo(const LocalOrdinal myRow) const {
1324 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
1325 RowInfo ret;
1326 if (this->rowMap_.is_null() || !this->rowMap_->isNodeLocalElement(myRow)) {
1327 ret.localRow = STINV;
1328 ret.allocSize = 0;
1329 ret.numEntries = 0;
1330 ret.offset1D = STINV;
1331 return ret;
1332 }
1333
1334 ret.localRow = static_cast<size_t>(myRow);
1335 if (this->indicesAreAllocated()) {
1336 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1337 // Offsets tell us the allocation size in this case.
1338 if (rowPtrsUnpacked_host.extent(0) == 0) {
1339 ret.offset1D = 0;
1340 ret.allocSize = 0;
1341 } else {
1342 ret.offset1D = rowPtrsUnpacked_host(myRow);
1344 }
1345
1346 ret.numEntries = (this->k_numRowEntries_.extent(0) == 0) ? ret.allocSize : this->k_numRowEntries_(myRow);
1347 } else { // haven't performed allocation yet; probably won't hit this code
1348 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1349 // allocate, rather than doing lazy allocation at first insert.
1350 // This will make k_numAllocPerRow_ obsolete.
1351 ret.allocSize = (this->k_numAllocPerRow_.extent(0) != 0) ? this->k_numAllocPerRow_(myRow) : // this is a host View
1352 this->numAllocForAllRows_;
1353 ret.numEntries = 0;
1354 ret.offset1D = STINV;
1355 }
1356
1357 return ret;
1358}
1359
1360template <class LocalOrdinal, class GlobalOrdinal, class Node>
1361RowInfo
1364 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
1365 RowInfo ret;
1366 if (this->rowMap_.is_null()) {
1367 ret.localRow = STINV;
1368 ret.allocSize = 0;
1369 ret.numEntries = 0;
1370 ret.offset1D = STINV;
1371 return ret;
1372 }
1373 const LocalOrdinal myRow = this->rowMap_->getLocalElement(gblRow);
1374 if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
1375 ret.localRow = STINV;
1376 ret.allocSize = 0;
1377 ret.numEntries = 0;
1378 ret.offset1D = STINV;
1379 return ret;
1380 }
1381
1382 ret.localRow = static_cast<size_t>(myRow);
1383 if (this->indicesAreAllocated()) {
1384 // graph data structures have the info that we need
1385 //
1386 // if static graph, offsets tell us the allocation size
1387 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1388 if (rowPtrsUnpacked_host.extent(0) == 0) {
1389 ret.offset1D = 0;
1390 ret.allocSize = 0;
1391 } else {
1392 ret.offset1D = rowPtrsUnpacked_host(myRow);
1394 }
1395
1396 ret.numEntries = (this->k_numRowEntries_.extent(0) == 0) ? ret.allocSize : this->k_numRowEntries_(myRow);
1397 } else { // haven't performed allocation yet; probably won't hit this code
1398 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1399 // allocate, rather than doing lazy allocation at first insert.
1400 // This will make k_numAllocPerRow_ obsolete.
1401 ret.allocSize = (this->k_numAllocPerRow_.extent(0) != 0) ? this->k_numAllocPerRow_(myRow) : // this is a host View
1402 this->numAllocForAllRows_;
1403 ret.numEntries = 0;
1404 ret.offset1D = STINV;
1405 }
1407 return ret;
1408}
1409
1410template <class LocalOrdinal, class GlobalOrdinal, class Node>
1412 staticAssertions() const {
1413 using Teuchos::OrdinalTraits;
1414 typedef LocalOrdinal LO;
1415 typedef GlobalOrdinal GO;
1417
1418 // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1419 // This is so that we can store local indices in the memory
1420 // formerly occupied by global indices.
1421 static_assert(sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal),
1422 "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1423 // Assumption: max(size_t) >= max(LocalOrdinal)
1424 // This is so that we can represent any LocalOrdinal as a size_t.
1425 static_assert(sizeof(size_t) >= sizeof(LocalOrdinal),
1426 "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1427 static_assert(sizeof(GST) >= sizeof(size_t),
1428 "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1429
1430 // FIXME (mfh 30 Sep 2015) We're not using
1431 // Teuchos::CompileTimeAssert any more. Can we do these checks
1432 // with static_assert?
1433
1434 // can't call max() with CompileTimeAssert, because it isn't a
1435 // constant expression; will need to make this a runtime check
1436 const char msg[] =
1437 "Tpetra::CrsGraph: Object cannot be created with the "
1438 "given template arguments: size assumptions are not valid.";
1440 static_cast<size_t>(Teuchos::OrdinalTraits<LO>::max()) > Teuchos::OrdinalTraits<size_t>::max(),
1441 std::runtime_error, msg);
1443 static_cast<GST>(Teuchos::OrdinalTraits<LO>::max()) > static_cast<GST>(Teuchos::OrdinalTraits<GO>::max()),
1444 std::runtime_error, msg);
1446 static_cast<size_t>(Teuchos::OrdinalTraits<GO>::max()) > Teuchos::OrdinalTraits<GST>::max(),
1447 std::runtime_error, msg);
1449 Teuchos::OrdinalTraits<size_t>::max() > Teuchos::OrdinalTraits<GST>::max(),
1450 std::runtime_error, msg);
1451}
1452
1453template <class LocalOrdinal, class GlobalOrdinal, class Node>
1454size_t
1457 const SLocalGlobalViews& newInds,
1458 const ELocalGlobal lg,
1459 const ELocalGlobal I) {
1460 using Teuchos::ArrayView;
1461 typedef LocalOrdinal LO;
1462 typedef GlobalOrdinal GO;
1463 const char tfecfFuncName[] = "insertIndices: ";
1464
1465 size_t oldNumEnt = 0;
1466 if (debug_) {
1467 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1468 "lg must be either GlobalIndices or LocalIndices.");
1469 oldNumEnt = this->getNumEntriesInLocalRow(rowinfo.localRow);
1470 }
1471
1472 size_t numNewInds = 0;
1473 if (lg == GlobalIndices) { // input indices are global
1475 numNewInds = new_ginds.size();
1476 if (I == GlobalIndices) { // store global indices
1477 auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1478 if (debug_) {
1479 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(gind_view.size()) <
1480 rowinfo.numEntries + numNewInds,
1481 std::logic_error,
1482 "gind_view.size() = " << gind_view.size()
1483 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1484 << ") + numNewInds (= " << numNewInds << ").");
1486 GO* const gblColInds_out = gind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1487 for (size_t k = 0; k < numNewInds; ++k) {
1489 }
1490 } else if (I == LocalIndices) { // store local indices
1491 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1492 if (debug_) {
1493 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(lind_view.size()) <
1494 rowinfo.numEntries + numNewInds,
1495 std::logic_error,
1496 "lind_view.size() = " << lind_view.size()
1497 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1498 << ") + numNewInds (= " << numNewInds << ").");
1499 }
1500 LO* const lclColInds_out = lind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1501 for (size_t k = 0; k < numNewInds; ++k) {
1502 lclColInds_out[k] = colMap_->getLocalElement(new_ginds[k]);
1503 }
1504 }
1505 } else if (lg == LocalIndices) { // input indices are local
1506 ArrayView<const LO> new_linds = newInds.linds;
1507 numNewInds = new_linds.size();
1508 if (I == LocalIndices) { // store local indices
1509 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1510 if (debug_) {
1512 rowinfo.numEntries + numNewInds,
1513 std::logic_error,
1514 "lind_view.size() = " << lind_view.size()
1515 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1516 << ") + numNewInds (= " << numNewInds << ").");
1517 }
1518 LO* const lclColInds_out = lind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1519 for (size_t k = 0; k < numNewInds; ++k) {
1521 }
1522 } else if (I == GlobalIndices) {
1523 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
1524 "The case where the input indices are local "
1525 "and the indices to write are global (lg=LocalIndices, I="
1526 "GlobalIndices) is not implemented, because it does not make sense."
1527 << std::endl
1528 << "If you have correct local column indices, that "
1529 "means the graph has a column Map. In that case, you should be "
1530 "storing local indices.");
1531 }
1532 }
1533
1534 rowinfo.numEntries += numNewInds;
1535 this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1536 this->setLocallyModified();
1537
1538 if (debug_) {
1539 const size_t chkNewNumEnt =
1540 this->getNumEntriesInLocalRow(rowinfo.localRow);
1541 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1542 "chkNewNumEnt = " << chkNewNumEnt
1543 << " != oldNumEnt (= " << oldNumEnt
1544 << ") + numNewInds (= " << numNewInds << ").");
1545 }
1546
1547 return numNewInds;
1548}
1549
1550template <class LocalOrdinal, class GlobalOrdinal, class Node>
1551size_t
1555 const size_t numInputInds) {
1556 return this->insertGlobalIndicesImpl(this->getRowInfo(lclRow),
1558}
1559
1560template <class LocalOrdinal, class GlobalOrdinal, class Node>
1561size_t
1565 const size_t numInputInds,
1566 std::function<void(const size_t, const size_t, const size_t)> fun) {
1568 using Kokkos::MemoryUnmanaged;
1569 using Kokkos::subview;
1570 using Kokkos::View;
1571 using Teuchos::ArrayView;
1572 using LO = LocalOrdinal;
1573 using GO = GlobalOrdinal;
1574
1575 const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1576 const LO lclRow = static_cast<LO>(rowInfo.localRow);
1577
1578 auto numEntries = rowInfo.numEntries;
1581 size_t numInserted;
1582 {
1583 auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1584 numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1586 numEntries, inputInds, fun);
1587 }
1588
1589 const bool insertFailed =
1590 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1591 if (insertFailed) {
1592 constexpr size_t ONE(1);
1593 const int myRank = this->getComm()->getRank();
1594 std::ostringstream os;
1595
1596 os << "Proc " << myRank << ": Not enough capacity to insert "
1597 << numInputInds
1598 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1599 << " into local row " << lclRow << ", which currently has "
1600 << rowInfo.numEntries
1601 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1602 << " and total allocation size " << rowInfo.allocSize
1603 << ". ";
1604 const size_t maxNumToPrint =
1607 numInputInds);
1608 verbosePrintArray(os, inputGblColIndsView,
1609 "Input global "
1610 "column indices",
1612 os << ", ";
1613 auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1615 rowInfo.numEntries);
1616 verbosePrintArray(os, curGblColIndsView,
1617 "Current global "
1618 "column indices",
1620 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
1621 }
1622
1623 this->k_numRowEntries_(lclRow) += numInserted;
1624
1625 this->setLocallyModified();
1626 return numInserted;
1627}
1628
1629template <class LocalOrdinal, class GlobalOrdinal, class Node>
1632 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1633 std::function<void(const size_t, const size_t, const size_t)> fun) {
1634 using Kokkos::MemoryUnmanaged;
1635 using Kokkos::subview;
1636 using Kokkos::View;
1637 using LO = LocalOrdinal;
1638
1639 const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1640
1641 const RowInfo rowInfo = this->getRowInfo(myRow);
1642
1643 size_t numNewInds = 0;
1644 size_t newNumEntries = 0;
1645
1646 auto numEntries = rowInfo.numEntries;
1647 // Note: Teuchos::ArrayViews are in HostSpace
1649 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1650 size_t numInserted = 0;
1651 {
1652 auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1653 numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1654 numEntries, inputInds, fun);
1655 }
1656
1657 const bool insertFailed =
1658 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1659 if (insertFailed) {
1660 constexpr size_t ONE(1);
1661 const size_t numInputInds(indices.size());
1662 const int myRank = this->getComm()->getRank();
1663 std::ostringstream os;
1664 os << "On MPI Process " << myRank << ": Not enough capacity to "
1665 "insert "
1666 << numInputInds
1667 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1668 << " into local row " << myRow << ", which currently has "
1669 << rowInfo.numEntries
1670 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1671 << " and total allocation size " << rowInfo.allocSize << ".";
1672 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
1673 }
1675 newNumEntries = rowInfo.numEntries + numNewInds;
1676
1677 this->k_numRowEntries_(myRow) += numNewInds;
1678 this->setLocallyModified();
1679
1680 if (debug_) {
1681 const size_t chkNewNumEntries = this->getNumEntriesInLocalRow(myRow);
1683 "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1684 << " != newNumEntries = " << newNumEntries
1685 << ". Please report this bug to the Tpetra developers.");
1686 }
1687}
1688
1689template <class LocalOrdinal, class GlobalOrdinal, class Node>
1690size_t
1693 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1694 std::function<void(const size_t, const size_t, const size_t)> fun) const {
1695 using GO = GlobalOrdinal;
1696 using Kokkos::MemoryUnmanaged;
1697 using Kokkos::View;
1698
1699 auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1700
1702 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1703
1704 size_t numFound = 0;
1705 LocalOrdinal lclRow = rowInfo.localRow;
1706 if (this->isLocallyIndexed()) {
1707 if (this->colMap_.is_null())
1708 return invalidCount;
1709 const auto& colMap = *(this->colMap_);
1710 auto map = [&](GO const gblInd) { return colMap.getLocalElement(gblInd); };
1711 if (this->isSorted()) {
1712 numFound = Details::findCrsIndicesSorted(
1713 lclRow,
1714 this->getRowPtrsUnpackedHost(),
1715 rowInfo.numEntries,
1716 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly),
1717 inputInds,
1718 map,
1719 fun);
1720 } else {
1721 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1722 rowInfo.numEntries,
1723 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1724 }
1725 } else if (this->isGloballyIndexed()) {
1726 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1727 rowInfo.numEntries,
1728 gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1729 }
1730 return numFound;
1731}
1732
1733template <class LocalOrdinal, class GlobalOrdinal, class Node>
1735 setDomainRangeMaps(const Teuchos::RCP<const map_type>& domainMap,
1736 const Teuchos::RCP<const map_type>& rangeMap) {
1737 // simple pointer comparison for equality
1738 if (domainMap_ != domainMap) {
1739 domainMap_ = domainMap;
1740 importer_ = Teuchos::null;
1741 }
1742 if (rangeMap_ != rangeMap) {
1743 rangeMap_ = rangeMap;
1744 exporter_ = Teuchos::null;
1745 }
1746}
1747
1748template <class LocalOrdinal, class GlobalOrdinal, class Node>
1749void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1750 clearGlobalConstants() {
1751 const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1752
1753 globalNumEntries_ = INV;
1754 globalMaxNumRowEntries_ = INV;
1755 haveGlobalConstants_ = false;
1756}
1757
1758template <class LocalOrdinal, class GlobalOrdinal, class Node>
1760 checkInternalState() const {
1762 if (debug_) {
1763 using std::endl;
1764 const char tfecfFuncName[] = "checkInternalState: ";
1765 const char suffix[] = " Please report this bug to the Tpetra developers.";
1766
1767 ProfilingRegion("Tpetra::CrsGraph::checkInternalState");
1768
1769 std::unique_ptr<std::string> prefix;
1770 if (verbose_) {
1771 prefix = this->createPrefix("CrsGraph", "checkInternalState");
1772 std::ostringstream os;
1773 os << *prefix << "Start" << endl;
1774 std::cerr << os.str();
1775 }
1776
1777 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid();
1778 // const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1779 // check the internal state of this data structure
1780 // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1781 // always remains in a valid state
1782
1783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->rowMap_.is_null(), std::logic_error,
1784 "Row Map is null." << suffix);
1785 // This may access the row Map, so we need to check first (above)
1786 // whether the row Map is null.
1787 const LocalOrdinal lclNumRows =
1788 static_cast<LocalOrdinal>(this->getLocalNumRows());
1789
1790 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillActive() == this->isFillComplete(), std::logic_error,
1791 "Graph cannot be both fill active and fill complete." << suffix);
1792 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete() &&
1793 (this->colMap_.is_null() ||
1794 this->rangeMap_.is_null() ||
1795 this->domainMap_.is_null()),
1796 std::logic_error,
1797 "Graph is full complete, but at least one of {column, range, domain} "
1798 "Map is null."
1799 << suffix);
1800 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized() && !this->indicesAreAllocated(),
1801 std::logic_error,
1802 "Storage is optimized, but indices are not "
1803 "allocated, not even trivially."
1804 << suffix);
1805
1806 size_t nodeAllocSize = 0;
1807 try {
1808 nodeAllocSize = this->getLocalAllocationSize();
1809 } catch (std::logic_error& e) {
1810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1811 "getLocalAllocationSize threw "
1812 "std::logic_error: "
1813 << e.what());
1814 } catch (std::exception& e) {
1815 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1816 "getLocalAllocationSize threw an "
1817 "std::exception: "
1818 << e.what());
1819 } catch (...) {
1820 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1821 "getLocalAllocationSize threw an exception "
1822 "not a subclass of std::exception.");
1823 }
1824
1825 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized() &&
1826 nodeAllocSize != this->getLocalNumEntries(),
1827 std::logic_error,
1828 "Storage is optimized, but "
1829 "this->getLocalAllocationSize() = "
1830 << nodeAllocSize
1831 << " != this->getLocalNumEntries() = " << this->getLocalNumEntries()
1832 << "." << suffix);
1833 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_ &&
1834 (this->globalNumEntries_ != GSTI ||
1835 this->globalMaxNumRowEntries_ != GSTI),
1836 std::logic_error,
1837 "Graph claims not to have global constants, but "
1838 "some of the global constants are not marked as invalid."
1839 << suffix);
1840 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->haveGlobalConstants_ &&
1841 (this->globalNumEntries_ == GSTI ||
1842 this->globalMaxNumRowEntries_ == GSTI),
1843 std::logic_error,
1844 "Graph claims to have global constants, but "
1845 "some of them are marked as invalid."
1846 << suffix);
1847 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->haveGlobalConstants_ &&
1848 (this->globalNumEntries_ < this->getLocalNumEntries() ||
1849 this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
1850 std::logic_error,
1851 "Graph claims to have global constants, and "
1852 "all of the values of the global constants are valid, but "
1853 "some of the local constants are greater than "
1854 "their corresponding global constants."
1855 << suffix);
1856 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreAllocated() &&
1857 (this->numAllocForAllRows_ != 0 ||
1858 this->k_numAllocPerRow_.extent(0) != 0),
1859 std::logic_error,
1860 "The graph claims that its indices are allocated, but "
1861 "either numAllocForAllRows_ (= "
1862 << this->numAllocForAllRows_ << ") is "
1863 "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
1864 "the graph is supposed to release its \"allocation specifications\" "
1865 "when it allocates its indices."
1866 << suffix);
1867 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1868 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
1870 std::logic_error,
1871 "The host and device views of k_rowPtrs_ have "
1872 "different sizes; rowPtrsUnpacked_host_ has size "
1873 << rowPtrsUnpacked_host.extent(0)
1874 << ", but rowPtrsUnpacked_dev_ has size "
1875 << rowPtrsUnpacked_dev.extent(0)
1876 << "." << suffix);
1877 if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
1879 std::logic_error,
1880 "The graph is globally indexed and "
1881 "k_rowPtrs has nonzero size "
1882 << rowPtrsUnpacked_host.extent(0)
1883 << ", but that size does not equal lclNumRows+1 = "
1884 << (lclNumRows + 1) << "." << suffix);
1886 std::logic_error,
1887 "The graph is globally indexed and "
1888 "k_rowPtrs_ has nonzero size "
1889 << rowPtrsUnpacked_host.extent(0)
1890 << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
1892 << " != gblInds_wdv.extent(0)="
1893 << gblInds_wdv.extent(0) << "." << suffix);
1894 }
1895 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() &&
1896 rowPtrsUnpacked_host.extent(0) != 0 &&
1897 (static_cast<size_t>(rowPtrsUnpacked_host.extent(0)) !=
1898 static_cast<size_t>(lclNumRows + 1) ||
1900 static_cast<size_t>(this->lclIndsUnpacked_wdv.extent(0))),
1901 std::logic_error,
1902 "If k_rowPtrs_ has nonzero size and "
1903 "the graph is locally indexed, then "
1904 "k_rowPtrs_ must have N+1 rows, and "
1905 "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)."
1906 << suffix);
1908 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreAllocated() &&
1909 nodeAllocSize > 0 &&
1910 this->lclIndsUnpacked_wdv.extent(0) == 0 &&
1911 this->gblInds_wdv.extent(0) == 0,
1912 std::logic_error,
1913 "Graph is allocated nontrivially, but "
1914 "but 1-D allocations are not present."
1915 << suffix);
1916
1917 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->indicesAreAllocated() &&
1918 ((rowPtrsUnpacked_host.extent(0) != 0 ||
1919 this->k_numRowEntries_.extent(0) != 0) ||
1920 this->lclIndsUnpacked_wdv.extent(0) != 0 ||
1921 this->gblInds_wdv.extent(0) != 0),
1922 std::logic_error,
1923 "If indices are not allocated, "
1924 "then none of the buffers should be."
1925 << suffix);
1926 // indices may be local or global only if they are allocated
1927 // (numAllocated is redundant; could simply be indicesAreLocal_ ||
1928 // indicesAreGlobal_)
1929 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
1930 !this->indicesAreAllocated_,
1931 std::logic_error,
1932 "Indices may be local or global only if they are "
1933 "allocated."
1934 << suffix);
1935 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreLocal_ && this->indicesAreGlobal_,
1936 std::logic_error, "Indices may not be both local and global." << suffix);
1937 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreLocal_ && gblInds_wdv.extent(0) != 0,
1938 std::logic_error,
1939 "Indices are local, but "
1940 "gblInds_wdv.extent(0) (= "
1941 << gblInds_wdv.extent(0)
1942 << ") != 0. In other words, if indices are local, then "
1943 "allocations of global indices should not be present."
1944 << suffix);
1945 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreGlobal_ && lclIndsUnpacked_wdv.extent(0) != 0,
1946 std::logic_error,
1947 "Indices are global, but "
1948 "lclIndsUnpacked_wdv.extent(0) (= "
1949 << lclIndsUnpacked_wdv.extent(0)
1950 << ") != 0. In other words, if indices are global, "
1951 "then allocations for local indices should not be present."
1952 << suffix);
1953 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreLocal_ && nodeAllocSize > 0 &&
1954 lclIndsUnpacked_wdv.extent(0) == 0 && getLocalNumRows() > 0,
1955 std::logic_error,
1956 "Indices are local and "
1957 "getLocalAllocationSize() = "
1958 << nodeAllocSize << " > 0, but "
1959 "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
1960 << getLocalNumRows() << " > 0." << suffix);
1961 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreGlobal_ && nodeAllocSize > 0 &&
1962 gblInds_wdv.extent(0) == 0 && getLocalNumRows() > 0,
1963 std::logic_error,
1964 "Indices are global and "
1965 "getLocalAllocationSize() = "
1966 << nodeAllocSize << " > 0, but "
1967 "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
1968 << getLocalNumRows() << " > 0." << suffix);
1969 // check the actual allocations
1970 if (this->indicesAreAllocated() &&
1971 rowPtrsUnpacked_host.extent(0) != 0) {
1972 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(rowPtrsUnpacked_host.extent(0)) !=
1973 this->getLocalNumRows() + 1,
1974 std::logic_error,
1975 "Indices are allocated and "
1976 "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
1977 << rowPtrsUnpacked_host.extent(0) << " != getLocalNumRows()+1 = "
1978 << (this->getLocalNumRows() + 1) << "." << suffix);
1979 const size_t actualNumAllocated =
1980 rowPtrsUnpacked_host(this->getLocalNumRows());
1981 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() &&
1982 static_cast<size_t>(this->lclIndsUnpacked_wdv.extent(0)) != actualNumAllocated,
1983 std::logic_error,
1984 "Graph is locally indexed, indices are "
1985 "are allocated, and k_rowPtrs_ has nonzero length, but "
1986 "lclIndsUnpacked_wdv.extent(0) = "
1987 << this->lclIndsUnpacked_wdv.extent(0)
1988 << " != actualNumAllocated = " << actualNumAllocated << suffix);
1989 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed() &&
1990 static_cast<size_t>(this->gblInds_wdv.extent(0)) != actualNumAllocated,
1991 std::logic_error,
1992 "Graph is globally indexed, indices "
1993 "are allocated, and k_rowPtrs_ has nonzero length, but "
1994 "gblInds_wdv.extent(0) = "
1995 << this->gblInds_wdv.extent(0)
1996 << " != actualNumAllocated = " << actualNumAllocated << suffix);
1997 }
1998
1999 if (verbose_) {
2000 std::ostringstream os;
2001 os << *prefix << "Done" << endl;
2002 std::cerr << os.str();
2003 }
2004 }
2005}
2006
// Number of entries currently stored in the row identified by a global
// row index.
//
// The row is looked up with getRowInfoFromGlobalRowIndex(globalRow).  If
// the resulting RowInfo has an invalid localRow, the function returns
// Teuchos::OrdinalTraits<size_t>::invalid(); otherwise it returns
// rowInfo.numEntries.
//
// NOTE(review): listing lines 2009-2010 (qualified function name and
// parameter list) are absent from this extract; presumably this is
// getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const -- confirm.
2007template <class LocalOrdinal, class GlobalOrdinal, class Node>
2008size_t
2011 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(globalRow);
2012 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2013 return Teuchos::OrdinalTraits<size_t>::invalid();
2014 } else {
2015 return rowInfo.numEntries;
2016 }
2017}
2018
// Number of entries currently stored in the given local row.
//
// The row is looked up with getRowInfo(localRow).  If the resulting
// RowInfo has an invalid localRow, the function returns
// Teuchos::OrdinalTraits<size_t>::invalid(); otherwise it returns
// rowInfo.numEntries.
//
// NOTE(review): listing line 2021 (presumably the
// "CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::" qualifier) is absent
// from this extract.
2019template <class LocalOrdinal, class GlobalOrdinal, class Node>
2020size_t
2022 getNumEntriesInLocalRow(LocalOrdinal localRow) const {
2023 const RowInfo rowInfo = this->getRowInfo(localRow);
2024 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2025 return Teuchos::OrdinalTraits<size_t>::invalid();
2026 } else {
2027 return rowInfo.numEntries;
2028 }
2029}
2030
// Allocated capacity (RowInfo::allocSize) of the row identified by a
// global row index.
//
// The row is looked up with getRowInfoFromGlobalRowIndex(globalRow).  If
// the resulting RowInfo has an invalid localRow, the function returns
// Teuchos::OrdinalTraits<size_t>::invalid(); otherwise it returns
// rowInfo.allocSize.
//
// NOTE(review): listing lines 2033-2034 (qualified function name and
// parameter list) are absent from this extract; presumably this is
// getNumAllocatedEntriesInGlobalRow(GlobalOrdinal globalRow) const --
// confirm.
2031template <class LocalOrdinal, class GlobalOrdinal, class Node>
2032size_t
2035 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(globalRow);
2036 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2037 return Teuchos::OrdinalTraits<size_t>::invalid();
2038 } else {
2039 return rowInfo.allocSize;
2040 }
2041}
2042
// Allocated capacity (RowInfo::allocSize) of the given local row.
//
// The row is looked up with getRowInfo(localRow).  If the resulting
// RowInfo has an invalid localRow, the function returns
// Teuchos::OrdinalTraits<size_t>::invalid(); otherwise it returns
// rowInfo.allocSize.
//
// NOTE(review): listing lines 2045-2046 (qualified function name and
// parameter list) are absent from this extract; presumably this is
// getNumAllocatedEntriesInLocalRow(LocalOrdinal localRow) const --
// confirm.
2043template <class LocalOrdinal, class GlobalOrdinal, class Node>
2044size_t
2047 const RowInfo rowInfo = this->getRowInfo(localRow);
2048 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2049 return Teuchos::OrdinalTraits<size_t>::invalid();
2050 } else {
2051 return rowInfo.allocSize;
2052 }
2053}
2054
// Host view of the graph's row pointers.  Simply forwards to
// getRowPtrsPackedHost(), i.e. it exposes the packed row offsets.
//
// NOTE(review): listing line 2057 (presumably the
// "CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::" qualifier) is absent
// from this extract.
2055template <class LocalOrdinal, class GlobalOrdinal, class Node>
2056typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2058 getLocalRowPtrsHost() const {
2059 return getRowPtrsPackedHost();
2060}
2061
// Device view of the graph's row pointers.  Simply forwards to
// getRowPtrsPackedDevice(), i.e. it exposes the packed row offsets.
//
// NOTE(review): listing line 2064 (presumably the
// "CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::" qualifier) is absent
// from this extract.
2062template <class LocalOrdinal, class GlobalOrdinal, class Node>
2063typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2065 getLocalRowPtrsDevice() const {
2066 return getRowPtrsPackedDevice();
2067}
2068
// Read-only host view of the packed local column indices
// (lclIndsPacked_wdv, requested with Access::ReadOnly).
//
// NOTE(review): listing line 2071 (presumably the
// "CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::" qualifier) and listing
// line 2074 (presumably the closing brace) are absent from this extract.
2069template <class LocalOrdinal, class GlobalOrdinal, class Node>
2070typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2072 getLocalIndicesHost() const {
2073 return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2075
// Read-only device view of the packed local column indices
// (lclIndsPacked_wdv, requested with Access::ReadOnly).
//
// NOTE(review): listing lines 2077-2078 (return type and class
// qualifier) are absent from this extract.
2076template <class LocalOrdinal, class GlobalOrdinal, class Node>
2079 getLocalIndicesDevice() const {
2080 return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2081}
2082
// Copy the column indices of one local row, as local indices, into
// caller-provided storage.
//
// indices    - output view; its size() must be at least the number of
//              entries in the row, otherwise the size test below reports
//              std::runtime_error.
// numEntries - output; set to the row's entry count (rowinfo.numEntries).
//
// If the graph is locally indexed the stored indices are copied as-is.
// If it is globally indexed, each stored global index is converted with
// colMap_->getLocalElement().  When getRowInfo() reports an invalid
// localRow, nothing is written to indices.
//
// NOTE(review): listing lines 2084-2085 (return type, qualified name and
// the localRow parameter), 2091 (presumably the opening of a
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC call whose arguments follow) and
// 2098 are absent from this extract.
2083template <class LocalOrdinal, class GlobalOrdinal, class Node>
2086 nonconst_local_inds_host_view_type& indices,
2087 size_t& numEntries) const {
2088 using Teuchos::ArrayView;
2089 const char tfecfFuncName[] = "getLocalRowCopy: ";
2090
2092 isGloballyIndexed() && !hasColMap(), std::runtime_error,
2093 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2094 "does not have a column Map yet. That means we don't have local indices "
2095 "for columns yet, so it doesn't make sense to call this method. If the "
2096 "graph doesn't have a column Map yet, you should call fillComplete on "
2097 "it first.");
2099 // This does the right thing (reports an empty row) if the input
2100 // row is invalid.
2101 const RowInfo rowinfo = this->getRowInfo(localRow);
2102 // No side effects on error.
2103 const size_t theNumEntries = rowinfo.numEntries;
2104 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) < theNumEntries, std::runtime_error,
2105 "Specified storage (size==" << indices.size() << ") does not suffice "
2106 "to hold all "
2107 << theNumEntries << " entry/ies for this row.");
2108 numEntries = theNumEntries;
2109
2110 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
2111 if (isLocallyIndexed()) {
 // Stored indices are already local: element-wise copy.
2112 auto lclInds = getLocalIndsViewHost(rowinfo);
2113 for (size_t j = 0; j < theNumEntries; ++j) {
2114 indices[j] = lclInds(j);
2115 }
2116 } else if (isGloballyIndexed()) {
 // Stored indices are global: translate each through the column Map.
2117 auto gblInds = getGlobalIndsViewHost(rowinfo);
2118 for (size_t j = 0; j < theNumEntries; ++j) {
2119 indices[j] = colMap_->getLocalElement(gblInds(j));
2120 }
2121 }
2122 }
2123}
2124
// Copy the column indices of one row, identified by global row index,
// into caller-provided storage as global indices.
//
// indices    - output view; its size() must be at least the number of
//              entries in the row (checked against std::runtime_error).
// numEntries - output; set to the row's entry count (rowinfo.numEntries).
//
// If the graph is locally indexed, the stored local indices are
// converted in bulk with colMap_->getGlobalElements(); a true return
// value from that call is reported as std::runtime_error.  If the graph
// is globally indexed, the stored indices are copied with std::memcpy
// (skipped for an empty row).  When the row lookup reports an invalid
// localRow, nothing is written to indices.
//
// NOTE(review): listing lines 2126-2127 (return type, qualified name and
// the globalRow parameter) and 2137 (presumably the opening of a
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC call whose arguments follow) are
// absent from this extract.
2125template <class LocalOrdinal, class GlobalOrdinal, class Node>
2128 nonconst_global_inds_host_view_type& indices,
2129 size_t& numEntries) const {
2130 using Teuchos::ArrayView;
2131 const char tfecfFuncName[] = "getGlobalRowCopy: ";
2132
2133 // This does the right thing (reports an empty row) if the input
2134 // row is invalid.
2135 const RowInfo rowinfo = getRowInfoFromGlobalRowIndex(globalRow);
2136 const size_t theNumEntries = rowinfo.numEntries;
2138 static_cast<size_t>(indices.size()) < theNumEntries, std::runtime_error,
2139 "Specified storage (size==" << indices.size() << ") does not suffice "
2140 "to hold all "
2141 << theNumEntries << " entry/ies for this row.");
2142 numEntries = theNumEntries; // first side effect
2143
2144 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
2145 if (isLocallyIndexed()) {
2146 auto lclInds = getLocalIndsViewHost(rowinfo);
2147 bool err = colMap_->getGlobalElements(lclInds.data(), theNumEntries, indices.data());
2148 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(err, std::runtime_error, "getGlobalElements error");
2149 } else if (isGloballyIndexed()) {
2150 auto gblInds = getGlobalIndsViewHost(rowinfo);
2151 // Kokkos zero-extent views return null from .data(); glibc declares memcpy's
2152 // dst/src params __attribute__((nonnull(1,2))), so UBSan nonnull-arg fires
2153 // on a null pointer even when the byte count is zero.
2154 if (theNumEntries > 0) {
2155 std::memcpy(
2156 (void*)indices.data(),
2157 (const void*)gblInds.data(),
2158 theNumEntries * sizeof(*indices.data()));
2159 }
2160 }
2161 }
2162}
2163
// Obtain a read-only host view (no copy) of the local column indices of
// one local row.
//
// localRow - local index of the row.
// indices  - output; assigned a subview of lclIndsUnpacked_wdv covering
//            [rowInfo.offset1D, rowInfo.offset1D + rowInfo.numEntries),
//            or a default-constructed (empty) view when the row is
//            invalid or has no entries.
//
// Reports std::runtime_error if the graph is globally indexed.  When
// debug_ is set, the size of the returned view is cross-checked against
// getNumEntriesInLocalRow(localRow) and a mismatch is reported as
// std::logic_error.
//
// NOTE(review): listing lines 2165-2166 (return type and qualified
// function name) are absent from this extract.
2164template <class LocalOrdinal, class GlobalOrdinal, class Node>
2167 const LocalOrdinal localRow,
2168 local_inds_host_view_type& indices) const {
2169 const char tfecfFuncName[] = "getLocalRowView: ";
2170
2171 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed(), std::runtime_error,
2172 "The graph's indices are "
2173 "currently stored as global indices, so we cannot return a view with "
2174 "local column indices, whether or not the graph has a column Map. If "
2175 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2176
2177 const RowInfo rowInfo = getRowInfo(localRow);
2178 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
2179 rowInfo.numEntries > 0) {
2180 indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2181 rowInfo.numEntries,
2182 Access::ReadOnly);
2183 } else {
2184 // This does the right thing (reports an empty row) if the input
2185 // row is invalid.
2186 indices = local_inds_host_view_type();
2187 }
2188
2189 if (debug_) {
2190 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
2191 getNumEntriesInLocalRow(localRow),
2192 std::logic_error,
2193 "indices.size() "
2194 "= " << indices.extent(0)
2195 << " != getNumEntriesInLocalRow(localRow=" << localRow << ") = " << getNumEntriesInLocalRow(localRow) << ". Please report this bug to the Tpetra developers.");
2196 }
2197}
2198
// Obtain a read-only host view (no copy) of the global column indices of
// one row identified by global row index.
//
// indices - output; assigned a subview of gblInds_wdv covering
//           [rowInfo.offset1D, rowInfo.offset1D + rowInfo.numEntries),
//           or a default-constructed (empty) const host view when the
//           row is invalid or has no entries.
//
// Reports std::runtime_error if the graph is locally indexed.  When
// debug_ is set, the size of the returned view is cross-checked against
// getNumEntriesInGlobalRow(globalRow) and a mismatch is reported as
// std::logic_error.
//
// NOTE(review): listing lines 2200-2202 (return type, qualified function
// name and the globalRow parameter) are absent from this extract.
2199template <class LocalOrdinal, class GlobalOrdinal, class Node>
2203 global_inds_host_view_type& indices) const {
2204 const char tfecfFuncName[] = "getGlobalRowView: ";
2205
2206 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isLocallyIndexed(), std::runtime_error,
2207 "The graph's indices are "
2208 "currently stored as local indices, so we cannot return a view with "
2209 "global column indices. Use getGlobalRowCopy() instead.");
2210
2211 // This does the right thing (reports an empty row) if the input
2212 // row is invalid.
2213 const RowInfo rowInfo = getRowInfoFromGlobalRowIndex(globalRow);
2214 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
2215 rowInfo.numEntries > 0) {
2216 indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2217 rowInfo.numEntries,
2218 Access::ReadOnly);
2219 } else {
2220 indices = typename global_inds_dualv_type::t_host::const_type();
2221 }
2222 if (debug_) {
2223 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
2224 getNumEntriesInGlobalRow(globalRow),
2225 std::logic_error, "indices.size() = " << indices.extent(0) << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = " << getNumEntriesInGlobalRow(globalRow) << ". Please report this bug to the Tpetra developers.");
2226 }
2227}
2228
// Insert local column indices into one locally owned row.
//
// localRow - local index of the target row; must satisfy
//            rowMap_->isNodeLocalElement(localRow).
// indices  - local column indices to insert.
//
// Preconditions, each reported as std::runtime_error: fill is active,
// the graph is not globally indexed, the graph has a column Map, and
// localRow is in the row Map.  Index storage is allocated on first use
// via allocateIndices(LocalIndices, verbose_).
//
// When debug_ is set, every input index is tested with
// colMap.isNodeLocalElement(); if any fails, std::invalid_argument is
// thrown with a message listing all inputs and the offending ones.
// After insertion, debug_ also verifies that indices are allocated and
// local (std::logic_error otherwise).
//
// NOTE(review): listing line 2230 (presumably the return type and
// "CrsGraph<...>::" qualifier) and listing line 2259 (presumably the
// declaration of badColInds) are absent from this extract.
2229template <class LocalOrdinal, class GlobalOrdinal, class Node>
2231 insertLocalIndices(const LocalOrdinal localRow,
2232 const Teuchos::ArrayView<const LocalOrdinal>& indices) {
2233 const char tfecfFuncName[] = "insertLocalIndices: ";
2234
2235 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error, "Fill must be active.");
2236 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed(), std::runtime_error,
2237 "Graph indices are global; use insertGlobalIndices().");
2238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error,
2239 "Cannot insert local indices without a column Map.");
2240 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!rowMap_->isNodeLocalElement(localRow), std::runtime_error,
2241 "Local row index " << localRow << " is not in the row Map "
2242 "on the calling process.");
2243 if (!indicesAreAllocated()) {
2244 allocateIndices(LocalIndices, verbose_);
2245 }
2246
2247 if (debug_) {
2248 // In debug mode, if the graph has a column Map, test whether any
2249 // of the given column indices are not in the column Map. Keep
2250 // track of the invalid column indices so we can tell the user
2251 // about them.
2252 if (hasColMap()) {
2253 using std::endl;
2254 using Teuchos::Array;
2255 using Teuchos::toString;
2256 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2257
2258 const map_type& colMap = *colMap_;
2260 bool allInColMap = true;
2261 for (size_type k = 0; k < indices.size(); ++k) {
2262 if (!colMap.isNodeLocalElement(indices[k])) {
2263 allInColMap = false;
2264 badColInds.push_back(indices[k]);
2265 }
2266 }
2267 if (!allInColMap) {
2268 std::ostringstream os;
2269 os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2270 "entries in owned row "
2271 << localRow << ", at the following column "
2272 "indices: "
2273 << toString(indices) << "." << endl;
2274 os << "Of those, the following indices are not in the column Map on "
2275 "this process: "
2276 << toString(badColInds) << "." << endl
2277 << "Since "
2278 "the graph has a column Map already, it is invalid to insert entries "
2279 "at those locations.";
2280 TEUCHOS_TEST_FOR_EXCEPTION(!allInColMap, std::invalid_argument, os.str());
2281 }
2282 }
2283 }
2284
2285 insertLocalIndicesImpl(localRow, indices);
2286
2287 if (debug_) {
2288 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!indicesAreAllocated() || !isLocallyIndexed(), std::logic_error,
2289 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2290 "! isLocallyIndexed() is true. Please report this bug to the "
2291 "Tpetra developers.");
2292 }
2293}
2294
// Raw-array convenience overload of insertLocalIndices: wraps
// (inds, numEnt) in a non-owning Teuchos::ArrayView and forwards to the
// ArrayView overload, so all of that overload's checks apply.
//
// NOTE(review): listing line 2296 (presumably the return type and
// "CrsGraph<...>::" qualifier) is absent from this extract.
2295template <class LocalOrdinal, class GlobalOrdinal, class Node>
2297 insertLocalIndices(const LocalOrdinal localRow,
2298 const LocalOrdinal numEnt,
2299 const LocalOrdinal inds[]) {
2300 Teuchos::ArrayView<const LocalOrdinal> indsT(inds, numEnt);
2301 this->insertLocalIndices(localRow, indsT);
2302}
2303
// Insert global column indices into the row with global index gblRow.
//
// The body uses gblRow, numInputInds and inputGblColInds, which are
// presumably the parameters declared on the missing signature lines.
//
// Preconditions, each reported as std::runtime_error: the graph is not
// locally indexed and fill is active.  Index storage is allocated on
// first use via allocateIndices(GlobalIndices, verbose_).
//
// If gblRow maps to a valid local row in rowMap_, the indices are
// inserted with insertGlobalIndicesImpl().  With debug_ set and a column
// Map present, every input index is first tested with
// colMap.isNodeGlobalElement(); if any fails, std::invalid_argument is
// reported with a message listing all inputs and the offending ones.
// If gblRow is not owned by the calling process, the indices are handed
// to insertGlobalIndicesIntoNonownedRows() instead (no column Map check
// is applied on that path).
//
// NOTE(review): listing lines 2305-2308 (return type, qualified function
// name and the full parameter list) are absent from this extract.
2304template <class LocalOrdinal, class GlobalOrdinal, class Node>
2309 typedef LocalOrdinal LO;
2310 const char tfecfFuncName[] = "insertGlobalIndices: ";
2311
2312 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed(), std::runtime_error,
2313 "graph indices are local; use insertLocalIndices().");
2314 // This can't really be satisfied for now, because if we are
2315 // fillComplete(), then we are local. In the future, this may
2316 // change. However, the rule that modification require active
2317 // fill will not change.
2318 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
2319 "You are not allowed to call this method if fill is not active. "
2320 "If fillComplete has been called, you must first call resumeFill "
2321 "before you may insert indices.");
2322 if (!indicesAreAllocated()) {
2323 allocateIndices(GlobalIndices, verbose_);
2324 }
2325 const LO lclRow = this->rowMap_->getLocalElement(gblRow);
2326 if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2327 if (debug_) {
2328 if (this->hasColMap()) {
2329 using std::endl;
2330 const map_type& colMap = *(this->colMap_);
2331 // In a debug build, keep track of the nonowned ("bad") column
2332 // indices, so that we can display them in the exception
2333 // message. In a release build, just ditch the loop early if
2334 // we encounter a nonowned column index.
2335 std::vector<GlobalOrdinal> badColInds;
2336 bool allInColMap = true;
2337 for (LO k = 0; k < numInputInds; ++k) {
2338 if (!colMap.isNodeGlobalElement(inputGblColInds[k])) {
2339 allInColMap = false;
2340 badColInds.push_back(inputGblColInds[k]);
2341 }
2342 }
2343 if (!allInColMap) {
2344 std::ostringstream os;
2345 os << "You attempted to insert entries in owned row " << gblRow
2346 << ", at the following column indices: [";
2347 for (LO k = 0; k < numInputInds; ++k) {
2348 os << inputGblColInds[k];
2349 if (k + static_cast<LO>(1) < numInputInds) {
2350 os << ",";
2351 }
2352 }
2353 os << "]." << endl
2354 << "Of those, the following indices are not in "
2355 "the column Map on this process: [";
2356 for (size_t k = 0; k < badColInds.size(); ++k) {
2357 os << badColInds[k];
2358 if (k + size_t(1) < badColInds.size()) {
2359 os << ",";
2360 }
2361 }
2362 os << "]." << endl
2363 << "Since the matrix has a column Map already, "
2364 "it is invalid to insert entries at those locations.";
2365 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, os.str());
2366 }
2367 }
2368 } // debug_
2369 this->insertGlobalIndicesImpl(lclRow, inputGblColInds, numInputInds);
2370 } else { // a nonlocal row
2371 this->insertGlobalIndicesIntoNonownedRows(gblRow, inputGblColInds,
2372 numInputInds);
2373 }
2374}
2375
// ArrayView convenience overload of insertGlobalIndices: forwards the
// view's size() and raw pointer to the (row, count, pointer) overload.
//
// NOTE(review): listing lines 2377-2378 (return type, qualified function
// name and the gblRow parameter) are absent from this extract.
2376template <class LocalOrdinal, class GlobalOrdinal, class Node>
2379 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds) {
2380 this->insertGlobalIndices(gblRow, inputGblColInds.size(),
2381 inputGblColInds.getRawPtr());
2382}
2383
// Insert global column indices into a local row, silently dropping any
// index that is not in the column Map on the calling process.
//
// The body uses lclRow, gblColInds and numGblColInds; lclRow and
// numGblColInds are presumably parameters declared on the missing
// signature lines.
//
// Preconditions, each reported as std::runtime_error: the graph is not
// locally indexed and fill is active.  Index storage is allocated on
// first use via allocateIndices(GlobalIndices, verbose_).
//
// With a column Map, the input is scanned for maximal consecutive runs
// of indices for which colMap.getLocalElement() is valid; each non-empty
// run is inserted with one insertGlobalIndicesImpl() call, and the index
// that terminated the run is skipped.  Without a column Map, all inputs
// are inserted unfiltered.
//
// NOTE(review): listing lines 2385-2386 and 2388 (return type, qualified
// function name, and the parameters other than gblColInds) are absent
// from this extract.
2384template <class LocalOrdinal, class GlobalOrdinal, class Node>
2387 const GlobalOrdinal gblColInds[],
2389 typedef LocalOrdinal LO;
2390 typedef GlobalOrdinal GO;
2391 const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2392
2393 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed(), std::runtime_error,
2394 "Graph indices are local; use insertLocalIndices().");
2395 // This can't really be satisfied for now, because if we are
2396 // fillComplete(), then we are local. In the future, this may
2397 // change. However, the rule that modification require active
2398 // fill will not change.
2399 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
2400 "You are not allowed to call this method if fill is not active. "
2401 "If fillComplete has been called, you must first call resumeFill "
2402 "before you may insert indices.");
2403 if (!indicesAreAllocated()) {
2404 allocateIndices(GlobalIndices, verbose_);
2405 }
2406
2407 Teuchos::ArrayView<const GO> gblColInds_av(gblColInds, numGblColInds);
2408 // If we have a column Map, use it to filter the entries.
2409 if (!colMap_.is_null()) {
2410 const map_type& colMap = *(this->colMap_);
2411
2412 LO curOffset = 0;
2413 while (curOffset < numGblColInds) {
2414 // Find a sequence of input indices that are in the column Map
2415 // on the calling process. Doing a sequence at a time,
2416 // instead of one at a time, amortizes some overhead.
2417 LO endOffset = curOffset;
2418 for (; endOffset < numGblColInds; ++endOffset) {
2419 const LO lclCol = colMap.getLocalElement(gblColInds[endOffset]);
2420 if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2421 break; // first entry, in current sequence, not in the column Map
2422 }
2423 }
2424 // curOffset, endOffset: half-exclusive range of indices in
2425 // the column Map on the calling process. If endOffset ==
2426 // curOffset, the range is empty.
2427 const LO numIndInSeq = (endOffset - curOffset);
2428 if (numIndInSeq != 0) {
2429 this->insertGlobalIndicesImpl(lclRow, gblColInds + curOffset,
2430 numIndInSeq);
2431 }
2432 // Invariant before this line: Either endOffset ==
2433 // numGblColInds, or gblColInds[endOffset] is not in the
2434 // column Map on the calling process.
 // The "+ 1" steps over the rejected index (or past the end, which
 // terminates the while loop).
2435 curOffset = endOffset + 1;
2436 }
2437 } else {
2438 this->insertGlobalIndicesImpl(lclRow, gblColInds_av.getRawPtr(),
2439 gblColInds_av.size());
2440 }
2441}
2442
// Buffer global column indices for a row that the calling process does
// not own.
//
// Appends gblColInds[0 .. numGblColInds) to the std::vector stored at
// nonlocals_[gblRow], creating that vector if it does not exist yet.
// Duplicates are not removed here.
//
// NOTE(review): listing lines 2444-2445 and 2447 (return type, qualified
// function name, and the gblRow / numGblColInds parameters) are absent
// from this extract; the name is presumably
// insertGlobalIndicesIntoNonownedRows, matching the call made from
// insertGlobalIndices -- confirm.
2443template <class LocalOrdinal, class GlobalOrdinal, class Node>
2446 const GlobalOrdinal gblColInds[],
2448 // This creates the std::vector if it doesn't exist yet.
2449 // std::map's operator[] does a lookup each time, so it's better
2450 // to pull nonlocals_[grow] out of the loop.
2451 std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2452 for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2453 // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2454 // order to avoid duplicates. globalAssemble() sorts these
2455 // anyway.
2456 nonlocalRow.push_back(gblColInds[k]);
2457 }
2458}
2459
// Remove all entries from one local row (lrow) by resetting its entry
// count to zero; the row's storage is not released here.
//
// The argument lists below check, with std::runtime_error: fill is
// active, storage has not been optimized, the graph is not globally
// indexed, and lrow is in the row Map.  Index storage is allocated on
// first use via allocateIndices(LocalIndices, verbose_).  The count is
// only reset when k_numRowEntries_ has nonzero extent.  With debug_ set,
// the post-conditions (row is empty, indices allocated and local) are
// verified and a violation is reported as std::logic_error.
//
// NOTE(review): listing lines 2461-2462 (return type, qualified function
// name and the lrow parameter) and 2464, 2466, 2469, 2471 (presumably
// the opening lines of TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC calls whose
// arguments follow) are absent from this extract.
2460template <class LocalOrdinal, class GlobalOrdinal, class Node>
2463 const char tfecfFuncName[] = "removeLocalIndices: ";
2465 !isFillActive(), std::runtime_error, "requires that fill is active.");
2467 isStorageOptimized(), std::runtime_error,
2468 "cannot remove indices after optimizeStorage() has been called.");
2470 isGloballyIndexed(), std::runtime_error, "graph indices are global.");
2472 !rowMap_->isNodeLocalElement(lrow), std::runtime_error,
2473 "Local row " << lrow << " is not in the row Map on the calling process.");
2474 if (!indicesAreAllocated()) {
2475 allocateIndices(LocalIndices, verbose_);
2476 }
2477
2478 if (k_numRowEntries_.extent(0) != 0) {
2479 this->k_numRowEntries_(lrow) = 0;
2480 }
2481
2482 if (debug_) {
2483 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(getNumEntriesInLocalRow(lrow) != 0 ||
2484 !indicesAreAllocated() ||
2485 !isLocallyIndexed(),
2486 std::logic_error,
2487 "Violated stated post-conditions. Please contact Tpetra team.");
2488 }
2489}
2490
// Install a complete local graph structure (row offsets plus local
// column indices) supplied as Kokkos views.
//
// rowPointers   - row offsets; its length is validated against
//                 getLocalNumRows()+1 (or 0/1 when there are no rows).
// columnIndices - local column indices for all rows.
//
// Requires a column Map (std::runtime_error otherwise).  With debug_
// set, two device reductions validate the input: every column index must
// lie in [0, getLocalNumCols()), and -- only if isSorted() is already
// true -- indices must be non-decreasing within each row.  Each result
// is max-reduced across the communicator so that all ranks throw
// std::invalid_argument together; offending ranks print a message
// through Details::gathervPrint.
//
// On success the graph is marked allocated, locally indexed, sorted and
// free of redundancies; columnIndices becomes both the packed and the
// unpacked index storage; storageStatus_ becomes STORAGE_1D_PACKED; the
// allocation specifications are cleared; and checkInternalState() runs.
//
// NOTE(review): indicesAreSorted_ and noRedundancies_ are set to true
// unconditionally, while sortedness is only verified under
// debug_ && isSorted() and duplicates are never checked here --
// presumably callers are required to pass sorted, duplicate-free rows;
// confirm.
//
// NOTE(review): listing lines 2492 (return type / class qualifier), 2498,
// 2505, 2509 (presumably openings of TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
// calls), 2503 (presumably the definition of rowPtrLen), 2521 (lambda
// header), 2525 (reduction result argument) and 2530 (presumably the
// "if (globalColsOutOfBounds)" test) are absent from this extract.
2491template <class LocalOrdinal, class GlobalOrdinal, class Node>
2493 setAllIndices(const typename local_graph_device_type::row_map_type& rowPointers,
2494 const typename local_graph_device_type::entries_type::non_const_type& columnIndices) {
2495 using ProfilingRegion = Details::ProfilingRegion;
2496 ProfilingRegion region("Tpetra::CrsGraph::setAllIndices");
2497 const char tfecfFuncName[] = "setAllIndices: ";
2499 !hasColMap() || getColMap().is_null(), std::runtime_error,
2500 "The graph must have a column Map before you may call this method.");
2501 LocalOrdinal numLocalRows = this->getLocalNumRows();
2502 {
2504 if (numLocalRows == 0) {
2506 rowPtrLen != 0 && rowPtrLen != 1,
2507 std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2508 } else {
2510 rowPtrLen != numLocalRows + 1,
2511 std::runtime_error, "rowPointers.size() = " << rowPtrLen << " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2512 }
2513 }
2514
2515 if (debug_) {
2516 using exec_space = typename local_graph_device_type::execution_space;
2517 int columnsOutOfBounds = 0;
2518 local_ordinal_type numLocalCols = this->getLocalNumCols();
2519 Kokkos::parallel_reduce(
2520 Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2522 if (columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2523 lOutOfBounds++;
2524 },
2526 int globalColsOutOfBounds = 0;
2527 auto comm = this->getComm();
2528 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2529 Teuchos::outArg(globalColsOutOfBounds));
2531 std::string message;
2532 if (columnsOutOfBounds) {
2533 // Only print message from ranks with the problem
2534 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2535 }
2536 Details::gathervPrint(std::cout, message, *comm);
2537 throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2538 }
2539 }
2540
2541 if (debug_ && this->isSorted()) {
2542 // Verify that the local indices are actually sorted
2543 int notSorted = 0;
2544 using exec_space = typename local_graph_device_type::execution_space;
2545 using size_type = typename local_graph_device_type::size_type;
2546 Kokkos::parallel_reduce(
2547 Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2548 KOKKOS_LAMBDA(const LocalOrdinal i, int& lNotSorted) {
2549 size_type rowBegin = rowPointers(i);
2550 size_type rowEnd = rowPointers(i + 1);
2551 for (size_type j = rowBegin + 1; j < rowEnd; j++) {
2552 if (columnIndices(j - 1) > columnIndices(j)) {
2553 lNotSorted = 1;
2554 }
2555 }
2556 },
2557 notSorted);
2558 // All-reduce notSorted to avoid rank divergence
2559 int globalNotSorted = 0;
2560 auto comm = this->getComm();
2561 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX, notSorted,
2562 Teuchos::outArg(globalNotSorted));
2563 if (globalNotSorted) {
2564 std::string message;
2565 if (notSorted) {
2566 // Only print message from ranks with the problem
2567 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2568 }
2569 Details::gathervPrint(std::cout, message, *comm);
2570 throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2571 }
2572 }
2573
2574 indicesAreAllocated_ = true;
2575 indicesAreLocal_ = true;
2576 indicesAreSorted_ = true;
2577 noRedundancies_ = true;
2578 lclIndsPacked_wdv = local_inds_wdv_type(columnIndices);
2579 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2580 setRowPtrs(rowPointers);
2581
2582 set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2583
2584 // Storage MUST be packed, since the interface doesn't give any
2585 // way to indicate any extra space at the end of each row.
2586 storageStatus_ = Details::STORAGE_1D_PACKED;
2587
2588 // These normally get cleared out at the end of allocateIndices.
2589 // It makes sense to clear them out here, because at the end of
2590 // this method, the graph is allocated on the calling process.
2591 numAllocForAllRows_ = 0;
2592 k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
2593
2594 checkInternalState();
2595}
2596
// Teuchos::ArrayRCP overload of setAllIndices.
//
// rowPointers   - row offsets as size_t values in host memory.
// columnIndices - local column indices in host memory.
//
// Wraps rowPointers in an unmanaged host Kokkos::View and copies it into
// a freshly allocated view (ptr_rot) of the graph's row-offset type.
// When size_t is that type, a single deep_copy on the device execution
// space suffices.  Otherwise a converting copy is needed: in host memory
// it is done directly; else the size_t offsets are first deep-copied to
// a device view (ptr_st) and then converted there.  columnIndices is
// deep-copied to a device view, and both views are passed to the
// Kokkos-view overload of setAllIndices.
//
// NOTE(review): listing line 2598 (return type / class qualifier) and
// listing lines 2629 and 2643 (presumably the converting copy calls
// described by the adjacent comments) are absent from this extract.
2597template <class LocalOrdinal, class GlobalOrdinal, class Node>
2599 setAllIndices(const Teuchos::ArrayRCP<size_t>& rowPointers,
2600 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices) {
2601 using Kokkos::View;
2602 typedef typename local_graph_device_type::row_map_type row_map_type;
2603 typedef typename row_map_type::array_layout layout_type;
2604 typedef typename row_map_type::non_const_value_type row_offset_type;
2605 typedef View<size_t*, layout_type, Kokkos::HostSpace,
2606 Kokkos::MemoryUnmanaged>
2607 input_view_type;
2608 typedef typename row_map_type::non_const_type nc_row_map_type;
2609
2610 const size_t size = static_cast<size_t>(rowPointers.size());
2611 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2612 input_view_type ptr_in(rowPointers.getRawPtr(), size);
2613
2614 nc_row_map_type ptr_rot("Tpetra::CrsGraph::ptr", size);
2615
2616 if constexpr (same) { // size_t == row_offset_type
2617 using lexecution_space = typename device_type::execution_space;
2618 Kokkos::deep_copy(lexecution_space(),
2619 ptr_rot,
2620 ptr_in);
2621 } else { // size_t != row_offset_type
2622 // CudaUvmSpace != HostSpace, so this will be false in that case.
2623 constexpr bool inHostMemory =
2624 std::is_same<typename row_map_type::memory_space,
2625 Kokkos::HostSpace>::value;
2626 if (inHostMemory) {
2627 // Copy (with cast from size_t to row_offset_type, with bounds
2628 // checking if necessary) to ptr_rot.
2630 } else { // Copy input row offsets to device first.
2631 //
2632 // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2633 // execution space would avoid the double copy.
2634 //
2635 View<size_t*, layout_type, device_type> ptr_st("Tpetra::CrsGraph::ptr", size);
2636
2637 // DEEP_COPY REVIEW - NOT TESTED
2638 Kokkos::deep_copy(ptr_st, ptr_in);
2639 // Copy on device (casting from size_t to row_offset_type,
2640 // with bounds checking if necessary) to ptr_rot. This
2641 // executes in the output View's execution space, which is the
2642 // same as execution_space.
2644 }
2645 }
2646
2647 Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2648 Kokkos::Compat::getKokkosViewDeepCopy<device_type>(columnIndices());
2649 setAllIndices(ptr_rot, k_ind);
2650}
2651
// globalAssemble: move the entries buffered in nonlocals_ into *this.
// Following the numbered steps below, the column indices of every
// nonlocal row are sorted and deduplicated, a row Map and a temporary
// graph are built over those rows, and that graph is Exported into
// *this (directly when the original row Map is one-to-one, otherwise
// through an intermediate one-to-one graph that is then Imported).
// nonlocals_ is emptied at the end. Returns early when no process
// reports nonlocal rows. The exception macro is invoked with
// std::runtime_error when !isFillActive().
//
// NOTE(review): this listing carries embedded line numbers and skips
// some of them (e.g. 2653-2654 and 2694-2695), so the function
// signature and a few statements are not visible here -- confirm
// against the original file before relying on this copy.
2652template <class LocalOrdinal, class GlobalOrdinal, class Node>
2655 using std::endl;
2656 using Teuchos::Comm;
2657 using Teuchos::outArg;
2658 using Teuchos::RCP;
2659 using Teuchos::rcp;
2660 using Teuchos::REDUCE_MAX;
2661 using Teuchos::REDUCE_MIN;
2662 using Teuchos::reduceAll;
2663 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2664 using LO = local_ordinal_type;
2665 using GO = global_ordinal_type;
2666 using size_type = typename Teuchos::Array<GO>::size_type;
2667
2668 const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2669
2670 Details::ProfilingRegion regionGA("Tpetra::CrsGraph::globalAssemble");
2671
// prefix is only created (and only dereferenced) when verbose_ is true.
2672 std::unique_ptr<std::string> prefix;
2673 if (verbose_) {
2674 prefix = this->createPrefix("CrsGraph", "globalAssemble");
2675 std::ostringstream os;
2676 os << *prefix << "Start" << endl;
2677 std::cerr << os.str();
2678 }
2679 RCP<const Comm<int>> comm = getComm();
2680
2681 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error,
2682 "Fill must be active before "
2683 "you may call this method.");
2684
2685 const size_t myNumNonlocalRows = this->nonlocals_.size();
2686
2687 // If no processes have nonlocal rows, then we don't have to do
2688 // anything. Checking this is probably cheaper than constructing
2689 // the Map of nonlocal rows (see below) and noticing that it has
2690 // zero global entries.
2691 {
2692 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2693 int someoneHasNonlocalRows = 0;
// NOTE(review): the reduction that fills someoneHasNonlocalRows is not
// visible in this listing (numbering skips 2694-2695) -- confirm.
2696 if (someoneHasNonlocalRows == 0) {
2697 if (verbose_) {
2698 std::ostringstream os;
2699 os << *prefix << "Done: No nonlocal rows" << endl;
2700 std::cerr << os.str();
2701 }
2702 return;
2703 } else if (verbose_) {
2704 std::ostringstream os;
2705 os << *prefix << "At least 1 process has nonlocal rows"
2706 << endl;
2707 std::cerr << os.str();
2708 }
2709 }
2710
2711 // 1. Create a list of the "nonlocal" rows on each process. This
2712 // requires iterating over nonlocals_, so while we do this,
2713 // deduplicate the entries and get a count for each nonlocal
2714 // row on this process.
2715 // 2. Construct a new row Map corresponding to those rows. This
2716 // Map is likely overlapping. We know that the Map is not
2717 // empty on all processes, because the above all-reduce and
2718 // return exclude that case.
2719
// NOTE(review): the declaration of nonlocalRowMap (used below) is not
// visible in this listing (numbering skips 2720) -- confirm.
2721 // Keep this for CrsGraph's constructor.
2722 Teuchos::Array<size_t> numEntPerNonlocalRow(myNumNonlocalRows);
2723 {
2724 Teuchos::Array<GO> myNonlocalGblRows(myNumNonlocalRows);
2725 size_type curPos = 0;
2726 for (auto mapIter = this->nonlocals_.begin();
2727 mapIter != this->nonlocals_.end();
2728 ++mapIter, ++curPos) {
2730 std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
// Sorting first makes duplicates adjacent, so std::unique followed by
// erase leaves each column index exactly once.
2731 std::sort(gblCols.begin(), gblCols.end());
2732 auto vecLast = std::unique(gblCols.begin(), gblCols.end());
2733 gblCols.erase(vecLast, gblCols.end());
2735 }
2736
2737 // Currently, Map requires that its indexBase be the global min
2738 // of all its global indices. Map won't compute this for us, so
2739 // we must do it. If our process has no nonlocal rows, set the
2740 // "min" to the max possible GO value. This ensures that if
2741 // some process has at least one nonlocal row, then it will pick
2742 // that up as the min. We know that at least one process has a
2743 // nonlocal row, since the all-reduce and return at the top of
2744 // this method excluded that case.
2745 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max();
2746 {
2747 auto iter = std::min_element(myNonlocalGblRows.begin(),
2748 myNonlocalGblRows.end());
2749 if (iter != myNonlocalGblRows.end()) {
2751 }
2752 }
2753 GO gblMinNonlocalGblRow = 0;
2757 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
2759 }
2760
2761 if (verbose_) {
2762 std::ostringstream os;
2763 os << *prefix << "nonlocalRowMap->getIndexBase()="
2764 << nonlocalRowMap->getIndexBase() << endl;
2765 std::cerr << os.str();
2766 }
2767
2768 // 3. Use the column indices for each nonlocal row, as stored in
2769 // nonlocals_, to construct a CrsGraph corresponding to
2770 // nonlocal rows. We need, but we have, exact counts of the
2771 // number of entries in each nonlocal row.
2772
2774 rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
2775 {
// Walk nonlocals_ in the same order as above so curPos indexes the
// matching entry of numEntPerNonlocalRow.
2776 size_type curPos = 0;
2777 for (auto mapIter = this->nonlocals_.begin();
2778 mapIter != this->nonlocals_.end();
2779 ++mapIter, ++curPos) {
2780 const GO gblRow = mapIter->first;
2781 std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
2782 const LO numEnt = static_cast<LO>(numEntPerNonlocalRow[curPos]);
2783 nonlocalGraph->insertGlobalIndices(gblRow, numEnt, gblCols.data());
2784 }
2785 }
2786 if (verbose_) {
2787 std::ostringstream os;
2788 os << *prefix << "Built nonlocal graph" << endl;
2789 std::cerr << os.str();
2790 }
2791 // There's no need to fill-complete the nonlocals graph.
2792 // We just use it as a temporary container for the Export.
2793
2794 // 4. If the original row Map is one to one, then we can Export
2795 // directly from nonlocalGraph into this. Otherwise, we have
2796 // to create a temporary graph with a one-to-one row Map,
2797 // Export into that, then Import from the temporary graph into
2798 // *this.
2799
2800 auto origRowMap = this->getRowMap();
2801 const bool origRowMapIsOneToOne = origRowMap->isOneToOne();
2802
2804 if (verbose_) {
2805 std::ostringstream os;
2806 os << *prefix << "Original row Map is 1-to-1" << endl;
2807 std::cerr << os.str();
2808 }
2810 this->doExport(*nonlocalGraph, exportToOrig, Tpetra::INSERT);
2811 // We're done at this point!
2812 } else {
2813 if (verbose_) {
2814 std::ostringstream os;
2815 os << *prefix << "Original row Map is NOT 1-to-1" << endl;
2816 std::cerr << os.str();
2817 }
2818 // If you ask a Map whether it is one to one, it does some
2819 // communication and stashes intermediate results for later use
2820 // by createOneToOne. Thus, calling createOneToOne doesn't cost
2821 // much more than the original cost of calling isOneToOne.
2824
2825 // Create a temporary graph with the one-to-one row Map.
2826 //
2827 // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
2828 // row, to avoid reallocation during the Export operation.
2829 crs_graph_type oneToOneGraph(oneToOneRowMap, 0);
2830
2831 // Export from graph of nonlocals into the temp one-to-one graph.
2832 if (verbose_) {
2833 std::ostringstream os;
2834 os << *prefix << "Export nonlocal graph" << endl;
2835 std::cerr << os.str();
2836 }
2838
2839 // We don't need the graph of nonlocals anymore, so get rid of
2840 // it, to keep the memory high-water mark down.
2841 nonlocalGraph = Teuchos::null;
2842
2843 // Import from the one-to-one graph to the original graph.
2845 if (verbose_) {
2846 std::ostringstream os;
2847 os << *prefix << "Import nonlocal graph" << endl;
2848 std::cerr << os.str();
2849 }
2850 this->doImport(oneToOneGraph, importToOrig, Tpetra::INSERT);
2851 }
2852
2853 // It's safe now to clear out nonlocals_, since we've already
2854 // committed side effects to *this. The standard idiom for
2855 // clearing a Container like std::map, is to swap it with an empty
2856 // Container and let the swapped Container fall out of scope.
2857 decltype(this->nonlocals_) newNonlocals;
2858 std::swap(this->nonlocals_, newNonlocals);
2859
2860 checkInternalState();
2861 if (verbose_) {
2862 std::ostringstream os;
2863 os << *prefix << "Done" << endl;
2864 std::cerr << os.str();
2865 }
2866}
2867
// resumeFill: put the graph back into a modifiable state.
// Calls clearGlobalConstants(), applies params via setParameterList()
// when params is nonnull, marks the indices as sorted and free of
// redundancies, and clears fillComplete_.
// NOTE(review): the line carrying the return type and class qualifier
// (listing line 2869) is not visible here -- confirm against the file.
2868template <class LocalOrdinal, class GlobalOrdinal, class Node>
2870 resumeFill(const Teuchos::RCP<Teuchos::ParameterList>& params) {
2871 clearGlobalConstants();
2872 if (params != Teuchos::null) this->setParameterList(params);
2873 // either still sorted/merged or initially sorted/merged
2874 indicesAreSorted_ = true;
2875 noRedundancies_ = true;
2876 fillComplete_ = false;
2877}
2878
// fillComplete (params-only overload): choose the domain and range
// Maps and forward to the three-argument fillComplete below.
// Each Map is the graph's current one when it is nonnull, and the row
// Map otherwise. params is passed through unchanged.
// NOTE(review): the line carrying the return type and class qualifier
// (listing line 2880) is not visible here -- confirm against the file.
2879template <class LocalOrdinal, class GlobalOrdinal, class Node>
2881 fillComplete(const Teuchos::RCP<Teuchos::ParameterList>& params) {
2882 // If the graph already has domain and range Maps, don't clobber
2883 // them. If it doesn't, use the current row Map for both the
2884 // domain and range Maps.
2885 //
2886 // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
2887 // column Map, and column indices are inserted which are not in
2888 // the row Map on any process, this will cause troubles. However,
2889 // that is not a common case for most applications that we
2890 // encounter, and checking for it might require more
2891 // communication.
2892 Teuchos::RCP<const map_type> domMap = this->getDomainMap();
2893 if (domMap.is_null()) {
2894 domMap = this->getRowMap();
2895 }
2896 Teuchos::RCP<const map_type> ranMap = this->getRangeMap();
2897 if (ranMap.is_null()) {
2898 ranMap = this->getRowMap();
2899 }
2900 this->fillComplete(domMap, ranMap, params);
2901}
2902
// fillComplete (domainMap, rangeMap, params): finish the fill phase.
// Visible sequence: read parameters, allocate indices if they are not
// allocated yet, run globalAssemble() or verify that nonlocals_ is
// empty, set the domain and range Maps, build the column Map when the
// graph has none, make indices local, sort and merge, make the
// Import/Export objects, fill the local graph, compute global or local
// constants, and finally set fillComplete_.
//
// Parameters read from params when it is nonnull:
//  - "sort column map ghost gids", or else "Sort column Map ghost
//    GIDs" (bool; default is the current member value)
//  - "No Nonlocal Changes" (bool; default false)
//  - "compute global constants" (default true)
// params is also forwarded to fillLocalGraph().
//
// The exception macro is invoked with std::runtime_error when
// !isFillActive() || isFillComplete().
//
// NOTE(review): this listing carries embedded line numbers and skips
// some of them (e.g. 2904, 2951, 2972-2973), so the signature's first
// line and several statement openers are not visible -- confirm
// against the original file.
2903template <class LocalOrdinal, class GlobalOrdinal, class Node>
2905 fillComplete(const Teuchos::RCP<const map_type>& domainMap,
2906 const Teuchos::RCP<const map_type>& rangeMap,
2907 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2908 using std::endl;
2909
2910 const char tfecfFuncName[] = "fillComplete: ";
2911 const bool verbose = verbose_;
2912
2913 Details::ProfilingRegion regionFC("Tpetra::CrsGraph::fillComplete");
2914
// prefix is only created (and only dereferenced) when verbose is true.
2915 std::unique_ptr<std::string> prefix;
2916 if (verbose) {
2917 prefix = this->createPrefix("CrsGraph", "fillComplete");
2918 std::ostringstream os;
2919 os << *prefix << "Start" << endl;
2920 std::cerr << os.str();
2921 }
2922
2923 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive() || isFillComplete(), std::runtime_error,
2924 "Graph fill state must be active (isFillActive() "
2925 "must be true) before calling fillComplete().");
2926
2927 const int numProcs = getComm()->getSize();
2928
2929 //
2930 // Read and set parameters
2931 //
2932
2933 // Does the caller want to sort remote GIDs (within those owned by
2934 // the same process) in makeColMap()?
2935 if (!params.is_null()) {
2936 if (params->isParameter("sort column map ghost gids")) {
2937 sortGhostsAssociatedWithEachProcessor_ =
2938 params->get<bool>("sort column map ghost gids",
2939 sortGhostsAssociatedWithEachProcessor_);
2940 } else if (params->isParameter("Sort column Map ghost GIDs")) {
2941 sortGhostsAssociatedWithEachProcessor_ =
2942 params->get<bool>("Sort column Map ghost GIDs",
2943 sortGhostsAssociatedWithEachProcessor_);
2944 }
2945 }
2946
2947 // If true, the caller promises that no process did nonlocal
2948 // changes since the last call to fillComplete.
2949 bool assertNoNonlocalInserts = false;
2950 if (!params.is_null()) {
2952 params->get<bool>("No Nonlocal Changes", assertNoNonlocalInserts);
2953 }
2954
2955 //
2956 // Allocate indices, if they haven't already been allocated
2957 //
2958 if (!indicesAreAllocated()) {
2959 if (hasColMap()) {
2960 // We have a column Map, so use local indices.
2961 allocateIndices(LocalIndices, verbose);
2962 } else {
2963 // We don't have a column Map, so use global indices.
2964 allocateIndices(GlobalIndices, verbose);
2965 }
2966 }
2967
2968 //
2969 // Do global assembly, if requested and if the communicator
2970 // contains more than one process.
2971 //
// NOTE(review): the condition that opens this if/else is not visible
// in this listing (numbering skips 2972-2973) -- confirm.
2974 // This first checks if we need to do global assembly.
2975 // The check costs a single all-reduce.
2976 globalAssemble();
2977 } else {
2978 const size_t numNonlocals = nonlocals_.size();
2979 if (verbose) {
// NOTE(review): the message below has no separator between the
// "true"/"false" text and "numProcs=", so the two run together.
2980 std::ostringstream os;
2981 os << *prefix << "Do not need to call globalAssemble; "
2982 "assertNoNonlocalInserts="
2983 << (assertNoNonlocalInserts ? "true" : "false")
2984 << "numProcs=" << numProcs
2985 << ", nonlocals_.size()=" << numNonlocals << endl;
2986 std::cerr << os.str();
2987 }
// Nonzero when this process still holds nonlocal entries although
// global assembly was skipped on a multi-process communicator.
2988 const int lclNeededGlobalAssemble =
2989 (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
2990 if (lclNeededGlobalAssemble != 0 && verbose) {
2991 std::ostringstream os;
2992 os << *prefix;
2993 Details::Impl::verbosePrintMap(
2994 os, nonlocals_.begin(), nonlocals_.end(),
2995 nonlocals_.size(), "nonlocals_");
2996 std::cerr << os.str() << endl;
2997 }
2998
2999 if (debug_) {
3000 auto map = this->getMap();
3001 auto comm = map.is_null() ? Teuchos::null : map->getComm();
// NOTE(review): the declaration of gblNeededGlobalAssemble, the start
// of the reduceAll call and the openers of both exception checks are
// not visible in this listing (numbering skips 3002, 3006, 3009 and
// 3019) -- confirm.
3003 if (!comm.is_null()) {
3004 using Teuchos::REDUCE_MAX;
3005 using Teuchos::reduceAll;
3007 Teuchos::outArg(gblNeededGlobalAssemble));
3008 }
3010 "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3011 "least one process in the CrsGraph's communicator. This "
3012 "means either that you incorrectly set the "
3013 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3014 "or that you inserted invalid entries. "
3015 "Rerun with the environment variable TPETRA_VERBOSE="
3016 "CrsGraph set to see the entries of nonlocals_ on every "
3017 "MPI process (WARNING: lots of output).");
3018 } else {
3020 "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3021 "calling process. This means either that you incorrectly "
3022 "set the \"No Nonlocal Changes\" fillComplete parameter "
3023 "to true, or that you inserted invalid entries. "
3024 "Rerun with the environment "
3025 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3026 "of nonlocals_ on every MPI process (WARNING: lots of "
3027 "output).");
3028 }
3029 }
3030
3031 // Set domain and range Map. This may clear the Import / Export
3032 // objects if the new Maps differ from any old ones.
3033 setDomainRangeMaps(domainMap, rangeMap);
3034
3035 // If the graph does not already have a column Map (either from
3036 // the user constructor calling the version of the constructor
3037 // that takes a column Map, or from a previous fillComplete call),
3038 // then create it.
3039 Teuchos::Array<int> remotePIDs(0);
3040 const bool mustBuildColMap = !this->hasColMap();
3041 if (mustBuildColMap) {
3042 this->makeColMap(remotePIDs); // resized on output
3043 }
3044
3045 // Make indices local, if they aren't already.
3046 // The method doesn't do any work if the indices are already local.
// The code below treats .first == 0 as success and uses .second as
// the error text.
3047 const std::pair<size_t, std::string> makeIndicesLocalResult =
3048 this->makeIndicesLocal(verbose);
3049
3050 if (debug_) {
3052 using Teuchos::outArg;
3053 using Teuchos::RCP;
3054 using Teuchos::REDUCE_MIN;
3055 using Teuchos::reduceAll;
3056
3057 RCP<const map_type> map = this->getMap();
// NOTE(review): the declaration of comm, the exception-check openers
// and the reduction into gblSuccess are not visible in this listing
// (numbering skips 3058, 3063, 3068 and 3080) -- confirm.
3059 if (!map.is_null()) {
3060 comm = map->getComm();
3061 }
3062 if (comm.is_null()) {
3064 makeIndicesLocalResult.second);
3065 } else {
3066 const int lclSuccess = (makeIndicesLocalResult.first == 0);
3067 int gblSuccess = 0; // output argument
3069 if (gblSuccess != 1) {
3070 std::ostringstream os;
3071 gathervPrint(os, makeIndicesLocalResult.second, *comm);
3072 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
3073 }
3074 }
3075 } else {
3076 // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3077 // the error state to makeImportExport or
3078 // computeGlobalConstants, which may do all-reduces and thus may
3079 // have the opportunity to communicate that error state.
3081 makeIndicesLocalResult.second);
3082 }
3083
3084 // If this process has no indices, then CrsGraph considers it
3085 // already trivially sorted and merged. Thus, this method need
3086 // not be called on all processes in the row Map's communicator.
3087 this->sortAndMergeAllIndices(this->isSorted(), this->isMerged());
3088
3089 // Make Import and Export objects, if they haven't been made
3090 // already. If we made a column Map above, reuse information from
3091 // that process to avoid communication in the Import setup.
3092 this->makeImportExport(remotePIDs, mustBuildColMap);
3093
3094 // Create the KokkosSparse::StaticCrsGraph, if it doesn't already exist.
3095 this->fillLocalGraph(params);
3096
// A null params counts as "compute global constants" = true.
3097 const bool callComputeGlobalConstants = params.get() == nullptr ||
3098 params->get("compute global constants", true);
// NOTE(review): the opener of this if/else is not visible in this
// listing (numbering skips 3099); presumably it tests
// callComputeGlobalConstants -- confirm.
3100 this->computeGlobalConstants();
3101 } else {
3102 this->computeLocalConstants();
3103 }
3104 this->fillComplete_ = true;
3105 this->checkInternalState();
3106
3107 if (verbose) {
3108 std::ostringstream os;
3109 os << *prefix << "Done" << endl;
3110 std::cerr << os.str();
3111 }
3112}
3113
// expertStaticFillComplete: mark the graph fill complete without the
// allocateIndices / globalAssemble / makeColMap / makeIndicesLocal /
// sortAndMergeAllIndices steps that fillComplete performs (see the
// note in the body). The corresponding state flags are set directly.
//
// domainMap, rangeMap: per the first check's message, both must be
//   nonnull.
// importer, exporter: when nonnull, each is compared against the
//   graph's Maps (importer: source vs. domain Map, target vs. column
//   Map; exporter: source vs. row Map, target vs. range Map) and then
//   stored in importer_ / exporter_; the visible check arguments name
//   std::invalid_argument.
// params: forwarded to fillLocalGraph(); "compute global constants"
//   (default true, also true when params is null) is read into
//   callComputeGlobalConstants.
//
// NOTE(review): this listing carries embedded line numbers and skips
// some of them (3115, 3124, 3127, 3132, 3136, 3179, 3188, 3209), so
// the signature's first line, the exception-macro openers and the
// opener of the final if/else are not visible -- confirm against the
// original file.
3114template <class LocalOrdinal, class GlobalOrdinal, class Node>
3116 expertStaticFillComplete(const Teuchos::RCP<const map_type>& domainMap,
3117 const Teuchos::RCP<const map_type>& rangeMap,
3118 const Teuchos::RCP<const import_type>& importer,
3119 const Teuchos::RCP<const export_type>& exporter,
3120 const Teuchos::RCP<Teuchos::ParameterList>& params) {
3121 const char tfecfFuncName[] = "expertStaticFillComplete: ";
3122 Tpetra::Details::ProfilingRegion prESFC("Tpetra::CrsGraph::expertStaticFillComplete");
3123
3125 domainMap.is_null() || rangeMap.is_null(),
3126 std::runtime_error, "The input domain Map and range Map must be nonnull.");
3128 isFillComplete() || !hasColMap(), std::runtime_error,
3129 "You may not "
3130 "call this method unless the graph has a column Map.");
3131 auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedDevice().extent(0);
3133 getLocalNumRows() > 0 && rowPtrsUnpackedLength == 0,
3134 std::runtime_error, "The calling process has getLocalNumRows() = " << getLocalNumRows() << " > 0 rows, but the row offsets array has not "
3135 "been set.");
3137 static_cast<size_t>(rowPtrsUnpackedLength) != getLocalNumRows() + 1,
3138 std::runtime_error, "The row offsets array has length " << rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " << (getLocalNumRows() + 1) << ".");
3139
3140 // Note: We don't need to do the following things which are normally done in fillComplete:
3141 // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3142
3143 // Constants from allocateIndices
3144 //
3145 // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3146 // away once the graph is allocated. expertStaticFillComplete
3147 // either presumes that the graph is allocated, or "allocates" it.
3148 //
3149 // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3150 // version of CrsGraph is to allocate in the constructor, not
3151 // lazily on first insert. That will make both
3152 // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3153 numAllocForAllRows_ = 0;
3154 k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
3155 indicesAreAllocated_ = true;
3156
3157 // Constants from makeIndicesLocal
3158 //
3159 // The graph has a column Map, so its indices had better be local.
3160 indicesAreLocal_ = true;
3161 indicesAreGlobal_ = false;
3162
3163 // set domain/range map: may clear the import/export objects
3164 {
3165 Tpetra::Details::ProfilingRegion pr("Tpetra ESFC-G-Maps");
3166 setDomainRangeMaps(domainMap, rangeMap);
3167 }
3168
3169 // Presume the user sorted and merged the arrays first
3170 indicesAreSorted_ = true;
3171 noRedundancies_ = true;
3172
3173 // makeImportExport won't create a new importer/exporter if I set one here first.
3174
// Reset both first, so a null argument leaves the member null for
// makeImportExport() below.
3175 importer_ = Teuchos::null;
3176 exporter_ = Teuchos::null;
3177 if (importer != Teuchos::null) {
3178 Tpetra::Details::ProfilingRegion pr("Tpetra ESFC-G-mIXcheckI");
3180 !importer->getSourceMap()->isSameAs(*getDomainMap()) ||
3181 !importer->getTargetMap()->isSameAs(*getColMap()),
3182 std::invalid_argument, ": importer does not match matrix maps.");
3183 importer_ = importer;
3184 }
3185
3186 if (exporter != Teuchos::null) {
3187 Tpetra::Details::ProfilingRegion pr("Tpetra ESFC-G-mIXcheckE");
3189 !exporter->getSourceMap()->isSameAs(*getRowMap()) ||
3190 !exporter->getTargetMap()->isSameAs(*getRangeMap()),
3191 std::invalid_argument, ": exporter does not match matrix maps.");
3192 exporter_ = exporter;
3193 }
3194
3195 {
3196 Tpetra::Details::ProfilingRegion pr("Tpetra ESFC-G-mIXmake");
3197 Teuchos::Array<int> remotePIDs(0); // unused output argument
3198 this->makeImportExport(remotePIDs, false);
3199 }
3200
3201 {
3202 Tpetra::Details::ProfilingRegion pr("Tpetra ESFC-G-fLG");
3203 this->fillLocalGraph(params);
3204 }
3205
// A null params counts as "compute global constants" = true.
3206 const bool callComputeGlobalConstants = params.get() == nullptr ||
3207 params->get("compute global constants", true);
3208
3210 Tpetra::Details::ProfilingRegion pr("Tpetra ESFC-G-cGC (const)");
3211 this->computeGlobalConstants();
3212 } else {
3213 Tpetra::Details::ProfilingRegion pr("Tpetra ESFC-G-cGC (noconst)");
3214 this->computeLocalConstants();
3215 }
3216
3217 fillComplete_ = true;
3218
3219 checkInternalState();
3220}
3221
// fillLocalGraph: set the packed row offsets and packed column indices
// (lclIndsPacked_wdv) from the graph's unpacked 1-D storage.
//
// When getLocalNumEntries() differs from getLocalAllocationSize(), new
// packed row offsets are computed from k_numRowEntries_ with
// computeOffsetsFromCounts, and the column indices are packed into a
// freshly allocated array by a Kokkos::parallel_for over the local
// rows. Otherwise the unpacked arrays are reused as the packed ones.
//
// params: "Optimize Storage" (default true) is read when params is
//   nonnull. When requestOptimizedStorage is true at the end,
//   k_numRowEntries_ is reset, lclIndsUnpacked_wdv is set to the packed
//   indices, and storageStatus_ becomes Details::STORAGE_1D_PACKED.
//
// Consistency checks under debug_ invoke the exception macro with
// std::logic_error. Exceptions from getLocalAllocationSize() are caught
// and re-raised through the macro with an explanatory prefix.
//
// NOTE(review): this listing carries embedded line numbers and skips
// 3223 and 3241, so the signature's first line is not visible --
// confirm against the original file.
3222template <class LocalOrdinal, class GlobalOrdinal, class Node>
3224 fillLocalGraph(const Teuchos::RCP<Teuchos::ParameterList>& params) {
3225 using ::Tpetra::Details::computeOffsetsFromCounts;
3226 typedef typename local_graph_device_type::row_map_type row_map_type;
3227 typedef typename row_map_type::non_const_type non_const_row_map_type;
3228 typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3229 const char tfecfFuncName[] =
3230 "fillLocalGraph (called from fillComplete or "
3231 "expertStaticFillComplete): ";
3232 const size_t lclNumRows = this->getLocalNumRows();
3233
3234 Details::ProfilingRegion regionFLG("Tpetra::CrsGraph::fillLocalGraph");
3235
3236 // This method's goal is to fill in the two arrays (compressed
3237 // sparse row format) that define the sparse graph's structure.
3238
// NOTE(review): the body of the if below (listing line 3241) is not
// visible; presumably it sets requestOptimizedStorage to false --
// confirm.
3239 bool requestOptimizedStorage = true;
3240 if (!params.is_null() && !params->get("Optimize Storage", true)) {
3242 }
3243
3244 // The graph's column indices are currently stored in a 1-D
3245 // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3246 // in k_lclInds1D_.
3247
3248 if (debug_) {
3249 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3250 // The graph's array of row offsets must already be allocated.
3251 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) == 0, std::logic_error,
3252 "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3253 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) != lclNumRows + 1, std::logic_error,
3254 "rowPtrsUnpacked_host_.extent(0) = "
3255 << rowPtrsUnpacked.extent(0) << " != (lclNumRows + 1) = "
3256 << (lclNumRows + 1) << ".");
// NOTE(review): valToCheck is read at index numOffsets - 1, while the
// message below prints numOffsets as the index.
3257 const size_t numOffsets = rowPtrsUnpacked.extent(0);
3258 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3259 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numOffsets != 0 &&
3260 lclIndsUnpacked_wdv.extent(0) != valToCheck,
3261 std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3262 " and lclIndsUnpacked_wdv.extent(0)="
3263 << lclIndsUnpacked_wdv.extent(0) << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck << ".");
3264 }
3265
// Most-derived exception types are caught first, so each is reported
// with a message naming its kind.
3266 size_t allocSize = 0;
3267 try {
3268 allocSize = this->getLocalAllocationSize();
3269 } catch (std::logic_error& e) {
3270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
3271 "getLocalAllocationSize threw "
3272 "std::logic_error: "
3273 << e.what());
3274 } catch (std::runtime_error& e) {
3275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3276 "getLocalAllocationSize threw "
3277 "std::runtime_error: "
3278 << e.what());
3279 } catch (std::exception& e) {
3280 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3281 "getLocalAllocationSize threw "
3282 "std::exception: "
3283 << e.what());
3284 } catch (...) {
3285 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3286 "getLocalAllocationSize threw "
3287 "an exception not a subclass of std::exception.");
3288 }
3289
// NOTE(review): this branch is selected by comparing the entry count
// with the allocation size, not by requestOptimizedStorage, although
// the debug messages label the two branches "Optimize Storage"=true
// and =false.
3290 if (this->getLocalNumEntries() != allocSize) {
3291 // Use the nonconst version of row_map_type for ptr_d, because
3292 // the latter is const and we need to modify ptr_d here.
3293 non_const_row_map_type ptr_d;
3294 row_map_type ptr_d_const;
3295
3296 // The graph's current 1-D storage is "unpacked." This means
3297 // the row offsets may differ from what the final row offsets
3298 // should be. This could happen, for example, if the user set
3299 // an upper bound on the number of entries in each row, but
3300 // didn't fill all those entries.
3301
3302 if (debug_) {
3303 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3304 if (rowPtrsUnpacked.extent(0) != 0) {
3305 const size_t numOffsets =
3306 static_cast<size_t>(rowPtrsUnpacked.extent(0));
3307 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3308 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3309 std::logic_error,
3310 "(Unpacked branch) Before allocating "
3311 "or packing, k_rowPtrs_("
3312 << (numOffsets - 1) << ")="
3313 << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3314 << lclIndsUnpacked_wdv.extent(0) << ".");
3315 }
3316 }
3317
3318 // Pack the row offsets into ptr_d, by doing a sum-scan of the
3319 // array of valid entry counts per row (k_numRowEntries_).
3320
3321 // Total number of entries in the matrix on the calling
3322 // process. We will compute this in the loop below. It's
3323 // cheap to compute and useful as a sanity check.
3324 size_t lclTotalNumEntries = 0;
3325 {
3326 // Allocate the packed row offsets array.
3327 ptr_d =
3328 non_const_row_map_type("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3329 ptr_d_const = ptr_d;
3330
3331 // It's ok that k_numRowEntries_ is a host View; the
3332 // function can handle this.
3333 typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3334 if (debug_) {
3335 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(size_t(numRowEnt_h.extent(0)) != lclNumRows,
3336 std::logic_error,
3337 "(Unpacked branch) "
3338 "numRowEnt_h.extent(0)="
3339 << numRowEnt_h.extent(0)
3340 << " != getLocalNumRows()=" << lclNumRows << "");
3341 }
3342
3343 lclTotalNumEntries = computeOffsetsFromCounts(ptr_d, numRowEnt_h);
3344
3345 if (debug_) {
3346 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(ptr_d.extent(0)) != lclNumRows + 1,
3347 std::logic_error,
3348 "(Unpacked branch) After allocating "
3349 "ptr_d, ptr_d.extent(0) = "
3350 << ptr_d.extent(0)
3351 << " != lclNumRows+1 = " << (lclNumRows + 1) << ".");
3352 const auto valToCheck =
3353 ::Tpetra::Details::getEntryOnHost(ptr_d, lclNumRows);
3354 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != lclTotalNumEntries, std::logic_error,
3355 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3356 "after filling ptr_d, ptr_d(lclNumRows="
3357 << lclNumRows
3358 << ") = " << valToCheck << " != total number of entries "
3359 "on the calling process = "
3360 << lclTotalNumEntries
3361 << ".");
3362 }
3363 }
3364
3365 // Allocate the array of packed column indices.
3366 lclinds_1d_type ind_d =
3367 lclinds_1d_type("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3368
3369 // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3370 // them, using the packed row offsets array ptr_d that we
3371 // created above.
3372 //
3373 // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3374 // CrsMatrix?), we need to keep around the unpacked row
3375 // offsets and column indices.
3376
3377 // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3378 // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3379 typedef pack_functor<
3380 typename local_graph_device_type::entries_type::non_const_type,
3381 typename local_inds_dualv_type::t_dev::const_type,
3382 row_map_type,
3383 typename local_graph_device_type::row_map_type>
3384 inds_packer_type;
3385 inds_packer_type f(ind_d,
3386 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3387 ptr_d, this->getRowPtrsUnpackedDevice());
3388 {
// Run the packing functor once per local row, in ind_d's execution
// space.
3389 typedef typename decltype(ind_d)::execution_space exec_space;
3390 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3391 Kokkos::parallel_for(range_type(0, lclNumRows), f);
3392 }
3393
3394 if (debug_) {
3395 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(ptr_d.extent(0) == 0, std::logic_error,
3396 "(\"Optimize Storage\"=true branch) After packing, "
3397 "ptr_d.extent(0)=0.");
3398 if (ptr_d.extent(0) != 0) {
3399 const size_t numOffsets = static_cast<size_t>(ptr_d.extent(0));
3400 const auto valToCheck =
3401 ::Tpetra::Details::getEntryOnHost(ptr_d, numOffsets - 1);
3402 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) != ind_d.extent(0),
3403 std::logic_error,
3404 "(\"Optimize Storage\"=true branch) "
3405 "After packing, ptr_d("
3406 << (numOffsets - 1) << ")="
3407 << valToCheck << " != ind_d.extent(0)="
3408 << ind_d.extent(0) << ".");
3409 }
3410 }
3411 // Build the local graph.
3412 if (requestOptimizedStorage)
3413 setRowPtrs(ptr_d_const);
3414 else
3415 setRowPtrsPacked(ptr_d_const);
3416 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3417 } else { // We don't have to pack, so just set the pointers.
3418 // Set both packed and unpacked rowptrs to this
3419 this->setRowPtrs(rowPtrsUnpacked_dev_);
3420 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3421
3422 if (debug_) {
3423 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3424 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3425 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsPacked_dev.extent(0) == 0, std::logic_error,
3426 "(\"Optimize Storage\"=false branch) "
3427 "rowPtrsPacked_dev_.extent(0) = 0.");
3428 if (rowPtrsPacked_dev.extent(0) != 0) {
3429 const size_t numOffsets =
3430 static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3431 const size_t valToCheck =
3432 rowPtrsPacked_host(numOffsets - 1);
3433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != size_t(lclIndsPacked_wdv.extent(0)),
3434 std::logic_error,
3435 "(\"Optimize Storage\"=false branch) "
3436 "rowPtrsPacked_dev_("
3437 << (numOffsets - 1) << ")="
3438 << valToCheck
3439 << " != lclIndsPacked_wdv.extent(0)="
3440 << lclIndsPacked_wdv.extent(0) << ".");
3441 }
3442 }
3443 }
3444
// Final check common to both branches: the packed offsets have
// lclNumRows + 1 entries and the last one equals the packed index
// count.
3445 if (debug_) {
3446 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3447 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3448 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(rowPtrsPacked_dev.extent(0)) != lclNumRows + 1,
3449 std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " << rowPtrsPacked_dev.extent(0) << " != lclNumRows+1 = " << (lclNumRows + 1) << ".");
3450 if (rowPtrsPacked_dev.extent(0) != 0) {
3451 const size_t numOffsets = static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3452 const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) != lclIndsPacked_wdv.extent(0),
3454 std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets - 1) << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = " << lclIndsPacked_wdv.extent(0) << ".");
3455 }
3456 }
3457
3458 if (requestOptimizedStorage) {
3459 // With optimized storage, we don't need to store
3460 // the array of row entry counts.
3461
3462 // Free graph data structures that are only needed for
3463 // unpacked 1-D storage.
3464 k_numRowEntries_ = num_row_entries_type();
3465
3466 // Keep the new 1-D packed allocations.
3467 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3468
3469 storageStatus_ = Details::STORAGE_1D_PACKED;
3470 }
3471
3472 set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3473}
3474
// replaceColMap: store newColMap as the graph's column Map (colMap_).
// A guard precedes the assignment; its visible arguments are the
// condition isLocallyIndexed() || isGloballyIndexed() and
// std::runtime_error.
// NOTE(review): the signature's first line and the guard's macro opener
// (listing lines 3476 and 3486) are not visible here, and the message
// text ("matching maps and non-static graph") does not obviously
// describe the visible condition -- confirm against the original file.
3475template <class LocalOrdinal, class GlobalOrdinal, class Node>
3477 replaceColMap(const Teuchos::RCP<const map_type>& newColMap) {
3478 // NOTE: This safety check matches the code, but not the documentation of CrsGraph
3479 //
3480 // FIXME (mfh 18 Aug 2014) This will break if the calling process
3481 // has no entries, because in that case, currently it is neither
3482 // locally nor globally indexed. This will change once we get rid
3483 // of lazy allocation (so that the constructor allocates indices
3484 // and therefore commits to local vs. global).
3485 const char tfecfFuncName[] = "replaceColMap: ";
3487 isLocallyIndexed() || isGloballyIndexed(), std::runtime_error,
3488 "Requires matching maps and non-static graph.");
3489 colMap_ = newColMap;
3490}
3491
// reindexColumns: re-express the graph's column indices with respect to a new
// column Map, then install that Map (and optionally an Import).
//
// Parameters (as used by the lines visible in this listing):
//   newColMap  - the new column Map; assigned to colMap_ near the end.
//   newImport  - if nonnull, stored as importer_.  If null and domainMap_ is
//                set, an Import from domainMap_ to newColMap is built unless
//                the two Maps are the same, in which case importer_ is reset
//                to null.
//   sortIndicesInEachRow - not referenced on any visible line; presumably it
//                gates the sortAndMergeAllIndices call below (the listing
//                skips a line right before it) -- TODO confirm.
//
// NOTE(review): this text comes from a rendered source listing.  Wherever the
// embedded line numbers jump (e.g. 3492 -> 3494, 3504 -> 3506), a line of the
// real file is missing here; typically the "CrsGraph<...>::" qualifier, a
// macro opener, or a statement.  Confirm against the actual header.
3492template <class LocalOrdinal, class GlobalOrdinal, class Node>
3494 reindexColumns(const Teuchos::RCP<const map_type>& newColMap,
3495 const Teuchos::RCP<const import_type>& newImport,
3496 const bool sortIndicesInEachRow) {
3497 using Teuchos::RCP;
3498 using Teuchos::REDUCE_MIN;
3499 using Teuchos::reduceAll;
3500 typedef GlobalOrdinal GO;
3501 typedef LocalOrdinal LO;
3502 using col_inds_type_dev = typename local_inds_dualv_type::t_dev;
3503 const char tfecfFuncName[] = "reindexColumns: ";
3504
 // Precondition: the graph must not be fill complete.  (The macro opener for
 // this check is one of the lines missing from the listing.)
3506 isFillComplete(), std::runtime_error,
3507 "The graph is fill complete "
3508 "(isFillComplete() returns true). You must call resumeFill() before "
3509 "you may call this method.");
3510
3511 // mfh 19 Aug 2014: This method does NOT redistribute data; it
3512 // doesn't claim to do the work of an Import or Export. This
3513 // means that for all processes, the calling process MUST own all
3514 // column indices, in both the old column Map (if it exists) and
3515 // the new column Map. We check this via an all-reduce.
3516 //
3517 // Some processes may be globally indexed, others may be locally
3518 // indexed, and others (that have no graph entries) may be
3519 // neither. This method will NOT change the graph's current
3520 // state. If it's locally indexed, it will stay that way, and
3521 // vice versa. It would be easy to add an option to convert indices
3522 // from global to local, so as to save a global-to-local
3523 // conversion pass. However, we don't do this here. The intended
3524 // typical use case is that the graph already has a column Map and
3525 // is locally indexed, and this is the case for which we optimize.
3526
3527 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
3528
3529 // Attempt to convert indices to the new column Map's version of
3530 // local. This will fail if on the calling process, the graph has
3531 // indices that are not on that process in the new column Map.
3532 // After the local conversion attempt, we will do an all-reduce to
3533 // see if any processes failed.
3534
3535 // If this is false, then either the graph contains a column index
3536 // which is invalid in the CURRENT column Map, or the graph is
3537 // locally indexed but currently has no column Map. In either
3538 // case, there is no way to convert the current local indices into
3539 // global indices, so that we can convert them into the new column
3540 // Map's local indices. It's possible for this to be true on some
3541 // processes but not others, due to replaceColMap.
3542 bool allCurColIndsValid = true;
3543 // On the calling process, are all valid current column indices
3544 // also in the new column Map on the calling process? In other
3545 // words, does local reindexing suffice, or should the user have
3546 // done an Import or Export instead?
3547 bool localSuffices = true;
3548
3549 {
3550 // Final arrays for the local indices. We will allocate exactly
3551 // one of these ONLY if the graph is locally indexed on the
3552 // calling process, and ONLY if the graph has one or more entries
3553 // (is not empty) on the calling process. In that case, we
3554 // allocate the first (1-D storage) if the graph has a static
3555 // profile, else we allocate the second (2-D storage).
 // NOTE(review): only one array (newLclInds1D_dev) is used on the visible
 // lines; its declaration is on a line missing from this listing, and the
 // "second (2-D storage)" wording above looks stale -- confirm.
3557
3558 // If indices aren't allocated, that means the calling process
3559 // owns no entries in the graph. Thus, there is nothing to
3560 // convert, and it trivially succeeds locally.
3561 if (indicesAreAllocated()) {
3562 if (isLocallyIndexed()) {
3563 if (hasColMap()) { // locally indexed, and currently has a column Map
3564 const map_type& oldColMap = *(getColMap());
3565
3566 // Allocate storage for the new local indices.
3567 const size_t allocSize = this->getLocalAllocationSize();
3568 auto oldLclInds1D = lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly);
3569 newLclInds1D_dev = col_inds_type_dev("Tpetra::CrsGraph::lclIndsReindexed",
3570 allocSize);
3571 auto oldLclColMap = oldColMap.getLocalMap();
3572 auto newLclColMap = newColMap->getLocalMap();
3573
3574 const auto LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3575 const auto GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
3576
 // Per-entry status codes folded into errorStatus by the reduction.
 // NOTE(review): the two codes are distinct bit values (1 and 2), but
 // the reducer below is Kokkos::LOr<int>, a logical OR.  If the missing
 // kernel lines combine and later test these codes bitwise, a logical-OR
 // join could not tell the two conditions apart -- confirm whether a
 // bitwise-OR reducer was intended.
3577 const int NOT_ALL_LOCAL_INDICES_ARE_VALID = 1;
3578 const int LOCAL_DOES_NOT_SUFFICE = 2;
3579 int errorStatus = 0;
 // One work item per allocated slot (allocSize), not per valid entry.
 // The kernel maps old local index -> global index -> new local index.
 // The statement in each branch is missing from this listing.
3580 Kokkos::parallel_reduce(
3581 "Tpetra::CrsGraph::reindexColumns",
3582 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, allocSize),
3583 KOKKOS_LAMBDA(const LocalOrdinal k, int& result) {
3585 if (oldLclCol == LO_INVALID) {
3587 } else {
3588 const GO gblCol = oldLclColMap.getGlobalElement(oldLclCol);
3589 if (gblCol == GO_INVALID) {
3591 } else {
3592 const LocalOrdinal newLclCol = newLclColMap.getLocalElement(gblCol);
3593 if (newLclCol == LO_INVALID) {
3595 } else {
3597 }
3598 }
3599 }
3600 },
3601 Kokkos::LOr<int>(errorStatus));
 // Listing lines 3602-3603 are missing; presumably they translate
 // errorStatus into allCurColIndsValid / localSuffices -- TODO confirm.
3604 } else { // locally indexed, but no column Map
3605 // This case is only possible if replaceColMap() was called
3606 // with a null argument on the calling process. It's
3607 // possible, but it means that this method can't possibly
3608 // succeed, since we have no way of knowing how to convert
3609 // the current local indices to global indices.
3610 allCurColIndsValid = false;
3611 }
3612 } else { // globally indexed
3613 // If the graph is globally indexed, we don't need to save
3614 // local indices, but we _do_ need to know whether the current
3615 // global indices are valid in the new column Map. We may
3616 // need to do a getRemoteIndexList call to find this out.
3617 //
3618 // In this case, it doesn't matter whether the graph currently
3619 // has a column Map. We don't need the old column Map to
3620 // convert from global indices to the _new_ column Map's local
3621 // indices. Furthermore, we can use the same code, whether
3622 // the graph is static or dynamic profile.
3623
3624 // Test whether the current global indices are in the new
3625 // column Map on the calling process.
 // NOTE(review): the break below leaves only the inner (per-entry) loop;
 // the outer row loop keeps scanning the remaining rows.  The result is
 // unaffected (localSuffices stays false), only extra work is done.
3626 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3627 const RowInfo rowInfo = this->getRowInfo(lclRow);
3628 auto oldGblRowView = this->getGlobalIndsViewHost(rowInfo);
3629 for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3630 const GO gblCol = oldGblRowView(k);
3631 if (!newColMap->isNodeGlobalElement(gblCol)) {
3632 localSuffices = false;
3633 break; // Stop at the first invalid index
3634 }
3635 } // for each entry in the current row
3636 } // for each locally owned row
3637 } // locally or globally indexed
3638 } // whether indices are allocated
3639
3640 // Do an all-reduce to check both possible error conditions.
 // Slot 0 carries allCurColIndsValid, slot 1 carries localSuffices (1 = OK).
3641 int lclSuccess[2];
3642 lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3643 lclSuccess[1] = localSuffices ? 1 : 0;
3644 int gblSuccess[2];
3645 gblSuccess[0] = 0;
3646 gblSuccess[1] = 0;
 // The declaration of "comm" and the reduction call inside the if-body are
 // on lines missing from this listing.  If comm is null, gblSuccess keeps
 // its zero initialization, so both checks below fire.
3648 getRowMap().is_null() ? Teuchos::null : getRowMap()->getComm();
3649 if (!comm.is_null()) {
3651 }
3652
 // Error check 1 (macro opener missing from the listing).
3654 gblSuccess[0] == 0, std::runtime_error,
3655 "It is not possible to continue."
3656 " The most likely reason is that the graph is locally indexed, but the "
3657 "column Map is missing (null) on some processes, due to a previous call "
3658 "to replaceColMap().");
3659
 // Error check 2 (macro opener missing from the listing).
3661 gblSuccess[1] == 0, std::runtime_error,
3662 "On some process, the graph "
3663 "contains column indices that are in the old column Map, but not in the "
3664 "new column Map (on that process). This method does NOT redistribute "
3665 "data; it does not claim to do the work of an Import or Export operation."
3666 " This means that for all processess, the calling process MUST own all "
3667 "column indices, in both the old column Map and the new column Map. In "
3668 "this case, you will need to do an Import or Export operation to "
3669 "redistribute data.");
3670
3671 // Commit the results.
3672 if (isLocallyIndexed()) {
3673 lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3674 }
3675 // end of scope for newLclInds1D_dev
3676 // sortAndMergeAllIndices needs host access
3677 }
3678
3679 if (isLocallyIndexed()) {
3680 // We've reindexed, so we don't know if the indices are sorted.
3681 //
3682 // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3683 // since we're already going through all the indices above. We
3684 // could also sort each row in place; that way, we would only
3685 // have to make one pass over the rows.
3686 indicesAreSorted_ = false;
 // Listing line 3687 is missing; the closing brace after the sort call
 // suggests it opens a conditional block -- presumably on
 // sortIndicesInEachRow; TODO confirm.
3688 // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3689 // order to call this method.
3690 //
3691 // FIXME (mfh 17 Sep 2014) This violates the strong exception
3692 // guarantee. It would be better to sort the new index arrays
3693 // before committing them.
3694 const bool sorted = false; // need to resort
3695 const bool merged = true; // no need to merge, since no dups
3696 this->sortAndMergeAllIndices(sorted, merged);
3697 }
3698 }
3699 colMap_ = newColMap;
3700
3701 if (newImport.is_null()) {
3702 // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3703 // check whether the input Import is null on any process.
3704 //
3705 // If the domain Map hasn't been set yet, we can't compute a new
3706 // Import object. Leave it what it is; it should be null, but
3707 // it doesn't matter. If the domain Map _has_ been set, then
3708 // compute a new Import object if necessary.
3709 if (!domainMap_.is_null()) {
3710 if (!domainMap_->isSameAs(*newColMap)) {
3711 importer_ = Teuchos::rcp(new import_type(domainMap_, newColMap));
3712 } else {
3713 importer_ = Teuchos::null; // don't need an Import
3714 }
3715 }
3716 } else {
3717 // The caller gave us an Import object. Assume that it's valid.
3718 importer_ = newImport;
3719 }
3720}
3721
3722template <class LocalOrdinal, class GlobalOrdinal, class Node>
3724 replaceDomainMap(const Teuchos::RCP<const map_type>& newDomainMap) {
3725 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
3727 colMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3728 "this method unless the graph already has a column Map.");
3730 newDomainMap.is_null(), std::invalid_argument,
3731 prefix << "The new domain Map must be nonnull.");
3732
3733 // Create a new importer, if needed
3734 Teuchos::RCP<const import_type> newImporter = Teuchos::null;
3735 if (newDomainMap != colMap_ && (!newDomainMap->isSameAs(*colMap_))) {
3736 newImporter = rcp(new import_type(newDomainMap, colMap_));
3737 }
3738 this->replaceDomainMapAndImporter(newDomainMap, newImporter);
3739}
3740
3741template <class LocalOrdinal, class GlobalOrdinal, class Node>
3743 replaceDomainMapAndImporter(const Teuchos::RCP<const map_type>& newDomainMap,
3744 const Teuchos::RCP<const import_type>& newImporter) {
3745 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
3747 colMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3748 "this method unless the graph already has a column Map.");
3750 newDomainMap.is_null(), std::invalid_argument,
3751 prefix << "The new domain Map must be nonnull.");
3752
3753 if (debug_) {
3754 if (newImporter.is_null()) {
3755 // It's not a good idea to put expensive operations in a macro
3756 // clause, even if they are side effect - free, because macros
3757 // don't promise that they won't evaluate their arguments more
3758 // than once. It's polite for them to do so, but not required.
3759 const bool colSameAsDom = colMap_->isSameAs(*newDomainMap);
3760 TEUCHOS_TEST_FOR_EXCEPTION(!colSameAsDom, std::invalid_argument,
3761 "If the new Import is null, "
3762 "then the new domain Map must be the same as the current column Map.");
3763 } else {
3764 const bool colSameAsTgt =
3765 colMap_->isSameAs(*(newImporter->getTargetMap()));
3766 const bool newDomSameAsSrc =
3767 newDomainMap->isSameAs(*(newImporter->getSourceMap()));
3768 TEUCHOS_TEST_FOR_EXCEPTION(!colSameAsTgt || !newDomSameAsSrc, std::invalid_argument,
3769 "If the "
3770 "new Import is nonnull, then the current column Map must be the same "
3771 "as the new Import's target Map, and the new domain Map must be the "
3772 "same as the new Import's source Map.");
3773 }
3774 }
3775
3776 domainMap_ = newDomainMap;
3777 importer_ = Teuchos::rcp_const_cast<import_type>(newImporter);
3778}
3779
3780template <class LocalOrdinal, class GlobalOrdinal, class Node>
3782 replaceRangeMap(const Teuchos::RCP<const map_type>& newRangeMap) {
3783 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
3785 rowMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3786 "this method unless the graph already has a row Map.");
3788 newRangeMap.is_null(), std::invalid_argument,
3789 prefix << "The new range Map must be nonnull.");
3790
3791 // Create a new exporter, if needed
3792 Teuchos::RCP<const export_type> newExporter = Teuchos::null;
3793 if (newRangeMap != rowMap_ && (!newRangeMap->isSameAs(*rowMap_))) {
3794 newExporter = rcp(new export_type(rowMap_, newRangeMap));
3795 }
3796 this->replaceRangeMapAndExporter(newRangeMap, newExporter);
3797}
3798
3799template <class LocalOrdinal, class GlobalOrdinal, class Node>
3801 replaceRangeMapAndExporter(const Teuchos::RCP<const map_type>& newRangeMap,
3802 const Teuchos::RCP<const export_type>& newExporter) {
3803 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
3805 rowMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3806 "this method unless the graph already has a column Map.");
3808 newRangeMap.is_null(), std::invalid_argument,
3809 prefix << "The new domain Map must be nonnull.");
3810
3811 if (debug_) {
3812 if (newExporter.is_null()) {
3813 // It's not a good idea to put expensive operations in a macro
3814 // clause, even if they are side effect - free, because macros
3815 // don't promise that they won't evaluate their arguments more
3816 // than once. It's polite for them to do so, but not required.
3817 const bool rowSameAsRange = rowMap_->isSameAs(*newRangeMap);
3818 TEUCHOS_TEST_FOR_EXCEPTION(!rowSameAsRange, std::invalid_argument,
3819 "If the new Export is null, "
3820 "then the new range Map must be the same as the current row Map.");
3821 } else {
3822 const bool newRangeSameAsTgt =
3823 newRangeMap->isSameAs(*(newExporter->getTargetMap()));
3824 const bool rowSameAsSrc =
3825 rowMap_->isSameAs(*(newExporter->getSourceMap()));
3826 TEUCHOS_TEST_FOR_EXCEPTION(!rowSameAsSrc || !newRangeSameAsTgt, std::invalid_argument,
3827 "If the "
3828 "new Export is nonnull, then the current row Map must be the same "
3829 "as the new Export's source Map, and the new range Map must be the "
3830 "same as the new Export's target Map.");
3831 }
3832 }
3833
3834 rangeMap_ = newRangeMap;
3835 exporter_ = Teuchos::rcp_const_cast<export_type>(newExporter);
3836}
3837
// getLocalGraphDevice: assembles a local graph object from the packed local
// column indices (device view) and the packed row pointers (device).
//
// NOTE(review): the return type, the "CrsGraph<...>::" qualifier and the
// opening of the return expression are on lines missing from this listing
// (embedded numbers jump 3838 -> 3841 -> 3843); presumably it mirrors
// getLocalGraphHost with the device graph type -- confirm.
// NOTE(review): the indices view is requested with Access::ReadWrite even
// though this method is const.
3838template <class LocalOrdinal, class GlobalOrdinal, class Node>
3841 getLocalGraphDevice() const {
3843 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
3844 this->getRowPtrsPackedDevice());
3845}
3846
// getLocalGraphHost: returns a local_graph_host_type built from the packed
// local column indices (host view) and the packed row pointers (host).
//
// NOTE(review): the return type and the "CrsGraph<...>::" qualifier are on
// lines missing from this listing (embedded numbers jump 3847 -> 3850).
// NOTE(review): the indices view is requested with Access::ReadWrite even
// though this method is const.
3847template <class LocalOrdinal, class GlobalOrdinal, class Node>
3850 getLocalGraphHost() const {
3851 return local_graph_host_type(
3852 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
3853 this->getRowPtrsPackedHost());
3854}
3855
// computeGlobalConstants (name taken from the profiling-region label below;
// the function header is on lines missing from this listing, 3857-3858).
//
// If haveGlobalConstants_ is not yet set, this computes
//   globalNumEntries_       = sum over processes of getLocalNumEntries()
//   globalMaxNumRowEntries_ = max over processes of nodeMaxNumRowEntries_
// using two Details::iallreduce calls, then sets haveGlobalConstants_.
// computeLocalConstants() is always called, between posting the first
// reduction and waiting on it.
3856template <class LocalOrdinal, class GlobalOrdinal, class Node>
3859 using Teuchos::ArrayView;
3860 using Teuchos::outArg;
3861 using Teuchos::reduceAll;
3862 using ::Tpetra::Details::ProfilingRegion;
3863 typedef global_size_t GST;
3864
3865 ProfilingRegion regionCGC("Tpetra::CrsGraph::computeGlobalConstants");
3866
 // lcl/gbl must stay alive until req->wait() below: they are the send and
 // receive buffers handed to the first iallreduce.
3867 GST lcl, gbl;
3868 std::shared_ptr<Details::CommRequest> req;
3869 if (!this->haveGlobalConstants_) {
3870 lcl = static_cast<GST>(this->getLocalNumEntries());
3871 req = Details::iallreduce(lcl, gbl, Teuchos::REDUCE_SUM, *this->getComm());
3872 }
3873
 // The max reduction below needs nodeMaxNumRowEntries_, which this call sets.
3874 this->computeLocalConstants();
3875
3876 // Compute global constants from local constants. Processes that
3877 // already have local constants still participate in the
3878 // all-reduces, using their previously computed values.
3879 if (!this->haveGlobalConstants_) {
3880 // Promote all the nodeNum* and nodeMaxNum* quantities from
3881 // size_t to global_size_t, when doing the all-reduces for
3882 // globalNum* / globalMaxNum* results.
3883 //
3884 // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
3885 // this in two all-reduces (one for the sum and the other for
3886 // the max), or use a custom MPI_Op that combines the sum and
3887 // the max. The latter might even be slower than two
3888 // all-reduces on modern network hardware. It would also be a
3889 // good idea to use nonblocking all-reduces (MPI 3), so that we
3890 // don't have to wait around for the first one to finish before
3891 // starting the second one.
 // NOTE(review): the visible code already posts both reductions through
 // Details::iallreduce before waiting on either, so the last sentence of
 // the FIXME above appears to be out of date.
3892 const GST lclMaxNumRowEnt = static_cast<GST>(this->nodeMaxNumRowEntries_);
3893 auto req2 = Details::iallreduce(lclMaxNumRowEnt, this->globalMaxNumRowEntries_, Teuchos::REDUCE_MAX, *this->getComm());
3894
3895 req->wait();
3896 this->globalNumEntries_ = gbl;
3897
3898 req2->wait();
3899 this->haveGlobalConstants_ = true;
3900 }
3901}
3902
// computeLocalConstants (name taken from the profiling-region label below;
// the function header is on lines missing from this listing, 3904-3905).
//
// Computes nodeMaxNumRowEntries_ from the packed row pointers and sets
// haveLocalConstants_.  Returns immediately if haveLocalConstants_ is
// already set.
3903template <class LocalOrdinal, class GlobalOrdinal, class Node>
3906 using ::Tpetra::Details::ProfilingRegion;
3907
3908 ProfilingRegion regionCLC("Tpetra::CrsGraph::computeLocalConstants");
3909 if (this->haveLocalConstants_) {
3910 return;
3911 }
3912
3913 // Reset local properties
3914 this->nodeMaxNumRowEntries_ =
3915 Teuchos::OrdinalTraits<size_t>::invalid();
3916
3917 using LO = local_ordinal_type;
3918
3919 auto ptr = this->getRowPtrsPackedDevice();
 // A row-pointer array with N+1 entries describes N rows; an empty array is
 // treated as zero rows so that the subtraction cannot underflow.
3920 const LO lclNumRows = ptr.extent(0) == 0 ? static_cast<LO>(0) : (static_cast<LO>(ptr.extent(0)) - static_cast<LO>(1));
3921
 // Presumably the largest ptr(i+1) - ptr(i) over the rows, judging by the
 // helper's name -- its definition is not visible here.
3922 const LO lclMaxNumRowEnt =
3923 ::Tpetra::Details::maxDifference("Tpetra::CrsGraph: nodeMaxNumRowEntries",
3924 ptr, lclNumRows);
3925 this->nodeMaxNumRowEntries_ = static_cast<size_t>(lclMaxNumRowEnt);
3926 this->haveLocalConstants_ = true;
3927}
3928
// makeIndicesLocal: convert the graph's column indices from global to local
// using the current column Map, then mark the graph as locally indexed.
//
// Parameter:
//   verbose - if true, progress messages are written to std::cerr.
//
// Returns a pair:
//   first  - number of column indices that could not be converted (or
//            OrdinalTraits<size_t>::invalid() on the early-return path below);
//   second - accumulated error text (empty if nothing went wrong).
//
// Throws std::logic_error if the graph has no column Map.
//
// NOTE(review): this text comes from a rendered source listing; where the
// embedded line numbers jump (3930 -> 3932 -> 3934, 3989 -> 3991,
// 4041 -> 4043) a line of the real file is missing.
3929template <class LocalOrdinal, class GlobalOrdinal, class Node>
3930std::pair<size_t, std::string>
3932 makeIndicesLocal(const bool verbose) {
3934 using std::endl;
3935 using Teuchos::arcp;
3936 using Teuchos::Array;
3937 typedef LocalOrdinal LO;
3938 typedef GlobalOrdinal GO;
3939 typedef device_type DT;
3940 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
3941 typedef typename num_row_entries_type::non_const_value_type num_ent_type;
3942 const char tfecfFuncName[] = "makeIndicesLocal: ";
3943 ProfilingRegion regionMakeIndicesLocal("Tpetra::CrsGraph::makeIndicesLocal");
3944
 // prefix is only allocated in verbose mode; every later dereference of it
 // is guarded by the same flag.
3945 std::unique_ptr<std::string> prefix;
3946 if (verbose) {
3947 prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
3948 std::ostringstream os;
3949 os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
3950 std::cerr << os.str();
3951 }
3952
3953 // These are somewhat global properties, so it's safe to have
3954 // exception checks for them, rather than returning an error code.
3955 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::logic_error,
3956 "The graph does not have a "
3957 "column Map yet. This method should never be called in that case. "
3958 "Please report this bug to the Tpetra developers.");
3959 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getColMap().is_null(), std::logic_error,
3960 "The graph claims "
3961 "that it has a column Map, because hasColMap() returns true. However, "
3962 "the result of getColMap() is null. This should never happen. Please "
3963 "report this bug to the Tpetra developers.");
3964
3965 // Return value 1: The number of column indices (counting
3966 // duplicates) that could not be converted to local indices,
3967 // because they were not in the column Map on the calling process.
3968 size_t lclNumErrs = 0;
3969 std::ostringstream errStrm; // for return value 2 (error string)
3970
3971 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
3972 const map_type& colMap = *(this->getColMap());
3973
 // Conversion work is only needed when the graph currently stores global
 // indices and owns at least one row; otherwise only the state flags at
 // the bottom of the function are updated.
3974 if (this->isGloballyIndexed() && lclNumRows != 0) {
3975 // This is a host-accessible View.
3976 typename num_row_entries_type::const_type h_numRowEnt =
3977 this->k_numRowEntries_;
3978
3979 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
3980
3981 // Allocate space for local indices.
3982 if (rowPtrsUnpacked_host.extent(0) == 0) {
3983 errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
3984 "happen here. Please report this bug to the Tpetra developers."
3985 << endl;
3986 // Need to return early.
 // Note: on this path the indexing-state flags are left untouched.
3987 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid(),
3988 errStrm.str());
3989 }
 // Listing line 3990 is missing; "numEnt", used below as the allocation
 // size, is presumably defined there from the last row pointer -- confirm.
3991
3992 // mfh 17 Dec 2016: We don't need initial zero-fill of
3993 // lclIndsUnpacked_wdv, because we will fill it below anyway.
3994 // AllowPadding would only help for aligned access (e.g.,
3995 // for vectorization) if we also were to pad each row to the
3996 // same alignment, so we'll skip AllowPadding for now.
3997
3998 // using Kokkos::AllowPadding;
3999 using Kokkos::view_alloc;
4000 using Kokkos::WithoutInitializing;
4001
4002 // When giving the label as an argument to
4003 // Kokkos::view_alloc, the label must be a string and not a
4004 // char*, else the code won't compile. This is because
4005 // view_alloc also allows a raw pointer as its first
4006 // argument. See
4007 // https://github.com/kokkos/kokkos/issues/434. This is a
4008 // large allocation typically, so the overhead of creating
4009 // an std::string is minor.
4010 const std::string label("Tpetra::CrsGraph::lclInd");
4011 if (verbose) {
4012 std::ostringstream os;
4013 os << *prefix << "(Re)allocate lclInd_wdv: old="
4014 << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4015 std::cerr << os.str();
4016 }
4017
4018 local_inds_dualv_type lclInds_dualv =
4019 local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4020 numEnt);
4021 lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4022
4023 auto lclColMap = colMap.getLocalMap();
4024 // This is a "device mirror" of the host View h_numRowEnt.
4025 //
4026 // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4027 // Device instance is to use its default constructor. See the
4028 // following Kokkos issue:
4029 //
4030 // https://github.com/kokkos/kokkos/issues/442
4031 if (verbose) {
4032 std::ostringstream os;
4033 os << *prefix << "Allocate device mirror k_numRowEnt: "
4034 << h_numRowEnt.extent(0) << endl;
4035 std::cerr << os.str();
4036 }
4037 auto k_numRowEnt =
4038 Kokkos::create_mirror_view_and_copy(device_type(), h_numRowEnt);
4039
 // The opening of the call on the next statement is on a line missing from
 // this listing (4042); given the using-declaration it is presumably
 // convertColumnIndicesFromGlobalToLocal<...>( -- confirm.
4040 using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal;
4041 lclNumErrs =
4043 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4044 gblInds_wdv.getDeviceView(Access::ReadOnly),
4045 this->getRowPtrsUnpackedDevice(),
4046 lclColMap,
4047 k_numRowEnt);
4048 if (lclNumErrs != 0) {
 // Rank is reported as 0 when no Map or communicator is available.
4049 const int myRank = [this]() {
4050 auto map = this->getMap();
4051 if (map.is_null()) {
4052 return 0;
4053 } else {
4054 auto comm = map->getComm();
4055 return comm.is_null() ? 0 : comm->getRank();
4056 }
4057 }();
4058 const bool pluralNumErrs = (lclNumErrs != static_cast<size_t>(1));
4059 errStrm << "(Process " << myRank << ") When converting column "
4060 "indices from global to local, we encountered "
4061 << lclNumErrs
4062 << " ind" << (pluralNumErrs ? "ices" : "ex")
4063 << " that do" << (pluralNumErrs ? "" : "es")
4064 << " not live in the column Map on this process." << endl;
4065 }
4066
4067 // We've converted column indices from global to local, so we
4068 // can deallocate the global column indices (which we know are
4069 // in 1-D storage, because the graph has static profile).
4070 if (verbose) {
4071 std::ostringstream os;
4072 os << *prefix << "Free gblInds_wdv: "
4073 << gblInds_wdv.extent(0) << endl;
4074 std::cerr << os.str();
4075 }
4076 gblInds_wdv = global_inds_wdv_type();
4077 } // globallyIndexed() && lclNumRows > 0
4078
 // The flags are flipped even when lclNumErrs != 0; the caller learns about
 // failures only through the returned pair.
4079 this->indicesAreLocal_ = true;
4080 this->indicesAreGlobal_ = false;
4081 this->checkInternalState();
4082
4083 return std::make_pair(lclNumErrs, errStrm.str());
4084}
4085
// makeColMap: build (or revise) the graph's column Map by calling
// Details::makeColMap, and store the result in colMap_.
//
// Parameter:
//   remotePIDs - passed through to Details::makeColMap, which receives it by
//                non-const reference (presumably filled with the owning ranks
//                of remote column indices -- confirm against that helper).
//
// In debug mode the per-process error code is checked across the
// communicator and, on failure, all processes' messages are gathered into a
// std::runtime_error.  Outside debug mode the error code is discarded.
//
// NOTE(review): this text comes from a rendered source listing; where the
// embedded line numbers jump (4086 -> 4088 -> 4090, 4130 -> 4133) a line of
// the real file is missing.
4086template <class LocalOrdinal, class GlobalOrdinal, class Node>
4088 makeColMap(Teuchos::Array<int>& remotePIDs) {
4090 using std::endl;
 // No trailing ": " here because the same string is also passed to
 // createPrefix(); the exception message below supplies the separator.
4091 const char tfecfFuncName[] = "makeColMap";
4092
4093 ProfilingRegion regionSortAndMerge("Tpetra::CrsGraph::makeColMap");
4094 std::unique_ptr<std::string> prefix;
4095 if (verbose_) {
4096 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4097 std::ostringstream os;
4098 os << *prefix << "Start" << endl;
4099 std::cerr << os.str();
4100 }
4101
4102 // this->colMap_ should be null at this point, but we accept the
4103 // future possibility that it might not be (esp. if we decide
4104 // later to support graph structure changes after first
4105 // fillComplete, which CrsGraph does not currently (as of 12 Feb
4106 // 2017) support).
4107 Teuchos::RCP<const map_type> colMap = this->colMap_;
4108 const bool sortEachProcsGids =
4109 this->sortGhostsAssociatedWithEachProcessor_;
4110
4111 // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4112 // per-process error code. If an error does occur on a process,
4113 // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4114 // notice that error. This is the caller's responsibility. For
4115 // now, we only propagate (to all processes) and report the error
4116 // in debug mode. In the future, we need to add the local/global
4117 // error handling scheme used in BlockCrsMatrix to this class.
4118 if (debug_) {
4119 using Teuchos::outArg;
4120 using Teuchos::REDUCE_MIN;
4121 using Teuchos::reduceAll;
4122
4123 std::ostringstream errStrm;
4124 const int lclErrCode =
4125 Details::makeColMap(colMap, remotePIDs,
4126 getDomainMap(), *this, sortEachProcsGids, &errStrm);
4127 auto comm = this->getComm();
4128 if (!comm.is_null()) {
4129 const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4130 int gblSuccess = 0; // output argument
 // Listing lines 4131-4132 are missing; given the using-declarations
 // above this is presumably a REDUCE_MIN reduceAll of lclSuccess into
 // gblSuccess -- confirm.
4133 if (gblSuccess != 1) {
4134 std::ostringstream os;
4135 Details::gathervPrint(os, errStrm.str(), *comm);
4136 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
4137 ": An error happened on at "
4138 "least one process in the CrsGraph's communicator. "
4139 "Here are all processes' error messages:"
4140 << std::endl
4141 << os.str());
4142 }
4143 }
4144 } else {
 // Non-debug path: no error stream is supplied and the return code is
 // deliberately ignored.
4145 (void)Details::makeColMap(colMap, remotePIDs,
4146 getDomainMap(), *this, sortEachProcsGids, nullptr);
4147 }
4148 // See above. We want to admit the possibility of makeColMap
4149 // actually revising an existing column Map, even though that
4150 // doesn't currently (as of 10 May 2017) happen.
4151 this->colMap_ = colMap;
4152
4153 checkInternalState();
4154 if (verbose_) {
4155 std::ostringstream os;
4156 os << *prefix << "Done" << endl;
4157 std::cerr << os.str();
4158 }
4159}
4160
4161template <class execution_space, class LO, class rowptr_type, class colinds_type, class numRowEntries_type>
4162void prepareSortMergeUnpackedGraph(rowptr_type rowptr, colinds_type colinds, numRowEntries_type numRowEntries) {
4163 using ATS = KokkosKernels::ArithTraits<LO>;
4164 const auto unused = ATS::max();
4165
4166 auto numRows = rowptr.extent(0) - 1;
4167
4168 // make sure that unused entries will get ordered last
4169 Kokkos::parallel_for(
4170 "flag_unused_entries", Kokkos::RangePolicy<execution_space, LO>(0, numRows), KOKKOS_LAMBDA(const LO rlid) {
4171 for (size_t jj = rowptr(rlid) + numRowEntries(rlid); jj < rowptr(rlid + 1); ++jj) {
4172 colinds(jj) = unused;
4173 }
4174 });
4175}
4176
4177template <class execution_space, class LO, class rowptr_type, class colinds_type, class numRowEntries_type>
4178void mergeUnpackedGraph(rowptr_type rowptr, colinds_type colinds, numRowEntries_type numRowEntries) {
4179 auto numRows = rowptr.extent(0) - 1;
4180
4181 // merge
4182 // We cannot use KokkosSparse::sort_and_merge_matrix since we
4183 // do not actually want to change the allocations.
4184
4185 Kokkos::parallel_for(
4186 "merge_entries", Kokkos::RangePolicy<execution_space>(0, numRows), KOKKOS_LAMBDA(const LO rlid) {
4187 auto rowNNZ = numRowEntries(rlid);
4188 if (rowNNZ == 0) {
4189 return;
4190 }
4191 auto rowBegin = rowptr(rlid);
4192 auto pos = rowBegin;
4193 for (size_t offset = rowBegin + 1; offset < rowBegin + rowNNZ; ++offset) {
4194 if ((colinds(offset) != colinds(pos))) {
4195 colinds(++pos) = colinds(offset);
4196 }
4197 }
4198 numRowEntries(rlid) = pos + 1 - rowBegin;
4199 });
4200}
4201
4202template <class LocalOrdinal, class GlobalOrdinal, class Node>
4203void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4204 sortAndMergeAllIndices(const bool sorted, const bool merged) {
4205 using std::endl;
4206 const char tfecfFuncName[] = "sortAndMergeAllIndices";
4207
4208 std::unique_ptr<std::string> prefix;
4209 if (verbose_) {
4210 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4211 std::ostringstream os;
4212 os << *prefix << "Start: "
4213 << "sorted=" << (sorted ? "true" : "false")
4214 << ", merged=" << (merged ? "true" : "false") << endl;
4215 std::cerr << os.str();
4216 }
4217 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed(), std::logic_error,
4218 "This method may only be called after makeIndicesLocal.");
4219 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!merged && this->isStorageOptimized(), std::logic_error,
4220 "The graph is already storage optimized, so we shouldn't be "
4221 "merging any indices. "
4222 "Please report this bug to the Tpetra developers.");
4223
4224 if (!sorted || !merged) {
4225 Details::ProfilingRegion regionSortAndMerge("Tpetra::CrsGraph::sortAndMergeAllIndices");
4226
4227 if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
4228 // We are sorting & merging the unpacked views.
4229 // This means that not all entries are actually in use. We need to take k_numRowEntries_ into account.
4230 auto rowptr = rowPtrsUnpacked_dev_;
4231 auto colinds = lclIndsUnpacked_wdv.getDeviceView(Access::ReadWrite);
4232
4233 // Create a device copy of k_numRowEntries_.
4234 auto k_numRowEntries_d = Kokkos::create_mirror_view_and_copy(execution_space(), k_numRowEntries_);
4235
4236 // set set unused column entries so they get sorted last
4237 prepareSortMergeUnpackedGraph<execution_space, LocalOrdinal>(rowptr, colinds, k_numRowEntries_d);
4238
4239 if (!sorted) {
4240 // For this to work correctly, we require that the unused column entries have been filled
4241 // with indices that get ordered last.
4242 Import_Util::sortCrsEntries(rowptr, colinds);
4243 this->indicesAreSorted_ = true; // we just sorted every row
4244 }
4245 if (!merged) {
4246 mergeUnpackedGraph<execution_space, LocalOrdinal>(rowptr, colinds, k_numRowEntries_d);
4247 Kokkos::deep_copy(k_numRowEntries_, k_numRowEntries_d);
4248 this->noRedundancies_ = true; // we just merged every row
4249 }
4250 } else {
4251 auto rowptr = rowPtrsPacked_dev_;
4252 auto colinds = lclIndsPacked_wdv.getDeviceView(Access::ReadWrite);
4253 if (!sorted && merged) {
4254 Import_Util::sortCrsEntries(rowptr, colinds);
4255 this->indicesAreSorted_ = true; // we just sorted every row
4256 } else {
4257 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
4258 "We should never get here."
4259 "Please report this bug to the Tpetra developers.");
4260 }
4261 }
4262 }
4263
4264 if (verbose_) {
4265 std::ostringstream os;
4266 os << *prefix << "Done" << endl;
4267 std::cerr << os.str();
4268 }
4269}
4270
// makeImportExport: create the graph's Import and Export objects if they do
// not exist yet and are actually needed.
//
// Parameters:
//   remotePIDs    - passed to the import_type constructor when useRemotePIDs
//                   is true.
//   useRemotePIDs - selects the import_type constructor overload that takes
//                   remotePIDs.
//
// An Import is created only if importer_ is null and the domain Map differs
// from the column Map; an Export only if exporter_ is null and the range Map
// differs from the row Map.  If the graph's parameter list has an "Import" /
// "Export" sublist, that sublist is handed to the respective constructor.
//
// Throws std::logic_error if the graph has no column Map.
//
// NOTE(review): this text comes from a rendered source listing; where the
// embedded line numbers jump (4271 -> 4273, 4302 -> 4304 -> 4306,
// 4323 -> 4325) a line of the real file is missing.  The missing lines
// presumably declare importSublist, newImp and exportSublist -- confirm.
4271template <class LocalOrdinal, class GlobalOrdinal, class Node>
4273 makeImportExport(Teuchos::Array<int>& remotePIDs,
4274 const bool useRemotePIDs) {
4275 using Teuchos::ParameterList;
4276 using Teuchos::RCP;
4277 using Teuchos::rcp;
4278 using ::Tpetra::Details::ProfilingRegion;
4279 const char tfecfFuncName[] = "makeImportExport: ";
4280 ProfilingRegion regionMIE("Tpetra::CrsGraph::makeImportExport");
4281
4282 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::logic_error,
4283 "This method may not be called unless the graph has a column Map.");
4284 RCP<ParameterList> params = this->getNonconstParameterList(); // could be null
4285
4286 // Don't do any checks to see if we need to create the Import, if
4287 // it exists already.
4288 //
4289 // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4290 // change CrsGraph in the future to allow changing the column
4291 // Map after fillComplete. For now, the column Map is fixed
4292 // after the first fillComplete call.
4293 if (importer_.is_null()) {
4294 // Create the Import instance if necessary.
 // Handle comparison first; isSameAs() is only called when the two RCPs
 // differ.  NOTE(review): domainMap_ is dereferenced without a null check.
4295 if (domainMap_ != colMap_ && (!domainMap_->isSameAs(*colMap_))) {
4296 if (params.is_null() || !params->isSublist("Import")) {
4297 if (useRemotePIDs) {
4298 importer_ = rcp(new import_type(domainMap_, colMap_, remotePIDs));
4299 } else {
4300 importer_ = rcp(new import_type(domainMap_, colMap_));
4301 }
4302 } else {
4304 if (useRemotePIDs) {
4306 rcp(new import_type(domainMap_, colMap_, remotePIDs,
4307 importSublist));
4308 importer_ = newImp;
4309 } else {
4310 importer_ = rcp(new import_type(domainMap_, colMap_, importSublist));
4311 }
4312 }
4313 }
4314 }
4315
4316 // Don't do any checks to see if we need to create the Export, if
4317 // it exists already.
4318 if (exporter_.is_null()) {
4319 // Create the Export instance if necessary.
 // NOTE(review): rangeMap_ is dereferenced without a null check.
4320 if (rangeMap_ != rowMap_ && !rangeMap_->isSameAs(*rowMap_)) {
4321 if (params.is_null() || !params->isSublist("Export")) {
4322 exporter_ = rcp(new export_type(rowMap_, rangeMap_));
4323 } else {
4325 exporter_ = rcp(new export_type(rowMap_, rangeMap_, exportSublist));
4326 }
4327 }
4328 }
4329}
4330
// Build a one-line, human-readable summary of this graph.
//
// The string starts with dist_object_type::description() and is followed by
// a brace-enclosed status. The global column count and global entry count
// are only reported when isFillComplete() is true; otherwise only the global
// row count is reported.
//
// NOTE(review): the line carrying the class qualifier for this definition
// (listing line 4333) is absent from this listing -- confirm against the
// upstream file before compiling.
4331template <class LocalOrdinal, class GlobalOrdinal, class Node>
4332std::string
4334 description() const {
4335 std::ostringstream oss;
4336 oss << dist_object_type::description();
4337 if (isFillComplete()) {
4338 oss << "{status = fill complete"
4339 << ", global rows = " << getGlobalNumRows()
4340 << ", global cols = " << getGlobalNumCols()
4341 << ", global num entries = " << getGlobalNumEntries()
4342 << "}";
4343 } else {
4344 oss << "{status = fill not complete"
4345 << ", global rows = " << getGlobalNumRows()
4346 << "}";
4347 }
4348 return oss.str();
4349}
4350
// Print this graph to `out` with an amount of detail chosen by `verbLevel`.
//
// VERB_DEFAULT is treated as VERB_LOW. Output is produced rank by rank:
// each per-rank section loops over all ranks of the communicator and only
// the rank whose turn it is writes, with barriers between turns.
//
// out       [out] Stream that receives the text.
// verbLevel [in]  Requested verbosity (see the table inside the body).
//
// NOTE(review): the line carrying the return type and class qualifier
// (listing line 4352) is absent from this listing -- confirm against the
// upstream file before compiling.
4351template <class LocalOrdinal, class GlobalOrdinal, class Node>
4353 describe(Teuchos::FancyOStream& out,
4354 const Teuchos::EVerbosityLevel verbLevel) const {
4355 using std::endl;
4356 using std::setw;
4357 using Teuchos::ArrayView;
4358 using Teuchos::Comm;
4359 using Teuchos::RCP;
4360 using Teuchos::VERB_DEFAULT;
4361 using Teuchos::VERB_EXTREME;
4362 using Teuchos::VERB_HIGH;
4363 using Teuchos::VERB_LOW;
4364 using Teuchos::VERB_MEDIUM;
4365 using Teuchos::VERB_NONE;
4366
4367 Teuchos::EVerbosityLevel vl = verbLevel;
4368 if (vl == VERB_DEFAULT) vl = VERB_LOW;
4369 RCP<const Comm<int>> comm = this->getComm();
4370 const int myImageID = comm->getRank(),
4371 numImages = comm->getSize();
// Column width for the tabular output: one more than the number of times 10
// can be scaled up while staying below the global row count, but never less
// than 11, plus two.
4372 size_t width = 1;
4373 for (size_t dec = 10; dec < getGlobalNumRows(); dec *= 10) {
4374 ++width;
4375 }
4376 width = std::max<size_t>(width, static_cast<size_t>(11)) + 2;
4377 Teuchos::OSTab tab(out);
4378 // none: print nothing
4379 // low: print O(1) info from node 0
4380 // medium: print O(P) info, num entries per node
4381 // high: print O(N) info, num entries per row
4382 // extreme: print O(NNZ) info: print graph indices
4383 //
4384 // for medium and higher, print constituent objects at specified verbLevel
4385 if (vl != VERB_NONE) {
4386 if (myImageID == 0) out << this->description() << std::endl;
4387 // O(1) globals, minus what was already printed by description()
4388 if (isFillComplete() && myImageID == 0) {
4389 out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4390 }
4391 // constituent objects
4392 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
// The row Map is described unconditionally; the other three Maps are
// described only when they are non-null.
4393 if (myImageID == 0) out << "\nRow map: " << std::endl;
4394 rowMap_->describe(out, vl);
4395 if (colMap_ != Teuchos::null) {
4396 if (myImageID == 0) out << "\nColumn map: " << std::endl;
4397 colMap_->describe(out, vl);
4398 }
4399 if (domainMap_ != Teuchos::null) {
4400 if (myImageID == 0) out << "\nDomain map: " << std::endl;
4401 domainMap_->describe(out, vl);
4402 }
4403 if (rangeMap_ != Teuchos::null) {
4404 if (myImageID == 0) out << "\nRange map: " << std::endl;
4405 rangeMap_->describe(out, vl);
4406 }
4407 }
4408 // O(P) data
4409 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4410 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4411 if (myImageID == imageCtr) {
4412 out << "Node ID = " << imageCtr << std::endl
4413 << "Node number of entries = " << this->getLocalNumEntries() << std::endl
4414 << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4415 if (!indicesAreAllocated()) {
4416 out << "Indices are not allocated." << std::endl;
4417 }
4418 }
// NOTE(review): three back-to-back barriers per turn; presumably intended
// to give the writing rank's output time to appear before the next rank
// prints -- confirm the intent.
4419 comm->barrier();
4420 comm->barrier();
4421 comm->barrier();
4422 }
4423 }
4424 // O(N) and O(NNZ) data
4425 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4426 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4427 if (myImageID == imageCtr) {
4428 out << std::setw(width) << "Node ID"
4429 << std::setw(width) << "Global Row"
4430 << std::setw(width) << "Num Entries";
4431 if (vl == VERB_EXTREME) {
4432 out << " Entries";
4433 }
4434 out << std::endl;
4435 const LocalOrdinal lclNumRows =
4436 static_cast<LocalOrdinal>(this->getLocalNumRows());
4437 for (LocalOrdinal r = 0; r < lclNumRows; ++r) {
4438 const RowInfo rowinfo = this->getRowInfo(r);
4439 GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4440 out << std::setw(width) << myImageID
4441 << std::setw(width) << gid
4442 << std::setw(width) << rowinfo.numEntries;
4443 if (vl == VERB_EXTREME) {
4444 out << " ";
// Entries are always printed as global column indices: stored global
// indices are printed directly, stored local indices are translated
// through the column Map. Nothing is printed for the row if the graph is
// neither globally nor locally indexed.
4445 if (isGloballyIndexed()) {
4446 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4447 for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4448 GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4449 out << colgid << " ";
4450 }
4451 } else if (isLocallyIndexed()) {
4452 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4453 for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4454 LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4455 out << colMap_->getGlobalElement(collid) << " ";
4456 }
4457 }
4458 }
4459 out << std::endl;
4460 }
4461 }
4462 comm->barrier();
4463 comm->barrier();
4464 comm->barrier();
4465 }
4466 }
4467 }
4468}
4469
// Size-compatibility hook: accepts every source object.
//
// The source argument is unnamed and never inspected; the function always
// returns true.
//
// NOTE(review): the line carrying the return type and class qualifier
// (listing line 4471) is absent from this listing -- confirm against the
// upstream file before compiling.
4470template <class LocalOrdinal, class GlobalOrdinal, class Node>
4472 checkSizes(const SrcDistObject& /* source */) {
4473 // It's not clear what kind of compatibility checks on sizes can
4474 // be performed here. Epetra_CrsGraph doesn't check any sizes for
4475 // compatibility.
4476 return true;
4477}
4478
// Copy the rows that source and target have in common, then copy the
// permuted rows, inserting every row's global column indices into this
// graph with insertGlobalIndices().
//
// Before any insertion, the extra space each target row needs is computed
// with computeCrsPadding() and applied with applyCrsPadding().
//
// NOTE(review): several lines of this definition are absent from this
// listing (4480, 4481, 4484, 4486, 4491, 4495, 4511, 4516): the return type
// and qualifier, the first parameter, the tails of both DualView parameter
// types, one using-declaration, the condition that opens the early-return
// block below, the start of the exception macro, and the start of the
// srcRowGraph declaration. Confirm all of them against the upstream file.
4479template <class LocalOrdinal, class GlobalOrdinal, class Node>
4482 const size_t numSameIDs,
4483 const Kokkos::DualView<const local_ordinal_type*,
4485 const Kokkos::DualView<const local_ordinal_type*,
4487 const CombineMode /*CM*/) {
4488 using std::endl;
4489 using LO = local_ordinal_type;
4490 using GO = global_ordinal_type;
4492 const char tfecfFuncName[] = "copyAndPermute: ";
4493 const bool verbose = verbose_;
4494
// Alternate path: hands the whole operation to copyAndPermuteNew() with
// INSERT and returns. NOTE(review): the condition selecting this path is
// not visible in this listing.
4496 const row_graph_type& srcRowGraph = dynamic_cast<const row_graph_type&>(source);
4497 copyAndPermuteNew(srcRowGraph, *this, numSameIDs, permuteToLIDs, permuteFromLIDs, INSERT);
4498 return;
4499 }
4500
4501 Details::ProfilingRegion regionCAP("Tpetra::CrsGraph::copyAndPermute");
4502
4503 std::unique_ptr<std::string> prefix;
4504 if (verbose) {
4505 prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4506 std::ostringstream os;
4507 os << *prefix << endl;
4508 std::cerr << os.str();
4509 }
4510
// Tail of an exception check that reports mismatched permute array lengths
// as std::runtime_error.
4512 std::runtime_error, "permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) << " != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) << ".");
4513
4514 // We know from checkSizes that the source object is a
4515 // row_graph_type, so we don't need to check again.
// NOTE(review): checkSizes() as defined in this file returns true without
// looking at the source, so the reference dynamic_cast below is the only
// type check on this path.
4517 dynamic_cast<const row_graph_type&>(source);
4518
4519 if (verbose) {
4520 std::ostringstream os;
4521 os << *prefix << "Compute padding" << endl;
4522 std::cerr << os.str();
4523 }
4524 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4525 permuteToLIDs, permuteFromLIDs, verbose);
4526 applyCrsPadding(*padding, verbose);
4527
4528 // If the source object is actually a CrsGraph, we can use view
4529 // mode instead of copy mode to access the entries in each row,
4530 // if the graph is not fill complete.
4531 const this_CRS_type* srcCrsGraph =
4532 dynamic_cast<const this_CRS_type*>(&source);
4533
4534 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4535 const map_type& tgtRowMap = *(getRowMap());
4536 const bool src_filled = srcRowGraph.isFillComplete();
4537 nonconst_global_inds_host_view_type row_copy;
4538 LO myid = 0;
4539
4540 //
4541 // "Copy" part of "copy and permute."
4542 //
4543 if (src_filled || srcCrsGraph == nullptr) {
4544 if (verbose) {
4545 std::ostringstream os;
4546 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4547 std::cerr << os.str();
4548 }
4549 // If the source graph is fill complete, we can't use view mode,
4550 // because the data might be stored in a different format not
4551 // compatible with the expectations of view mode. Also, if the
4552 // source graph is not a CrsGraph, we can't use view mode,
4553 // because RowGraph only provides copy mode access to the data.
4554 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4555 const GO gid = srcRowMap.getGlobalElement(myid);
4556 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow(gid);
4557 Kokkos::resize(row_copy, row_length);
// check_row_length receives the count reported by getGlobalRowCopy(); it is
// not compared against row_length here, and row_length is what is inserted.
4558 size_t check_row_length = 0;
4559 srcRowGraph.getGlobalRowCopy(gid, row_copy, check_row_length);
4560 this->insertGlobalIndices(gid, row_length, row_copy.data());
4561 }
4562 } else {
4563 if (verbose) {
4564 std::ostringstream os;
4565 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4566 std::cerr << os.str();
4567 }
4568 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4569 const GO gid = srcRowMap.getGlobalElement(myid);
4570 global_inds_host_view_type row;
4571 srcCrsGraph->getGlobalRowView(gid, row);
4572 this->insertGlobalIndices(gid, row.extent(0), row.data());
4573 }
4574 }
4575
4576 //
4577 // "Permute" part of "copy and permute."
4578 //
4579 auto permuteToLIDs_h = permuteToLIDs.view_host();
4580 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4581
// Same copy-mode / view-mode split as above, but the source row is looked
// up through permuteFromLIDs and inserted into the target row given by
// permuteToLIDs.
4582 if (src_filled || srcCrsGraph == nullptr) {
4583 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
4584 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
4585 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
4586 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow(srcgid);
4587 Kokkos::resize(row_copy, row_length);
4588 size_t check_row_length = 0;
4589 srcRowGraph.getGlobalRowCopy(srcgid, row_copy, check_row_length);
4590 this->insertGlobalIndices(mygid, row_length, row_copy.data());
4591 }
4592 } else {
4593 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
4594 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
4595 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
4596 global_inds_host_view_type row;
4597 srcCrsGraph->getGlobalRowView(srcgid, row);
4598 this->insertGlobalIndices(mygid, row.extent(0), row.data());
4599 }
4600 }
4601
4602 if (verbose) {
4603 std::ostringstream os;
4604 os << *prefix << "Done" << endl;
4605 std::cerr << os.str();
4606 }
4607}
4608
// Apply a previously computed padding to this graph's unpacked storage.
//
// Steps, in order: allocate indices (as global indices) if none exist yet;
// copy the unpacked row offsets into a writable view; build a matching
// "row end" view; call padCrsArrays() on the global or local index storage;
// write the per-row entry counts back if they were in use; and install the
// new row offsets with setRowPtrsUnpacked().
//
// padding [in] Padding description to apply.
// verbose [in] Whether to write progress messages to std::cerr.
//
// NOTE(review): listing lines 4610, 4613 and 4614 are absent from this
// listing (the return type / class qualifier and, presumably, the
// using-declarations that bring ProfilingRegion and padCrsArrays into
// scope) -- confirm against the upstream file.
4609template <class LocalOrdinal, class GlobalOrdinal, class Node>
4611 applyCrsPadding(const padding_type& padding,
4612 const bool verbose) {
4615 using std::endl;
4616 using LO = local_ordinal_type;
4617 using row_ptrs_type =
4618 typename local_graph_device_type::row_map_type::non_const_type;
4619 using range_policy =
4620 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4621 const char tfecfFuncName[] = "applyCrsPadding";
4622 ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4623
4624 std::unique_ptr<std::string> prefix;
4625 if (verbose) {
4626 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4627 std::ostringstream os;
4628 os << *prefix << "padding: ";
4629 padding.print(os);
4630 os << endl;
4631 std::cerr << os.str();
4632 }
// The rank is only looked up in verbose mode; otherwise (or when the Map or
// its communicator is null) -1 is passed on to padCrsArrays().
4633 const int myRank = !verbose ? -1 : [&]() {
4634 auto map = this->getMap();
4635 if (map.is_null()) {
4636 return -1;
4637 }
4638 auto comm = map->getComm();
4639 if (comm.is_null()) {
4640 return -1;
4641 }
4642 return comm->getRank();
4643 }();
4644
4645 // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4646 // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4647 // size needs to increase. That should be the job of
4648 // padCrsArrays.
4649
4650 // Assume global indexing we don't have any indices yet
4651 if (!indicesAreAllocated()) {
4652 if (verbose) {
4653 std::ostringstream os;
4654 os << *prefix << "Call allocateIndices" << endl;
4655 std::cerr << os.str();
4656 }
4657 allocateIndices(GlobalIndices, verbose);
4658 }
4659 TEUCHOS_ASSERT(indicesAreAllocated());
4660
4661 // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4662 // would use it directly.
4663
4664 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4665 if (verbose) {
4666 std::ostringstream os;
4667 os << *prefix << "Allocate row_ptrs_beg: "
4668 << rowPtrsUnpacked_dev.extent(0) << endl;
4669 std::cerr << os.str();
4670 }
4671 using Kokkos::view_alloc;
4672 using Kokkos::WithoutInitializing;
4673 row_ptrs_type row_ptrs_beg(
4674 view_alloc("row_ptrs_beg", WithoutInitializing),
4675 rowPtrsUnpacked_dev.extent(0));
4676 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4677 Kokkos::deep_copy(execution_space(), row_ptrs_beg, rowPtrsUnpacked_dev);
4678
// N is the number of rows: one less than the offset array length, or zero
// when the offset array is empty.
4679 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) : size_t(row_ptrs_beg.extent(0) - 1);
4680 if (verbose) {
4681 std::ostringstream os;
4682 os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4683 std::cerr << os.str();
4684 }
4685 row_ptrs_type row_ptrs_end(
4686 view_alloc("row_ptrs_end", WithoutInitializing), N);
4687 row_ptrs_type num_row_entries;
4688
// A non-empty k_numRowEntries_ selects the "unpacked storage" branch below
// and the write-back of the per-row counts after padding.
4689 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4690
4691 execution_space().fence(); // we need above deep_copy to be done
4692
4693 if (refill_num_row_entries) { // Case 1: Unpacked storage
4694 // We can't assume correct *this capture until C++17, and it's
4695 // likely more efficient just to capture what we need anyway.
4696 num_row_entries =
4697 row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4698 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
// NOTE(review): range_policy is declared with IndexType<LO> while the
// lambdas take `const size_t i` -- confirm the mismatch is intended.
4699 Kokkos::parallel_for(
4700 "Fill end row pointers", range_policy(0, N),
4701 KOKKOS_LAMBDA(const size_t i) {
4702 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4703 });
4704 } else {
4705 // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
4706 // storage, we don't need row_ptr_end to be separate allocation;
4707 // could just have it alias row_ptr_beg+1.
4708 Kokkos::parallel_for(
4709 "Fill end row pointers", range_policy(0, N),
4710 KOKKOS_LAMBDA(const size_t i) {
4711 row_ptrs_end(i) = row_ptrs_beg(i + 1);
4712 });
4713 }
4714
// Pad whichever index storage is active: global indices when the graph is
// globally indexed, otherwise the unpacked local indices.
4715 if (isGloballyIndexed()) {
4716 padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
4717 padding, myRank, verbose);
4718 } else {
4719 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4720 padding, myRank, verbose);
4721 }
4722
4723 if (refill_num_row_entries) {
// Recompute the per-row counts from the begin/end offsets as they stand
// after padCrsArrays() and copy them back into k_numRowEntries_.
4724 Kokkos::parallel_for(
4725 "Fill num entries", range_policy(0, N),
4726 KOKKOS_LAMBDA(const size_t i) {
4727 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4728 });
4729 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
4730 }
4731 if (verbose) {
4732 std::ostringstream os;
4733 os << *prefix << "Reassign k_rowPtrs_; old size: "
4734 << rowPtrsUnpacked_dev.extent(0) << ", new size: "
4735 << row_ptrs_beg.extent(0) << endl;
4736 std::cerr << os.str();
// NOTE(review): this size check only executes when verbose is true.
4737 TEUCHOS_ASSERT(rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0));
4738 }
4739
4740 setRowPtrsUnpacked(row_ptrs_beg);
4741}
4742
4743template <class LocalOrdinal, class GlobalOrdinal, class Node>
4744std::unique_ptr<
4745 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4746CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4747 computeCrsPadding(
4748 const RowGraph<LocalOrdinal, GlobalOrdinal, Node>& source,
4749 const size_t numSameIDs,
4750 const Kokkos::DualView<const local_ordinal_type*,
4751 buffer_device_type>& permuteToLIDs,
4752 const Kokkos::DualView<const local_ordinal_type*,
4753 buffer_device_type>& permuteFromLIDs,
4754 const bool verbose) const {
4755 using LO = local_ordinal_type;
4756 using std::endl;
4757
4758 std::unique_ptr<std::string> prefix;
4759 if (verbose) {
4760 prefix = this->createPrefix("CrsGraph",
4761 "computeCrsPadding(same & permute)");
4762 std::ostringstream os;
4763 os << *prefix << "{numSameIDs: " << numSameIDs
4764 << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
4765 << endl;
4766 std::cerr << os.str();
4767 }
4768
4769 const int myRank = [&]() {
4770 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4771 return comm.is_null() ? -1 : comm->getRank();
4772 }();
4773 std::unique_ptr<padding_type> padding(
4774 new padding_type(myRank, numSameIDs,
4775 permuteFromLIDs.extent(0)));
4776
4777 computeCrsPaddingForSameIDs(*padding, source,
4778 static_cast<LO>(numSameIDs));
4779 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
4780 permuteFromLIDs);
4781 return padding;
4782}
4783
4784template <class LocalOrdinal, class GlobalOrdinal, class Node>
4785void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4786 computeCrsPaddingForSameIDs(
4787 padding_type& padding,
4788 const RowGraph<local_ordinal_type, global_ordinal_type,
4789 node_type>& source,
4790 const local_ordinal_type numSameIDs) const {
4791 using LO = local_ordinal_type;
4792 using GO = global_ordinal_type;
4793 using Details::Impl::getRowGraphGlobalRow;
4794 using std::endl;
4795 const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
4796
4797 std::unique_ptr<std::string> prefix;
4798 const bool verbose = verbose_;
4799 if (verbose) {
4800 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4801 std::ostringstream os;
4802 os << *prefix << "numSameIDs: " << numSameIDs << endl;
4803 std::cerr << os.str();
4804 }
4805
4806 if (numSameIDs == 0) {
4807 return;
4808 }
4809
4810 const map_type& srcRowMap = *(source.getRowMap());
4811 const map_type& tgtRowMap = *rowMap_;
4812 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4813 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
4814 const bool src_is_unique =
4815 srcCrs == nullptr ? false : srcCrs->isMerged();
4816 const bool tgt_is_unique = this->isMerged();
4817
4818 std::vector<GO> srcGblColIndsScratch;
4819 std::vector<GO> tgtGblColIndsScratch;
4820
4821 execute_sync_host_uvm_access(); // protect host UVM access
4822 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
4823 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
4824 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
4825 auto srcGblColInds = getRowGraphGlobalRow(
4826 srcGblColIndsScratch, source, srcGblRowInd);
4827 auto tgtGblColInds = getRowGraphGlobalRow(
4828 tgtGblColIndsScratch, *this, tgtGblRowInd);
4829 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
4830 tgtGblColInds.size(), tgt_is_unique,
4831 srcGblColInds.getRawPtr(),
4832 srcGblColInds.size(), src_is_unique);
4833 }
4834 if (verbose) {
4835 std::ostringstream os;
4836 os << *prefix << "Done" << endl;
4837 std::cerr << os.str();
4838 }
4839}
4840
4841template <class LocalOrdinal, class GlobalOrdinal, class Node>
4842void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4843 computeCrsPaddingForPermutedIDs(
4844 padding_type& padding,
4845 const RowGraph<local_ordinal_type, global_ordinal_type,
4846 node_type>& source,
4847 const Kokkos::DualView<const local_ordinal_type*,
4848 buffer_device_type>& permuteToLIDs,
4849 const Kokkos::DualView<const local_ordinal_type*,
4850 buffer_device_type>& permuteFromLIDs) const {
4851 using LO = local_ordinal_type;
4852 using GO = global_ordinal_type;
4853 using Details::Impl::getRowGraphGlobalRow;
4854 using std::endl;
4855 const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
4856
4857 std::unique_ptr<std::string> prefix;
4858 const bool verbose = verbose_;
4859 if (verbose) {
4860 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4861 std::ostringstream os;
4862 os << *prefix << "permuteToLIDs.extent(0): "
4863 << permuteToLIDs.extent(0)
4864 << ", permuteFromLIDs.extent(0): "
4865 << permuteFromLIDs.extent(0) << endl;
4866 std::cerr << os.str();
4867 }
4868
4869 if (permuteToLIDs.extent(0) == 0) {
4870 return;
4871 }
4872
4873 const map_type& srcRowMap = *(source.getRowMap());
4874 const map_type& tgtRowMap = *rowMap_;
4875 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4876 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
4877 const bool src_is_unique =
4878 srcCrs == nullptr ? false : srcCrs->isMerged();
4879 const bool tgt_is_unique = this->isMerged();
4880
4881 TEUCHOS_ASSERT(!permuteToLIDs.need_sync_host());
4882 auto permuteToLIDs_h = permuteToLIDs.view_host();
4883 TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_host());
4884 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4885
4886 std::vector<GO> srcGblColIndsScratch;
4887 std::vector<GO> tgtGblColIndsScratch;
4888 const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
4889
4890 execute_sync_host_uvm_access(); // protect host UVM access
4891 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
4892 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
4893 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
4894 auto srcGblColInds = getRowGraphGlobalRow(
4895 srcGblColIndsScratch, source, srcGblRowInd);
4896 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
4897 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
4898 auto tgtGblColInds = getRowGraphGlobalRow(
4899 tgtGblColIndsScratch, *this, tgtGblRowInd);
4900 padding.update_permute(whichPermute, tgtLclRowInd,
4901 tgtGblColInds.getRawPtr(),
4902 tgtGblColInds.size(), tgt_is_unique,
4903 srcGblColInds.getRawPtr(),
4904 srcGblColInds.size(), src_is_unique);
4905 }
4906
4907 if (verbose) {
4908 std::ostringstream os;
4909 os << *prefix << "Done" << endl;
4910 std::cerr << os.str();
4911 }
4912}
4913
4914template <class LocalOrdinal, class GlobalOrdinal, class Node>
4915std::unique_ptr<
4916 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4917CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4918 computeCrsPaddingForImports(
4919 const Kokkos::DualView<const local_ordinal_type*,
4920 buffer_device_type>& importLIDs,
4921 Kokkos::DualView<packet_type*, buffer_device_type> imports,
4922 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
4923 const bool verbose) const {
4924 using Details::Impl::getRowGraphGlobalRow;
4925 using std::endl;
4926 using LO = local_ordinal_type;
4927 using GO = global_ordinal_type;
4928 const char tfecfFuncName[] = "computeCrsPaddingForImports";
4929
4930 std::unique_ptr<std::string> prefix;
4931 if (verbose) {
4932 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4933 std::ostringstream os;
4934 os << *prefix << "importLIDs.extent(0): "
4935 << importLIDs.extent(0)
4936 << ", imports.extent(0): "
4937 << imports.extent(0)
4938 << ", numPacketsPerLID.extent(0): "
4939 << numPacketsPerLID.extent(0) << endl;
4940 std::cerr << os.str();
4941 }
4942
4943 const LO numImports = static_cast<LO>(importLIDs.extent(0));
4944 const int myRank = [&]() {
4945 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4946 return comm.is_null() ? -1 : comm->getRank();
4947 }();
4948 std::unique_ptr<padding_type> padding(
4949 new padding_type(myRank, numImports));
4950
4951 if (imports.need_sync_host()) {
4952 imports.sync_host();
4953 }
4954 auto imports_h = imports.view_host();
4955 if (numPacketsPerLID.need_sync_host()) {
4956 numPacketsPerLID.sync_host();
4957 }
4958 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
4959
4960 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
4961 auto importLIDs_h = importLIDs.view_host();
4962
4963 const map_type& tgtRowMap = *rowMap_;
4964 // Always merge source column indices, since isMerged() is
4965 // per-process state, and we don't know its value on other
4966 // processes that sent us data.
4967 constexpr bool src_is_unique = false;
4968 const bool tgt_is_unique = isMerged();
4969
4970 std::vector<GO> tgtGblColIndsScratch;
4971 size_t offset = 0;
4972 execute_sync_host_uvm_access(); // protect host UVM access
4973 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
4974 // CrsGraph packs just global column indices, while CrsMatrix
4975 // packs bytes (first the number of entries in the row, then the
4976 // global column indices, then other stuff like the matrix
4977 // values in that row).
4978 const LO origSrcNumEnt =
4979 static_cast<LO>(numPacketsPerLID_h[whichImport]);
4980 GO* const srcGblColInds = imports_h.data() + offset;
4981
4982 const LO tgtLclRowInd = importLIDs_h[whichImport];
4983 const GO tgtGblRowInd =
4984 tgtRowMap.getGlobalElement(tgtLclRowInd);
4985 auto tgtGblColInds = getRowGraphGlobalRow(
4986 tgtGblColIndsScratch, *this, tgtGblRowInd);
4987 const size_t origTgtNumEnt(tgtGblColInds.size());
4988
4989 padding->update_import(whichImport, tgtLclRowInd,
4990 tgtGblColInds.getRawPtr(),
4991 origTgtNumEnt, tgt_is_unique,
4992 srcGblColInds,
4993 origSrcNumEnt, src_is_unique);
4994 offset += origSrcNumEnt;
4995 }
4996
4997 if (verbose) {
4998 std::ostringstream os;
4999 os << *prefix << "Done" << endl;
5000 std::cerr << os.str();
5001 }
5002 return padding;
5003}
5004
5005template <class LocalOrdinal, class GlobalOrdinal, class Node>
5006std::unique_ptr<
5007 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
5008CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5009 computePaddingForCrsMatrixUnpack(
5010 const Kokkos::DualView<const local_ordinal_type*,
5011 buffer_device_type>& importLIDs,
5012 Kokkos::DualView<char*, buffer_device_type> imports,
5013 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5014 const bool verbose) const {
5015 using Details::PackTraits;
5016 using Details::Impl::getRowGraphGlobalRow;
5017 using std::endl;
5018 using LO = local_ordinal_type;
5019 using GO = global_ordinal_type;
5020 const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5021
5022 std::unique_ptr<std::string> prefix;
5023 if (verbose) {
5024 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5025 std::ostringstream os;
5026 os << *prefix << "importLIDs.extent(0): "
5027 << importLIDs.extent(0)
5028 << ", imports.extent(0): "
5029 << imports.extent(0)
5030 << ", numPacketsPerLID.extent(0): "
5031 << numPacketsPerLID.extent(0) << endl;
5032 std::cerr << os.str();
5033 }
5034 const bool extraVerbose =
5035 verbose && Details::Behavior::verbose("CrsPadding");
5036
5037 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5038 TEUCHOS_ASSERT(LO(numPacketsPerLID.extent(0)) >= numImports);
5039 const int myRank = [&]() {
5040 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
5041 return comm.is_null() ? -1 : comm->getRank();
5042 }();
5043 std::unique_ptr<padding_type> padding(
5044 new padding_type(myRank, numImports));
5045
5046 if (imports.need_sync_host()) {
5047 imports.sync_host();
5048 }
5049 auto imports_h = imports.view_host();
5050 if (numPacketsPerLID.need_sync_host()) {
5051 numPacketsPerLID.sync_host();
5052 }
5053 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5054
5055 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5056 auto importLIDs_h = importLIDs.view_host();
5057
5058 const map_type& tgtRowMap = *rowMap_;
5059 // Always merge source column indices, since isMerged() is
5060 // per-process state, and we don't know its value on other
5061 // processes that sent us data.
5062 constexpr bool src_is_unique = false;
5063 const bool tgt_is_unique = isMerged();
5064
5065 std::vector<GO> srcGblColIndsScratch;
5066 std::vector<GO> tgtGblColIndsScratch;
5067 size_t offset = 0;
5068 execute_sync_host_uvm_access(); // protect host UVM access
5069 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5070 // CrsGraph packs just global column indices, while CrsMatrix
5071 // packs bytes (first the number of entries in the row, then the
5072 // global column indices, then other stuff like the matrix
5073 // values in that row).
5074 const size_t numBytes = numPacketsPerLID_h[whichImport];
5075 if (extraVerbose) {
5076 std::ostringstream os;
5077 os << *prefix << "whichImport=" << whichImport
5078 << ", numImports=" << numImports
5079 << ", numBytes=" << numBytes << endl;
5080 std::cerr << os.str();
5081 }
5082 if (numBytes == 0) {
5083 continue; // special case: no entries to unpack for this row
5084 }
5085 LO origSrcNumEnt = 0;
5086 const size_t numEntBeg = offset;
5087 const size_t numEntLen =
5088 PackTraits<LO>::packValueCount(origSrcNumEnt);
5089 TEUCHOS_ASSERT(numBytes >= numEntLen);
5090 TEUCHOS_ASSERT(imports_h.extent(0) >= numEntBeg + numEntLen);
5091 PackTraits<LO>::unpackValue(origSrcNumEnt,
5092 imports_h.data() + numEntBeg);
5093 if (extraVerbose) {
5094 std::ostringstream os;
5095 os << *prefix << "whichImport=" << whichImport
5096 << ", numImports=" << numImports
5097 << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5098 std::cerr << os.str();
5099 }
5100 TEUCHOS_ASSERT(origSrcNumEnt >= LO(0));
5101 TEUCHOS_ASSERT(numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)));
5102 const size_t gidsBeg = numEntBeg + numEntLen;
5103 if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5104 srcGblColIndsScratch.resize(origSrcNumEnt);
5105 }
5106 GO* const srcGblColInds = srcGblColIndsScratch.data();
5107 PackTraits<GO>::unpackArray(srcGblColInds,
5108 imports_h.data() + gidsBeg,
5109 origSrcNumEnt);
5110 const LO tgtLclRowInd = importLIDs_h[whichImport];
5111 const GO tgtGblRowInd =
5112 tgtRowMap.getGlobalElement(tgtLclRowInd);
5113 auto tgtGblColInds = getRowGraphGlobalRow(
5114 tgtGblColIndsScratch, *this, tgtGblRowInd);
5115 const size_t origNumTgtEnt(tgtGblColInds.size());
5116
5117 if (extraVerbose) {
5118 std::ostringstream os;
5119 os << *prefix << "whichImport=" << whichImport
5120 << ", numImports=" << numImports
5121 << ": Call padding->update_import" << endl;
5122 std::cerr << os.str();
5123 }
5124 padding->update_import(whichImport, tgtLclRowInd,
5125 tgtGblColInds.getRawPtr(),
5126 origNumTgtEnt, tgt_is_unique,
5127 srcGblColInds,
5128 origSrcNumEnt, src_is_unique);
5129 offset += numBytes;
5130 }
5131
5132 if (verbose) {
5133 std::ostringstream os;
5134 os << *prefix << "Done" << endl;
5135 std::cerr << os.str();
5136 }
5137 return padding;
5138}
5139
// Pack the rows of `source` listed in `exportLIDs` into `exports`, and
// record the per-row packet counts in `numPacketsPerLID`.
//
// One of three packing routines is chosen:
//   1. source is a RowGraph but not a CrsGraph: RowGraph::pack() through
//      Teuchos array wrappers, followed by a copy into `exports`;
//   2. source is a CrsGraph and *this* graph has a column Map and either
//      non-empty packed row offsets or no local rows: packCrsGraphNew();
//   3. otherwise: the source CrsGraph's packFillActiveNew().
//
// Throws std::runtime_error if exportLIDs and numPacketsPerLID differ in
// length or if this (target) graph is fill complete, and
// std::invalid_argument if source is not a row_graph_type.
//
// NOTE(review): listing lines 5151 and 5243 are absent from this listing
// (presumably the using-declarations that bring ProfilingRegion and
// packCrsGraphNew into scope) -- confirm against the upstream file.
5140template <class LocalOrdinal, class GlobalOrdinal, class Node>
5141void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5142 packAndPrepare(const SrcDistObject& source,
5143 const Kokkos::DualView<const local_ordinal_type*,
5144 buffer_device_type>& exportLIDs,
5145 Kokkos::DualView<packet_type*,
5146 buffer_device_type>& exports,
5147 Kokkos::DualView<size_t*,
5148 buffer_device_type>
5149 numPacketsPerLID,
5150 size_t& constantNumPackets) {
5152 using GO = global_ordinal_type;
5153 using std::endl;
5154 using crs_graph_type =
5155 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5156 const char tfecfFuncName[] = "packAndPrepare: ";
5157 ProfilingRegion region_papn("Tpetra::CrsGraph::packAndPrepare");
5158
5159 const bool verbose = verbose_;
5160 std::unique_ptr<std::string> prefix;
5161 if (verbose) {
5162 prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5163 std::ostringstream os;
5164 os << *prefix << "Start" << endl;
5165 std::cerr << os.str();
5166 }
5167
5168 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(exportLIDs.extent(0) != numPacketsPerLID.extent(0),
5169 std::runtime_error,
5170 "exportLIDs.extent(0) = " << exportLIDs.extent(0)
5171 << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
5172 << ".");
5173 const row_graph_type* srcRowGraphPtr =
5174 dynamic_cast<const row_graph_type*>(&source);
5175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(srcRowGraphPtr == nullptr, std::invalid_argument,
5176 "Source of an Export "
5177 "or Import operation to a CrsGraph must be a RowGraph with the same "
5178 "template parameters.");
5179 // We don't check whether src_graph has had fillComplete called,
5180 // because it doesn't matter whether the *source* graph has been
5181 // fillComplete'd. The target graph can not be fillComplete'd yet.
5182 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete(), std::runtime_error,
5183 "The target graph of an Import or Export must not be fill complete.");
5184
// Null when the source is some other RowGraph implementation; that case is
// handled by the first branch below.
5185 const crs_graph_type* srcCrsGraphPtr =
5186 dynamic_cast<const crs_graph_type*>(&source);
5187
5188 if (srcCrsGraphPtr == nullptr) {
5189 using Teuchos::ArrayView;
5190 using LO = local_ordinal_type;
5191
5192 if (verbose) {
5193 std::ostringstream os;
5194 os << *prefix << "Source is a RowGraph but not a CrsGraph"
5195 << endl;
5196 std::cerr << os.str();
5197 }
5198 // RowGraph::pack serves the "old" DistObject interface. It
5199 // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5200 // entails deep-copying the exports buffer on output. RowGraph
5201 // is a convenience interface when not a CrsGraph, so we accept
5202 // the performance hit.
5203 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5204 auto exportLIDs_h = exportLIDs.view_host();
5205 ArrayView<const LO> exportLIDs_av(exportLIDs_h.data(),
5206 exportLIDs_h.extent(0));
5207 Teuchos::Array<GO> exports_a;
5208
// The host copy of numPacketsPerLID is about to be overwritten by pack(),
// so its sync state is reset and it is marked as modified on host.
5209 numPacketsPerLID.clear_sync_state();
5210 numPacketsPerLID.modify_host();
5211 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5212 ArrayView<size_t> numPacketsPerLID_av(numPacketsPerLID_h.data(),
5213 numPacketsPerLID_h.extent(0));
5214 srcRowGraphPtr->pack(exportLIDs_av, exports_a, numPacketsPerLID_av,
5215 constantNumPackets);
// Reallocate `exports` only when its length differs from what pack()
// produced, then copy the packed data into its host view.
5216 const size_t newSize = static_cast<size_t>(exports_a.size());
5217 if (static_cast<size_t>(exports.extent(0)) != newSize) {
5218 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5219 exports = exports_dv_type("exports", newSize);
5220 }
5221 Kokkos::View<const packet_type*, Kokkos::HostSpace,
5222 Kokkos::MemoryUnmanaged>
5223 exports_a_h(exports_a.getRawPtr(), newSize);
5224 exports.clear_sync_state();
5225 exports.modify_host();
5226 // DEEP_COPY REVIEW - NOT TESTED
5227 Kokkos::deep_copy(exports.view_host(), exports_a_h);
5228 }
5229 // packCrsGraphNew requires k_rowPtrsPacked_ to be set
// NOTE(review): getColMap(), getRowPtrsPackedDevice() and getRowMap() are
// called on *this* (the target), while the graph being packed is
// *srcCrsGraphPtr -- confirm which object the condition is meant to test.
5230 else if (!getColMap().is_null() &&
5231 (this->getRowPtrsPackedDevice().extent(0) != 0 ||
5232 getRowMap()->getLocalNumElements() == 0)) {
5233 if (verbose) {
5234 std::ostringstream os;
5235 os << *prefix << "packCrsGraphNew path" << endl;
5236 std::cerr << os.str();
5237 }
5238 using export_pids_type =
5239 Kokkos::DualView<const int*, buffer_device_type>;
5240 export_pids_type exportPIDs; // not filling it; needed for syntax
5241 using LO = local_ordinal_type;
5242 using NT = node_type;
5244 packCrsGraphNew<LO, GO, NT>(*srcCrsGraphPtr, exportLIDs, exportPIDs,
5245 exports, numPacketsPerLID,
5246 constantNumPackets, false);
5247 } else {
5248 srcCrsGraphPtr->packFillActiveNew(exportLIDs, exports, numPacketsPerLID,
5249 constantNumPackets);
5250 }
5251
5252 if (verbose) {
5253 std::ostringstream os;
5254 os << *prefix << "Done" << endl;
5255 std::cerr << os.str();
5256 }
5257}
5258
// pack (Teuchos::ArrayView / Teuchos::Array interface).
//
// Dispatches between two packing paths for the rows listed in
// exportLIDs.  The first branch is taken when this graph has a column
// Map and either getRowPtrsPackedDevice() is nonempty or the row Map
// has no local elements; otherwise the work is handed to
// packFillActive() with the same arguments.
//
// NOTE(review): the listing's own numbering skips 5260, 5268-5270 and
// 5273, so the return type / class qualifier, the body of the first
// branch, and the tail of the packFillActive() call are not visible
// here.  Judging by the existing comment, the first branch presumably
// calls packCrsGraph -- confirm against the real source.
5259template <class LocalOrdinal, class GlobalOrdinal, class Node>
5261 pack(const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5262 Teuchos::Array<GlobalOrdinal>& exports,
5263 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5264 size_t& constantNumPackets) const {
5265 auto col_map = this->getColMap();
5266 // packCrsGraph requires k_rowPtrsPacked to be set
5267 if (!col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() == 0)) {
5271 } else {
// Fallback path: no column Map, or packed row offsets are empty while
// the row Map still has local rows.
5272 this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5274 }
5275}
5276
5277template <class LocalOrdinal, class GlobalOrdinal, class Node>
5279 packFillActive(const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5280 Teuchos::Array<GlobalOrdinal>& exports,
5281 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5282 size_t& constantNumPackets) const {
5283 using std::endl;
5284 using LO = LocalOrdinal;
5285 using GO = GlobalOrdinal;
5286 using host_execution_space =
5287 typename Kokkos::View<size_t*, device_type>::
5288 host_mirror_type::execution_space;
5289 const char tfecfFuncName[] = "packFillActive: ";
5290 const bool verbose = verbose_;
5291
5292 const auto numExportLIDs = exportLIDs.size();
5293 std::unique_ptr<std::string> prefix;
5294 if (verbose) {
5295 prefix = this->createPrefix("CrsGraph", "allocateIndices");
5296 std::ostringstream os;
5297 os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5298 std::cerr << os.str();
5299 }
5300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.size(), std::runtime_error,
5301 "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5302 " = "
5303 << numPacketsPerLID.size() << ".");
5304
5305 const map_type& rowMap = *(this->getRowMap());
5306 const map_type* const colMapPtr = this->colMap_.getRawPtr();
5307 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr == nullptr, std::logic_error,
5308 "This graph claims to be locally indexed, but its column Map is nullptr. "
5309 "This should never happen. Please report this bug to the Tpetra "
5310 "developers.");
5311
5312 // We may pack different amounts of data for different rows.
5313 constantNumPackets = 0;
5314
5315 // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5316 // it might be now, but we might as well be safe).
5317 size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr();
5318 const LO* const exportLIDs_raw = exportLIDs.getRawPtr();
5319
5320 // Count the total number of packets (column indices, in the case
5321 // of a CrsGraph) to pack. While doing so, set
5322 // numPacketsPerLID[i] to the number of entries owned by the
5323 // calling process in (local) row exportLIDs[i] of the graph, that
5324 // the caller wants us to send out.
5325 Kokkos::RangePolicy<host_execution_space, LO> inputRange(0, numExportLIDs);
5326 size_t totalNumPackets = 0;
5327 size_t errCount = 0;
5328 // lambdas turn what they capture const, so we can't
5329 // atomic_add(&errCount,1). Instead, we need a View to modify.
5330 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5331 host_device_type;
5332 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5333 constexpr size_t ONE = 1;
5334
5335 execute_sync_host_uvm_access(); // protect host UVM access
5336 Kokkos::parallel_reduce(
5337 "Tpetra::CrsGraph::pack: totalNumPackets",
5338 inputRange,
5339 [=, *this](const LO& i, size_t& curTotalNumPackets) {
5340 const GO gblRow = rowMap.getGlobalElement(exportLIDs_raw[i]);
5341 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5342 Kokkos::atomic_add(&errCountView(), ONE);
5343 numPacketsPerLID_raw[i] = 0;
5344 } else {
5345 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5346 numPacketsPerLID_raw[i] = numEnt;
5347 curTotalNumPackets += numEnt;
5348 }
5349 },
5350 totalNumPackets);
5351
5352 if (verbose) {
5353 std::ostringstream os;
5354 os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5355 std::cerr << os.str();
5356 }
5357 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5358 "totalNumPackets count encountered "
5359 "one or more errors! errCount = "
5360 << errCount
5361 << ", totalNumPackets = " << totalNumPackets << ".");
5362 errCount = 0;
5363
5364 // Allocate space for all the column indices to pack.
5365 exports.resize(totalNumPackets);
5366
5367 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5368 "this->supportsRowViews() returns false; this should never happen. "
5369 "Please report this bug to the Tpetra developers.");
5370
5371 // Loop again over the rows to export, and pack rows of indices
5372 // into the output buffer.
5373
5374 if (verbose) {
5375 std::ostringstream os;
5376 os << *prefix << "Pack into exports" << endl;
5377 std::cerr << os.str();
5378 }
5379
5380 // Teuchos::ArrayView may not be thread safe, or may not be
5381 // efficiently thread safe. Better to use the raw pointer.
5382 GO* const exports_raw = exports.getRawPtr();
5383 errCount = 0;
5384 Kokkos::parallel_scan("Tpetra::CrsGraph::pack: pack from views",
5385 inputRange, [=, &prefix, *this](const LO i, size_t& exportsOffset, const bool final) {
5386 const size_t curOffset = exportsOffset;
5387 const GO gblRow = rowMap.getGlobalElement(exportLIDs_raw[i]);
5388 const RowInfo rowInfo =
5389 this->getRowInfoFromGlobalRowIndex(gblRow);
5390
5391 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5392 if (rowInfo.localRow == TDO::invalid()) {
5393 if (verbose) {
5394 std::ostringstream os;
5395 os << *prefix << ": INVALID rowInfo: i=" << i
5396 << ", lclRow=" << exportLIDs_raw[i] << endl;
5397 std::cerr << os.str();
5398 }
5399 Kokkos::atomic_add(&errCountView(), ONE);
5400 } else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5401 if (verbose) {
5402 std::ostringstream os;
5403 os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5404 << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5405 "(= "
5406 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5407 << ") > totalNumPackets (= " << totalNumPackets << ")."
5408 << endl;
5409 std::cerr << os.str();
5410 }
5411 Kokkos::atomic_add(&errCountView(), ONE);
5412 } else {
5413 const LO numEnt = static_cast<LO>(rowInfo.numEntries);
5414 if (this->isLocallyIndexed()) {
5415 auto lclColInds = getLocalIndsViewHost(rowInfo);
5416 if (final) {
5417 for (LO k = 0; k < numEnt; ++k) {
5418 const LO lclColInd = lclColInds(k);
5419 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5420 // Pack it, even if it's wrong. Let the receiving
5421 // process deal with it. Otherwise, we'll miss out
5422 // on any correct data.
5423 exports_raw[curOffset + k] = gblColInd;
5424 } // for each entry in the row
5425 } // final pass?
5426 exportsOffset = curOffset + numEnt;
5427 } else if (this->isGloballyIndexed()) {
5428 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5429 if (final) {
5430 for (LO k = 0; k < numEnt; ++k) {
5431 const GO gblColInd = gblColInds(k);
5432 // Pack it, even if it's wrong. Let the receiving
5433 // process deal with it. Otherwise, we'll miss out
5434 // on any correct data.
5435 exports_raw[curOffset + k] = gblColInd;
5436 } // for each entry in the row
5437 } // final pass?
5438 exportsOffset = curOffset + numEnt;
5439 }
5440 // If neither globally nor locally indexed, then the graph
5441 // has no entries in this row (or indeed, in any row on this
5442 // process) to pack.
5443 }
5444 });
5445
5446 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5447 "Packing encountered "
5448 "one or more errors! errCount = "
5449 << errCount
5450 << ", totalNumPackets = " << totalNumPackets << ".");
5451
5452 if (verbose) {
5453 std::ostringstream os;
5454 os << *prefix << "Done" << endl;
5455 std::cerr << os.str();
5456 }
5457}
5458
// packFillActiveNew (Kokkos::DualView interface).
//
// Packs the global column indices of the rows listed in exportLIDs into
// the host view of exports, working entirely on host:
//   1. a serial loop writes each requested row's entry count into the
//      host view of numPacketsPerLID and sums the counts;
//   2. exports is reallocated if it is smaller than that sum;
//   3. a host parallel_scan derives per-row offsets and, on its final
//      pass, writes the indices (converted through the column Map when
//      the graph is locally indexed).
//
// constantNumPackets is always set to 0.  Both numPacketsPerLID and
// exports are marked as modified on host.
//
// Raises std::runtime_error if exportLIDs and numPacketsPerLID have
// different extents, and std::logic_error if the graph is locally
// indexed without a column Map, if the counting loop finds a row that
// is not in the row Map, or if supportsRowViews() returns false.
// Errors counted during the scan are only reported in verbose output:
// the corresponding exception check near the end is commented out.
5459template <class LocalOrdinal, class GlobalOrdinal, class Node>
5460void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5461 packFillActiveNew(const Kokkos::DualView<const local_ordinal_type*,
5462 buffer_device_type>& exportLIDs,
5463 Kokkos::DualView<packet_type*,
5464 buffer_device_type>& exports,
5465 Kokkos::DualView<size_t*,
5466 buffer_device_type>
5467 numPacketsPerLID,
5468 size_t& constantNumPackets) const {
5469 using std::endl;
5470 using LO = local_ordinal_type;
5471 using GO = global_ordinal_type;
5472 using host_execution_space = typename Kokkos::View<size_t*,
5473 device_type>::host_mirror_type::execution_space;
5474 using host_device_type =
5475 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5476 using exports_dv_type =
5477 Kokkos::DualView<packet_type*, buffer_device_type>;
5478 const char tfecfFuncName[] = "packFillActiveNew: ";
5479 const bool verbose = verbose_;
5480
5481 const auto numExportLIDs = exportLIDs.extent(0);
5482 std::unique_ptr<std::string> prefix;
5483 if (verbose) {
5484 prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5485 std::ostringstream os;
5486 os << *prefix << "numExportLIDs: " << numExportLIDs
5487 << ", numPacketsPerLID.extent(0): "
5488 << numPacketsPerLID.extent(0) << endl;
5489 std::cerr << os.str();
5490 }
5491 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.extent(0), std::runtime_error,
5492 "exportLIDs.extent(0) = " << numExportLIDs
5493 << " != numPacketsPerLID.extent(0) = "
5494 << numPacketsPerLID.extent(0) << ".");
// The export LIDs are read on host below, so the host copy must already
// be current.
5495 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5496 auto exportLIDs_h = exportLIDs.view_host();
5497
5498 const map_type& rowMap = *(this->getRowMap());
5499 const map_type* const colMapPtr = this->colMap_.getRawPtr();
5500 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr == nullptr, std::logic_error,
5501 "This graph claims to be locally indexed, but its column Map is nullptr. "
5502 "This should never happen. Please report this bug to the Tpetra "
5503 "developers.");
5504
5505 // We may pack different amounts of data for different rows.
5506 constantNumPackets = 0;
5507
// numPacketsPerLID is overwritten on host, so discard any previous sync
// state before marking the host side modified.
5508 numPacketsPerLID.clear_sync_state();
5509 numPacketsPerLID.modify_host();
5510 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5511
5512 // Count the total number of packets (column indices, in the case
5513 // of a CrsGraph) to pack. While doing so, set
5514 // numPacketsPerLID[i] to the number of entries owned by the
5515 // calling process in (local) row exportLIDs[i] of the graph, that
5516 // the caller wants us to send out.
5517 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5518 range_type inputRange(0, numExportLIDs);
5519 size_t totalNumPackets = 0;
5520 size_t errCount = 0;
5521 // lambdas turn what they capture const, so we can't
5522 // atomic_add(&errCount,1). Instead, we need a View to modify.
5523 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5524 constexpr size_t ONE = 1;
5525
5526 if (verbose) {
5527 std::ostringstream os;
5528 os << *prefix << "Compute totalNumPackets" << endl;
5529 std::cerr << os.str();
5530 }
5531
5532 execute_sync_host_uvm_access(); // protect host UVM access
5533 totalNumPackets = 0;
// Counting pass: a plain serial loop over the requested rows.
5534 for (size_t i = 0; i < numExportLIDs; ++i) {
5535 const LO lclRow = exportLIDs_h[i];
5536 const GO gblRow = rowMap.getGlobalElement(lclRow);
5537 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5538 if (verbose) {
5539 std::ostringstream os;
5540 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5541 << " not in row Map on this process" << endl;
5542 std::cerr << os.str();
5543 }
5544 Kokkos::atomic_add(&errCountView(), ONE);
5545 numPacketsPerLID_h(i) = 0;
5546 } else {
5547 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5548 numPacketsPerLID_h(i) = numEnt;
5549 totalNumPackets += numEnt;
5550 }
5551 }
5552
5553 if (verbose) {
5554 std::ostringstream os;
5555 os << *prefix << "totalNumPackets: " << totalNumPackets
5556 << ", errCount: " << errCount << endl;
5557 std::cerr << os.str();
5558 }
5559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5560 "totalNumPackets count encountered "
5561 "one or more errors! totalNumPackets: "
5562 << totalNumPackets
5563 << ", errCount: " << errCount << ".");
5564
5565 // Allocate space for all the column indices to pack.
// Only grows the buffer: an exports buffer that is already large enough
// is reused as is.
5566 if (size_t(exports.extent(0)) < totalNumPackets) {
5567 // FIXME (mfh 09 Apr 2019) Create without initializing.
5568 exports = exports_dv_type("exports", totalNumPackets);
5569 }
5570
5571 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5572 "this->supportsRowViews() returns false; this should never happen. "
5573 "Please report this bug to the Tpetra developers.");
5574
5575 // Loop again over the rows to export, and pack rows of indices
5576 // into the output buffer.
5577
5578 if (verbose) {
5579 std::ostringstream os;
5580 os << *prefix << "Pack into exports buffer" << endl;
5581 std::cerr << os.str();
5582 }
5583
5584 exports.clear_sync_state();
5585 exports.modify_host();
5586 auto exports_h = exports.view_host();
5587
5588 errCount = 0;
5589
5590 // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5591 // (if locally indexed) or gblInds_wdv (if globally indexed).
5592 if (isLocallyIndexed())
5593 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5594 else if (isGloballyIndexed())
5595 gblInds_wdv.getHostView(Access::ReadOnly);
5596
// NOTE(review): the listing's numbering skips 5597 here and 5672 right
// after the scan; a statement on each side of the parallel_scan appears
// to be missing from this listing -- confirm against the real source.
5598 Kokkos::parallel_scan("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5599 inputRange, [=, &prefix, *this](const LO i, size_t& exportsOffset, const bool final) {
5600 const size_t curOffset = exportsOffset;
5601 const LO lclRow = exportLIDs_h(i);
5602 const GO gblRow = rowMap.getGlobalElement(lclRow);
5603 if (gblRow == Details::OrdinalTraits<GO>::invalid()) {
5604 if (verbose) {
5605 std::ostringstream os;
5606 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5607 << " not in row Map on this process" << endl;
5608 std::cerr << os.str();
5609 }
5610 Kokkos::atomic_add(&errCountView(), ONE);
5611 return;
5612 }
5613
5614 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(gblRow);
5615 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid()) {
5616 if (verbose) {
5617 std::ostringstream os;
5618 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5619 << ", gblRow=" << gblRow << ": invalid rowInfo"
5620 << endl;
5621 std::cerr << os.str();
5622 }
5623 Kokkos::atomic_add(&errCountView(), ONE);
5624 return;
5625 }
5626
// Guard against writing past the end of the buffer sized by the
// counting pass.
5627 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5628 if (verbose) {
5629 std::ostringstream os;
5630 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5631 << ", gblRow=" << gblRow << ", curOffset (= "
5632 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5633 << ") > totalNumPackets (= " << totalNumPackets
5634 << ")." << endl;
5635 std::cerr << os.str();
5636 }
5637 Kokkos::atomic_add(&errCountView(), ONE);
5638 return;
5639 }
5640
5641 const LO numEnt = static_cast<LO>(rowInfo.numEntries);
5642 if (this->isLocallyIndexed()) {
5643 auto lclColInds = getLocalIndsViewHost(rowInfo);
// Writes happen only on the scan's final pass; every pass still
// advances exportsOffset by this row's entry count.
5644 if (final) {
5645 for (LO k = 0; k < numEnt; ++k) {
5646 const LO lclColInd = lclColInds(k);
5647 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5648 // Pack it, even if it's wrong. Let the receiving
5649 // process deal with it. Otherwise, we'll miss out
5650 // on any correct data.
5651 exports_h(curOffset + k) = gblColInd;
5652 } // for each entry in the row
5653 } // final pass?
5654 exportsOffset = curOffset + numEnt;
5655 } else if (this->isGloballyIndexed()) {
5656 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5657 if (final) {
5658 for (LO k = 0; k < numEnt; ++k) {
5659 const GO gblColInd = gblColInds(k);
5660 // Pack it, even if it's wrong. Let the receiving
5661 // process deal with it. Otherwise, we'll miss out
5662 // on any correct data.
5663 exports_h(curOffset + k) = gblColInd;
5664 } // for each entry in the row
5665 } // final pass?
5666 exportsOffset = curOffset + numEnt;
5667 }
5668 // If neither globally nor locally indexed, then the graph
5669 // has no entries in this row (or indeed, in any row on this
5670 // process) to pack.
5671 });
5673
// The post-scan error check below is disabled, so a nonzero errCount
// from the scan is only visible in the verbose output that follows.
5674 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5675 // (errCount != 0, std::logic_error, "Packing encountered "
5676 // "one or more errors! errCount = " << errCount
5677 // << ", totalNumPackets = " << totalNumPackets << ".");
5678
5679 if (verbose) {
5680 std::ostringstream os;
5681 os << *prefix << "errCount=" << errCount << "; Done" << endl;
5682 std::cerr << os.str();
5683 }
5684}
5685
// unpackAndCombine (Kokkos::DualView interface).
//
// Inserts received column indices into this (target) graph, one
// imported row at a time, after first computing and applying row
// padding for the incoming data.  The constantNumPackets and
// combineMode arguments are unnamed and therefore unused.
//
// Per imported row i, the global column indices are read from the host
// view of imports.  If the graph is not locally indexed they go to
// insertGlobalIndicesFiltered(); otherwise each one is converted with
// colMap_->getLocalElement() into a scratch array and passed to
// insertLocalIndices().
//
// Raises std::runtime_error if the graph is fill complete, and wraps
// any std::exception thrown inside the insert loop in a
// std::runtime_error.
//
// NOTE(review): the listing's numbering skips 5687, 5689, 5691,
// 5694-5695, 5698, 5742, 5770 and 5824, so parts of the signature, the
// head of the extent check, the body of the scratch-sizing loop and the
// line that presumably advances importsOffset are not visible here --
// confirm against the real source.
5686template <class LocalOrdinal, class GlobalOrdinal, class Node>
5688 unpackAndCombine(const Kokkos::DualView<const local_ordinal_type*,
5690 Kokkos::DualView<packet_type*,
5692 imports,
5693 Kokkos::DualView<size_t*,
5696 const size_t /* constantNumPackets */,
5697 const CombineMode /* combineMode */) {
5699 using std::endl;
5700 using LO = local_ordinal_type;
5701 using GO = global_ordinal_type;
5702 const char tfecfFuncName[] = "unpackAndCombine";
5703
5704 ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
5705 const bool verbose = verbose_;
5706
5707 std::unique_ptr<std::string> prefix;
5708 if (verbose) {
5709 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5710 std::ostringstream os;
5711 os << *prefix << "Start" << endl;
5712 std::cerr << os.str();
5713 }
// Make room in the target rows for the incoming indices before any
// insertion happens.
5714 {
5715 auto padding = computeCrsPaddingForImports(
5716 importLIDs, imports, numPacketsPerLID, verbose);
5717 applyCrsPadding(*padding, verbose);
5718 if (verbose) {
5719 std::ostringstream os;
5720 os << *prefix << "Done computing & applying padding" << endl;
5721 std::cerr << os.str();
5722 }
5723 }
5724
5725 // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
5726 // reasonable meaning, whether or not the matrix is fill complete.
5727 // It's just more work to implement.
5728
5729 // We are not checking the value of the CombineMode input
5730 // argument. For CrsGraph, we only support import/export
5731 // operations if fillComplete has not yet been called. Any
5732 // incoming column-indices are inserted into the target graph. In
5733 // this context, CombineMode values of ADD vs INSERT are
5734 // equivalent. What is the meaning of REPLACE for CrsGraph? If a
5735 // duplicate column-index is inserted, it will be compressed out
5736 // when fillComplete is called.
5737 //
5738 // Note: I think REPLACE means that an existing row is replaced by
5739 // the imported row, i.e., the existing indices are cleared. CGB,
5740 // 6/17/2010
5741
5743 std::runtime_error, ": importLIDs.extent(0) = " << importLIDs.extent(0) << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0) << ".");
5744 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillComplete(), std::runtime_error,
5745 ": Import or Export operations are not allowed on a target "
5746 "CrsGraph that is fillComplete.");
5747
5748 const size_t numImportLIDs(importLIDs.extent(0));
// Unpacking runs on host, so bring the host copies of the counts and of
// the import buffer up to date first.
5749 if (numPacketsPerLID.need_sync_host()) {
5750 numPacketsPerLID.sync_host();
5751 }
5752 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5753 if (imports.need_sync_host()) {
5754 imports.sync_host();
5755 }
5756 auto imports_h = imports.view_host();
5757 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5758 auto importLIDs_h = importLIDs.view_host();
5759
5760 // If we're inserting in local indices, let's pre-allocate
5761 Teuchos::Array<LO> lclColInds;
5762 if (isLocallyIndexed()) {
5763 if (verbose) {
5764 std::ostringstream os;
5765 os << *prefix << "Preallocate local indices scratch" << endl;
5766 std::cerr << os.str();
5767 }
5768 size_t maxNumInserts = 0;
5769 for (size_t i = 0; i < numImportLIDs; ++i) {
5771 }
5772 if (verbose) {
5773 std::ostringstream os;
5774 os << *prefix << "Local indices scratch size: "
5775 << maxNumInserts << endl;
5776 std::cerr << os.str();
5777 }
5778 lclColInds.resize(maxNumInserts);
5779 } else {
5780 if (verbose) {
5781 std::ostringstream os;
5782 os << *prefix;
5783 if (isGloballyIndexed()) {
5784 os << "Graph is globally indexed";
5785 } else {
5786 os << "Graph is neither locally nor globally indexed";
5787 }
5788 os << endl;
5789 std::cerr << os.str();
5790 }
5791 }
5792
5793 TEUCHOS_ASSERT(!rowMap_.is_null());
5794 const map_type& rowMap = *rowMap_;
5795
5796 try {
5797 size_t importsOffset = 0;
5798 for (size_t i = 0; i < numImportLIDs; ++i) {
5799 if (verbose) {
5800 std::ostringstream os;
5801 os << *prefix << "i=" << i << ", numImportLIDs="
5802 << numImportLIDs << endl;
5803 std::cerr << os.str();
5804 }
5805 // We can only unpack into owned rows, since we only have
5806 // local row indices.
5807 const LO lclRow = importLIDs_h[i];
5808 const GO gblRow = rowMap.getGlobalElement(lclRow);
5809 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
5810 std::logic_error, "importLIDs[i=" << i << "]=" << lclRow << " is not in the row Map on the calling "
5811 "process.");
// NOTE(review): the size_t packet count is narrowed to LO here.
5812 const LO numEnt = numPacketsPerLID_h[i];
5813 const GO* const gblColInds = (numEnt == 0) ? nullptr : imports_h.data() + importsOffset;
5814 if (!isLocallyIndexed()) {
5815 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
5816 } else {
5817 // FIXME (mfh 09 Feb 2020) Now would be a good time to do
5818 // column Map filtering.
5819 for (LO j = 0; j < numEnt; j++) {
5820 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
5821 }
5822 insertLocalIndices(lclRow, numEnt, lclColInds.data());
5823 }
5825 }
5826 } catch (std::exception& e) {
// Rethrow with context; the logic_error raised above for a bad import
// LID is also caught here and surfaces as std::runtime_error.
5827 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
5828 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
5829 "exception: "
5830 << endl
5831 << e.what());
5832 }
5833
5834 if (verbose) {
5835 std::ostringstream os;
5836 os << *prefix << "Done" << endl;
5837 std::cerr << os.str();
5838 }
5839}
5840
// removeEmptyProcessesInPlace
//
// Re-targets this graph onto newMap (which may be null) and its
// communicator.  newMap becomes the row Map.  The domain and range Maps
// are set to newMap when they were the same object as the old row Map,
// and are otherwise rebuilt with replaceCommWithSubset(newComm); the
// column Map is always rebuilt that way when present.  When the new
// communicator is non-null, the Export and Import objects are
// reconsidered as well.
//
// All new state is computed into locals first and assigned to the
// members only at the very end, so a failure part-way through leaves
// the object untouched.
//
// NOTE(review): the listing's numbering skips 5842, 5852-5854, 5857,
// 5900, 5902-5903, 5911 and 5913-5914, so the return type / class
// qualifier, the declarations of the locals (rowMap, domainMap,
// rangeMap, colMap, exporter, importer, newComm) and the statements
// that build the new Export / Import are not visible here -- confirm
// against the real source.
5841template <class LocalOrdinal, class GlobalOrdinal, class Node>
5843 removeEmptyProcessesInPlace(const Teuchos::RCP<const map_type>& newMap) {
5844 using Teuchos::Comm;
5845 using Teuchos::null;
5846 using Teuchos::ParameterList;
5847 using Teuchos::RCP;
5848
5849 // We'll set all the state "transactionally," so that this method
5850 // satisfies the strong exception guarantee. This object's state
5851 // won't be modified until the end of this method.
5855
5856 rowMap = newMap;
5858 (newMap.is_null()) ? null : newMap->getComm();
5859
5860 if (!domainMap_.is_null()) {
5861 if (domainMap_.getRawPtr() == rowMap_.getRawPtr()) {
5862 // Common case: original domain and row Maps are identical.
5863 // In that case, we need only replace the original domain Map
5864 // with the new Map. This ensures that the new domain and row
5865 // Maps _stay_ identical.
5866 domainMap = newMap;
5867 } else {
5868 domainMap = domainMap_->replaceCommWithSubset(newComm);
5869 }
5870 }
5871 if (!rangeMap_.is_null()) {
5872 if (rangeMap_.getRawPtr() == rowMap_.getRawPtr()) {
5873 // Common case: original range and row Maps are identical. In
5874 // that case, we need only replace the original range Map with
5875 // the new Map. This ensures that the new range and row Maps
5876 // _stay_ identical.
5877 rangeMap = newMap;
5878 } else {
5879 rangeMap = rangeMap_->replaceCommWithSubset(newComm);
5880 }
5881 }
5882 if (!colMap_.is_null()) {
5883 colMap = colMap_->replaceCommWithSubset(newComm);
5884 }
5885
5886 // (Re)create the Export and / or Import if necessary.
5887 if (!newComm.is_null()) {
5888 RCP<ParameterList> params = this->getNonconstParameterList(); // could be null
5889 //
5890 // The operations below are collective on the new communicator.
5891 //
5892 // (Re)create the Export object if necessary. If I haven't
5893 // called fillComplete yet, I don't have a rangeMap, so I must
5894 // first check if the _original_ rangeMap is not null. Ditto
5895 // for the Import object and the domain Map.
// An Export is only considered when the range Map differs from the row
// Map: first a cheap pointer comparison, then isSameAs().
5896 if (!rangeMap_.is_null() &&
5897 rangeMap != rowMap &&
5898 !rangeMap->isSameAs(*rowMap)) {
5899 if (params.is_null() || !params->isSublist("Export")) {
5901 } else {
5904 }
5905 }
5906 // (Re)create the Import object if necessary.
// Same test as above, this time between the domain Map and the column
// Map.
5907 if (!domainMap_.is_null() &&
5908 domainMap != colMap &&
5909 !domainMap->isSameAs(*colMap)) {
5910 if (params.is_null() || !params->isSublist("Import")) {
5912 } else {
5915 }
5916 }
5917 } // if newComm is not null
5918
5919 // Defer side effects until the end. If no destructors throw
5920 // exceptions (they shouldn't anyway), then this method satisfies
5921 // the strong exception guarantee.
5922 exporter_ = exporter;
5923 importer_ = importer;
5924 rowMap_ = rowMap;
5925 // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
5926 // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
5927 // the same object. We might want to get rid of this redundant
5928 // pointer sometime, but for now, we'll leave it alone and just
5929 // set map_ to the same object.
5930 this->map_ = rowMap;
5931 domainMap_ = domainMap;
5932 rangeMap_ = rangeMap;
5933 colMap_ = colMap;
5934}
5935
// getLocalDiagOffsets
//
// For each local row, computes the offset of the diagonal entry
// relative to the start of that row and stores it in offsets.  The
// diagonal is identified by comparing global indices, since the row Map
// and column Map may differ.
//
// If the graph is fill complete, the work is delegated to
// Details::getGraphDiagOffsets on the device local graph.  Otherwise the
// offsets are computed row by row into a host mirror and then copied to
// offsets.  A row whose diagonal column is not in the column Map, or
// which is empty or has bad row info, gets
// Tpetra::Details::OrdinalTraits<size_t>::invalid().
//
// offsets [out] Must have at least getLocalNumRows() entries.
//
// Raises std::runtime_error if the graph has no column Map, and
// std::invalid_argument if offsets is too short.  When debug_ is set,
// the offsets found on host are cross-checked, the per-process results
// are min-all-reduced, and std::runtime_error is raised if any process
// reported a problem.
//
// NOTE(review): the listing's numbering skips 5937, 5971, 6003,
// 6013-6014, 6074, 6077 and 6092, so the return type / class qualifier,
// the declaration of wrongOffsets, the tail of the findRelOffset call
// (and presumably the store of its result into offsets_h), and parts of
// the debug reduction are not visible here -- confirm against the real
// source.
5936template <class LocalOrdinal, class GlobalOrdinal, class Node>
5938 getLocalDiagOffsets(const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const {
5939 using std::endl;
5940 using LO = LocalOrdinal;
5941 using GO = GlobalOrdinal;
5942 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
5943 const bool verbose = verbose_;
5944
5945 std::unique_ptr<std::string> prefix;
5946 if (verbose) {
5947 prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
5948 std::ostringstream os;
5949 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
5950 << endl;
5951 std::cerr << os.str();
5952 }
5953
5954 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error, "The graph must have a column Map.");
5955 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
5956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<LO>(offsets.extent(0)) < lclNumRows,
5957 std::invalid_argument, "offsets.extent(0) = " << offsets.extent(0) << " < getLocalNumRows() = " << lclNumRows << ".");
5958
5959 const map_type& rowMap = *(this->getRowMap());
5960 const map_type& colMap = *(this->getColMap());
5961
5962 // We only use these in debug mode, but since debug mode is a
5963 // run-time option, they need to exist here. That's why we create
5964 // the vector with explicit size zero, to avoid overhead if debug
5965 // mode is off.
5966 bool allRowMapDiagEntriesInColMap = true;
5967 bool allDiagEntriesFound = true;
5968 bool allOffsetsCorrect = true;
5969 bool noOtherWeirdness = true;
5970 using wrong_offsets_type = std::vector<std::pair<LO, size_t>>;
5972
5973 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
5974 // the subset of Map functionality that we need below.
5975 auto lclRowMap = rowMap.getLocalMap();
5976 auto lclColMap = colMap.getLocalMap();
5977
5978 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
5979 // setup, at least on the host. For CUDA, we have to use LocalMap
5980 // (that comes from each of the two Maps).
5981
5982 const bool sorted = this->isSorted();
5983 if (isFillComplete()) {
// Fill-complete path: hand the device local graph's row offsets and
// entries to the shared helper.
5984 auto lclGraph = this->getLocalGraphDevice();
5985 ::Tpetra::Details::getGraphDiagOffsets(offsets, lclRowMap, lclColMap,
5986 lclGraph.row_map,
5987 lclGraph.entries, sorted);
5988 } else {
5989 // NOTE (mfh 22 Feb 2017): We have to run this code on host,
5990 // since the graph is not fill complete. The previous version
5991 // of this code assumed UVM; this version does not.
5992 auto offsets_h = Kokkos::create_mirror_view(offsets);
5993
5994 for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
5995 // Find the diagonal entry. Since the row Map and column Map
5996 // may differ, we have to compare global row and column
5997 // indices, not local.
5998 const GO gblRowInd = lclRowMap.getGlobalElement(lclRowInd);
5999 const GO gblColInd = gblRowInd;
6000 const LO lclColInd = lclColMap.getLocalElement(gblColInd);
6001
6002 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
// The diagonal column is not in the column Map on this process, so
// this row cannot have a diagonal entry.
6004 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid();
6005 } else {
6006 const RowInfo rowInfo = this->getRowInfo(lclRowInd);
6007 if (static_cast<LO>(rowInfo.localRow) == lclRowInd &&
6008 rowInfo.numEntries > 0) {
6009 auto colInds = this->getLocalIndsViewHost(rowInfo);
6010 const size_t hint = 0; // not needed for this algorithm
6011 const size_t offset =
6012 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
6015
6016 if (debug_) {
6017 // Now that we have what we think is an offset, make sure
6018 // that it really does point to the diagonal entry. Offsets
6019 // are _relative_ to each row, not absolute (for the whole
6020 // (local) graph).
6021 typename local_inds_dualv_type::t_host::const_type lclColInds;
6022 try {
6023 lclColInds = this->getLocalIndsViewHost(rowInfo);
6024 } catch (...) {
6025 noOtherWeirdness = false;
6026 }
6027 // Don't continue with error checking if the above failed.
6028 if (noOtherWeirdness) {
6029 const size_t numEnt = lclColInds.extent(0);
6030 if (offset >= numEnt) {
6031 // Offsets are relative to each row, so this means that
6032 // the offset is out of bounds.
6033 allOffsetsCorrect = false;
6034 wrongOffsets.push_back(std::make_pair(lclRowInd, offset));
6035 } else {
6036 const LO actualLclColInd = lclColInds(offset);
6037 const GO actualGblColInd = lclColMap.getGlobalElement(actualLclColInd);
6038 if (actualGblColInd != gblColInd) {
6039 allOffsetsCorrect = false;
6040 wrongOffsets.push_back(std::make_pair(lclRowInd, offset));
6041 }
6042 }
6043 }
6044 } // debug_
6045 } else { // either row is empty, or something went wrong w/ getRowInfo()
6046 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid();
6047 allDiagEntriesFound = false;
6048 }
6049 } // whether lclColInd is a valid local column index
6050 } // for each local row
6051 // DEEP_COPY REVIEW - NOT TESTED
6052 Kokkos::deep_copy(offsets, offsets_h);
6053 } // whether the graph is fill complete
6054
// wrongOffsets is only filled inside the debug_ check above, so this
// report can only appear when debug_ is set too.
6055 if (verbose && wrongOffsets.size() != 0) {
6056 std::ostringstream os;
6057 os << *prefix << "Wrong offsets: [";
6058 for (size_t k = 0; k < wrongOffsets.size(); ++k) {
6059 os << "(" << wrongOffsets[k].first << ","
6060 << wrongOffsets[k].second << ")";
6061 if (k + 1 < wrongOffsets.size()) {
6062 os << ", ";
6063 }
6064 }
6065 os << "]" << endl;
6066 std::cerr << os.str();
6067 }
6068
6069 if (debug_) {
// Agree on success across all processes: each flag is 1 on success and
// 0 on failure, so a REDUCE_MIN of 0 means some process failed.
6070 using std::endl;
6071 using Teuchos::reduceAll;
6072 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
6073 const bool localSuccess =
6075 const int numResults = 5;
6076 int lclResults[5];
6078 lclResults[1] = allDiagEntriesFound ? 1 : 0;
6079 lclResults[2] = allOffsetsCorrect ? 1 : 0;
6080 lclResults[3] = noOtherWeirdness ? 1 : 0;
6081 // min-all-reduce will compute least rank of all the processes
6082 // that didn't succeed.
6083 lclResults[4] = !localSuccess ? comm->getRank() : comm->getSize();
6084
6085 int gblResults[5];
6086 gblResults[0] = 0;
6087 gblResults[1] = 0;
6088 gblResults[2] = 0;
6089 gblResults[3] = 0;
6090 gblResults[4] = 0;
6091 reduceAll<int, int>(*comm, Teuchos::REDUCE_MIN,
6093
6094 if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1 || gblResults[3] != 1) {
6095 std::ostringstream os; // build error message
6096 os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6097 "possibly among others): "
6098 << endl;
6099 if (gblResults[0] == 0) {
6100 os << " - The column Map does not contain at least one diagonal entry "
6101 "of the graph."
6102 << endl;
6103 }
6104 if (gblResults[1] == 0) {
6105 os << " - On one or more processes, some row does not contain a "
6106 "diagonal entry."
6107 << endl;
6108 }
6109 if (gblResults[2] == 0) {
6110 os << " - On one or more processes, some offsets are incorrect."
6111 << endl;
6112 }
6113 if (gblResults[3] == 0) {
6114 os << " - One or more processes had some other error."
6115 << endl;
6116 }
6117 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6118 }
6119 } // debug_
6120}
6121
6122template <class LocalOrdinal, class GlobalOrdinal, class Node>
6124 getLocalOffRankOffsets(offset_device_view_type& offsets) const {
6125 using std::endl;
6126 const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6127 const bool verbose = verbose_;
6128
6129 std::unique_ptr<std::string> prefix;
6130 if (verbose) {
6131 prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6132 std::ostringstream os;
6133 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6134 << endl;
6135 std::cerr << os.str();
6136 }
6137
6138 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error, "The graph must have a column Map.");
6139 // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6140
6141 const size_t lclNumRows = this->getLocalNumRows();
6142
6143 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows + 1) {
6144 offsets = k_offRankOffsets_;
6145 return;
6146 }
6147 haveLocalOffRankOffsets_ = false;
6148
6149 const map_type& colMap = *(this->getColMap());
6150 const map_type& domMap = *(this->getDomainMap());
6151
6152 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6153 // the subset of Map functionality that we need below.
6154 auto lclColMap = colMap.getLocalMap();
6155 auto lclDomMap = domMap.getLocalMap();
6156
6157 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6158 // setup, at least on the host. For CUDA, we have to use LocalMap
6159 // (that comes from each of the two Maps).
6160
6161 TEUCHOS_ASSERT(this->isSorted());
6162 if (isFillComplete()) {
6163 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows + 1);
6164 auto lclGraph = this->getLocalGraphDevice();
6165 ::Tpetra::Details::getGraphOffRankOffsets(k_offRankOffsets_,
6167 lclGraph);
6168 offsets = k_offRankOffsets_;
6169 haveLocalOffRankOffsets_ = true;
6170 } else {
6171 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, "Can't get off-rank offsets for non-fill-complete graph");
6172 }
6173}
6174
6175namespace { // (anonymous)
6176
6177// mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6178// below). The point is to avoid the deep copy between the input
6179// Teuchos::ArrayRCP and the internally used Kokkos::View. We
6180// can't use UVM to avoid the deep copy with CUDA, because the
6181// ArrayRCP is a host pointer, while the input to the graph's
6182// getLocalDiagOffsets method is a device pointer. Assigning a
6183// host pointer to a device pointer is incorrect unless the host
6184// pointer points to host pinned memory. The goal is to get rid
6185// of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6186// copy for backwards compatibility.
6187//
6188// We have to use template magic because
6189// "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6190// if device_type::memory_space is not Kokkos::HostSpace (as is
6191// the case with CUDA).
6192
6193template <class DeviceType,
6194 const bool memSpaceIsHostSpace =
6195 std::is_same<typename DeviceType::memory_space,
6196 Kokkos::HostSpace>::value>
6197struct HelpGetLocalDiagOffsets {};
6198
6199template <class DeviceType>
6200struct HelpGetLocalDiagOffsets<DeviceType, true> {
6201 typedef DeviceType device_type;
6202 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6203 Kokkos::MemoryUnmanaged>
6204 device_offsets_type;
6205 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6206 Kokkos::MemoryUnmanaged>
6207 host_offsets_type;
6208
6209 static device_offsets_type
6210 getDeviceOffsets(const host_offsets_type& hostOffsets) {
6211 // Host and device are the same; no need to allocate a
6212 // temporary device View.
6213 return hostOffsets;
6214 }
6215
6216 static void
6217 copyBackIfNeeded(const host_offsets_type& /* hostOffsets */,
6218 const device_offsets_type& /* deviceOffsets */) { /* copy back not needed; host and device are the same */
6219 }
6220};
6221
6222template <class DeviceType>
6223struct HelpGetLocalDiagOffsets<DeviceType, false> {
6224 typedef DeviceType device_type;
6225 // We have to do a deep copy, since host memory space != device
6226 // memory space. Thus, the device View is managed (we need to
6227 // allocate a temporary device View).
6228 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6229 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6230 Kokkos::MemoryUnmanaged>
6231 host_offsets_type;
6232
6233 static device_offsets_type
6234 getDeviceOffsets(const host_offsets_type& hostOffsets) {
6235 // Host memory space != device memory space, so we must
6236 // allocate a temporary device View for the graph.
6237 return device_offsets_type("offsets", hostOffsets.extent(0));
6238 }
6239
6240 static void
6241 copyBackIfNeeded(const host_offsets_type& hostOffsets,
6242 const device_offsets_type& deviceOffsets) {
6243 // DEEP_COPY REVIEW - NOT TESTED
6244 Kokkos::deep_copy(hostOffsets, deviceOffsets);
6245 }
6246};
6247} // namespace
6248
6249template <class LocalOrdinal, class GlobalOrdinal, class Node>
6251 getLocalDiagOffsets(Teuchos::ArrayRCP<size_t>& offsets) const {
6252 typedef LocalOrdinal LO;
6253 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6254 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::runtime_error,
6255 "The graph does not yet have a column Map.");
6256 const LO myNumRows = static_cast<LO>(this->getLocalNumRows());
6257 if (static_cast<LO>(offsets.size()) != myNumRows) {
6258 // NOTE (mfh 21 Jan 2016) This means that the method does not
6259 // satisfy the strong exception guarantee (no side effects
6260 // unless successful).
6261 offsets.resize(myNumRows);
6262 }
6263
6264 // mfh 21 Jan 2016: This method unfortunately takes a
6265 // Teuchos::ArrayRCP, which is host memory. The graph wants a
6266 // device pointer. We can't access host memory from the device;
6267 // that's the wrong direction for UVM. (It's the right direction
6268 // for inefficient host pinned memory, but we don't want to use
6269 // that here.) Thus, if device memory space != host memory space,
6270 // we allocate and use a temporary device View to get the offsets.
6271 // If the two spaces are equal, the template magic makes the deep
6272 // copy go away.
6274 typedef typename helper_type::host_offsets_type host_offsets_type;
6275 // Unmanaged host View that views the output array.
6276 host_offsets_type hostOffsets(offsets.getRawPtr(), myNumRows);
6277 // Allocate temp device View if host != device, else reuse host array.
6278 auto deviceOffsets = helper_type::getDeviceOffsets(hostOffsets);
6279 // NOT recursion; this calls the overload that takes a device View.
6280 this->getLocalDiagOffsets(deviceOffsets);
6281 helper_type::copyBackIfNeeded(hostOffsets, deviceOffsets);
6282}
6283
6284template <class LocalOrdinal, class GlobalOrdinal, class Node>
6286 supportsRowViews() const {
6287 return true;
6288}
6289
6290template <class LocalOrdinal, class GlobalOrdinal, class Node>
6293 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6294 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>>& domainTransfer,
6295 const Teuchos::RCP<const map_type>& domainMap,
6296 const Teuchos::RCP<const map_type>& rangeMap,
6297 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6298 using std::string;
6299 using Teuchos::ArrayRCP;
6300 using Teuchos::ArrayView;
6301 using Teuchos::Comm;
6302 using Teuchos::ParameterList;
6303 using Teuchos::rcp;
6304 using Teuchos::RCP;
6309
6310 using LO = LocalOrdinal;
6311 using GO = GlobalOrdinal;
6312 using NT = node_type;
6315
6316 const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6317
6318 auto MM = rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC Pack-1"));
6319
6320 // Make sure that the input argument rowTransfer is either an
6321 // Import or an Export. Import and Export are the only two
6322 // subclasses of Transfer that we defined, but users might
6323 // (unwisely, for now at least) decide to implement their own
6324 // subclasses. Exclude this possibility.
6325 const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6326 const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6328 xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6329 prefix << "The 'rowTransfer' input argument must be either an Import or "
6330 "an Export, and its template parameters must match the corresponding "
6331 "template parameters of the CrsGraph.");
6332
6333 // Make sure that the input argument domainTransfer is either an
6334 // Import or an Export. Import and Export are the only two
6335 // subclasses of Transfer that we defined, but users might
6336 // (unwisely, for now at least) decide to implement their own
6337 // subclasses. Exclude this possibility.
6338 Teuchos::RCP<const import_type> xferDomainAsImport =
6339 Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6340 Teuchos::RCP<const export_type> xferDomainAsExport =
6341 Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6342
6343 if (!domainTransfer.is_null()) {
6345 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6346 prefix << "The 'domainTransfer' input argument must be either an "
6347 "Import or an Export, and its template parameters must match the "
6348 "corresponding template parameters of the CrsGraph.");
6349
6351 (xferAsImport != nullptr || !xferDomainAsImport.is_null()) &&
6352 ((xferAsImport != nullptr && xferDomainAsImport.is_null()) ||
6353 (xferAsImport == nullptr && !xferDomainAsImport.is_null())),
6354 std::invalid_argument,
6355 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6356 "must be of the same type (either Import or Export).");
6357
6359 (xferAsExport != nullptr || !xferDomainAsExport.is_null()) &&
6360 ((xferAsExport != nullptr && xferDomainAsExport.is_null()) ||
6361 (xferAsExport == nullptr && !xferDomainAsExport.is_null())),
6362 std::invalid_argument,
6363 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6364 "must be of the same type (either Import or Export).");
6365
6366 } // domainTransfer != null
6367
6368 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6369 // if the source Map is not distributed but the target Map is?
6370 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6371
6372 //
6373 // Get the caller's parameters
6374 //
6375
6376 bool reverseMode = false; // Are we in reverse mode?
6377 bool restrictComm = false; // Do we need to restrict the communicator?
6378 RCP<ParameterList> graphparams; // parameters for the destination graph
6379 if (!params.is_null()) {
6380 reverseMode = params->get("Reverse Mode", reverseMode);
6381 restrictComm = params->get("Restrict Communicator", restrictComm);
6382 graphparams = sublist(params, "CrsGraph");
6383 }
6384
6385 // Get the new domain and range Maps. We need some of them for error
6386 // checking, now that we have the reverseMode parameter.
6387 RCP<const map_type> MyRowMap = reverseMode ? rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6388 RCP<const map_type> MyColMap; // create this below
6389 RCP<const map_type> MyDomainMap = !domainMap.is_null() ? domainMap : getDomainMap();
6390 RCP<const map_type> MyRangeMap = !rangeMap.is_null() ? rangeMap : getRangeMap();
6391 RCP<const map_type> BaseRowMap = MyRowMap;
6392 RCP<const map_type> BaseDomainMap = MyDomainMap;
6393
6394 // If the user gave us a nonnull destGraph, then check whether it's
6395 // "pristine." That means that it has no entries.
6396 //
6397 // FIXME (mfh 15 May 2014) If this is not true on all processes,
6398 // then this exception test may hang. It would be better to
6399 // forward an error flag to the next communication phase.
6400 if (!destGraph.is_null()) {
6401 // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6402 // whether a graph or matrix has no entries on the calling
6403 // process, is that it is neither locally nor globally indexed.
6404 // This may change eventually with the Kokkos refactor version
6405 // of Tpetra, so it would be better just to check the quantity
6406 // of interest directly. Note that with the Kokkos refactor
6407 // version of Tpetra, asking for the total number of entries in
6408 // a graph or matrix that is not fill complete might require
6409 // computation (kernel launch), since it is not thread scalable
6410 // to update a count every time an entry is inserted.
6411 const bool NewFlag =
6412 !destGraph->isLocallyIndexed() && !destGraph->isGloballyIndexed();
6413 TEUCHOS_TEST_FOR_EXCEPTION(!NewFlag, std::invalid_argument,
6414 prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6415 "if its graph is empty (neither locally nor globally indexed).");
6416
6417 // FIXME (mfh 15 May 2014) At some point, we want to change
6418 // graphs and matrices so that their DistObject Map
6419 // (this->getMap()) may differ from their row Map. This will
6420 // make redistribution for 2-D distributions more efficient. I
6421 // hesitate to change this check, because I'm not sure how much
6422 // the code here depends on getMap() and getRowMap() being the
6423 // same.
6424 TEUCHOS_TEST_FOR_EXCEPTION(
6425 !destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6426 prefix << "The (row) Map of the input argument 'destGraph' is not the "
6427 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6428
6429 TEUCHOS_TEST_FOR_EXCEPTION(
6430 !destGraph->checkSizes(*this), std::invalid_argument,
6431 prefix << "You provided a nonnull destination graph, but checkSizes() "
6432 "indicates that it is not a legal legal target for redistribution from "
6433 "the source graph (*this). This may mean that they do not have the "
6434 "same dimensions.");
6435 }
6436
6437 // If forward mode (the default), then *this's (row) Map must be
6438 // the same as the source Map of the Transfer. If reverse mode,
6439 // then *this's (row) Map must be the same as the target Map of
6440 // the Transfer.
6441 //
6442 // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6443 // and matrices so that their DistObject Map (this->getMap()) may
6444 // differ from their row Map. This will make redistribution for
6445 // 2-D distributions more efficient. I hesitate to change this
6446 // check, because I'm not sure how much the code here depends on
6447 // getMap() and getRowMap() being the same.
6448 TEUCHOS_TEST_FOR_EXCEPTION(
6449 !(reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6450 std::invalid_argument, prefix << "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6451
6452 TEUCHOS_TEST_FOR_EXCEPTION(
6453 !(!reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6454 std::invalid_argument, prefix << "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6455
6456 // checks for domainTransfer
6457 TEUCHOS_TEST_FOR_EXCEPTION(
6458 !xferDomainAsImport.is_null() && !xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6459 std::invalid_argument,
6460 prefix << "The target map of the 'domainTransfer' input argument must be "
6461 "the same as the rebalanced domain map 'domainMap'");
6462
6463 TEUCHOS_TEST_FOR_EXCEPTION(
6464 !xferDomainAsExport.is_null() && !xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6465 std::invalid_argument,
6466 prefix << "The source map of the 'domainTransfer' input argument must be "
6467 "the same as the rebalanced domain map 'domainMap'");
6468
6469 // The basic algorithm here is:
6470 //
6471 // 1. Call the moral equivalent of "Distor.do" to handle the import.
6472 // 2. Copy all the Imported and Copy/Permuted data into the raw
6473 // CrsGraph pointers, still using GIDs.
6474 // 3. Call an optimized version of MakeColMap that avoids the
6475 // Directory lookups (since the importer knows who owns all the
6476 // GIDs) AND reindexes to LIDs.
6477 // 4. Call expertStaticFillComplete()
6478
6479 // Get information from the Importer
6480 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6481 ArrayView<const LO> ExportLIDs = reverseMode ? rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6482 ArrayView<const LO> RemoteLIDs = reverseMode ? rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6483 ArrayView<const LO> PermuteToLIDs = reverseMode ? rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6484 ArrayView<const LO> PermuteFromLIDs = reverseMode ? rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6485 Distributor& Distor = rowTransfer.getDistributor();
6486
6487 // Owning PIDs
6488 Teuchos::Array<int> SourcePids;
6489 Teuchos::Array<int> TargetPids;
6490 int MyPID = getComm()->getRank();
6491
6492 // Temp variables for sub-communicators
6493 RCP<const map_type> ReducedRowMap, ReducedColMap,
6494 ReducedDomainMap, ReducedRangeMap;
6495 RCP<const Comm<int>> ReducedComm;
6496
6497 // If the user gave us a null destGraph, then construct the new
6498 // destination graph. We will replace its column Map later.
6499 if (destGraph.is_null()) {
6500 destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6501 }
6502
6503 /***************************************************/
6504 /***** 1) First communicator restriction phase ****/
6505 /***************************************************/
6506 if (restrictComm) {
6507 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6508 ReducedComm = ReducedRowMap.is_null() ? Teuchos::null : ReducedRowMap->getComm();
6509 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6510
6511 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ? ReducedRowMap : MyDomainMap->replaceCommWithSubset(ReducedComm);
6512 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ? ReducedRowMap : MyRangeMap->replaceCommWithSubset(ReducedComm);
6513
6514 // Reset the "my" maps
6515 MyRowMap = ReducedRowMap;
6516 MyDomainMap = ReducedDomainMap;
6517 MyRangeMap = ReducedRangeMap;
6518
6519 // Update my PID, if we've restricted the communicator
6520 if (!ReducedComm.is_null()) {
6521 MyPID = ReducedComm->getRank();
6522 } else {
6523 MyPID = -2; // For debugging
6524 }
6525 } else {
6526 ReducedComm = MyRowMap->getComm();
6527 }
6528
6529 /***************************************************/
6530 /***** 2) From Tpetra::DistObject::doTransfer() ****/
6531 /***************************************************/
6532 MM = Teuchos::null;
6533 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC ImportSetup"));
6534 // Get the owning PIDs
6535 RCP<const import_type> MyImporter = getImporter();
6536
6537 // check whether domain maps of source graph and base domain map is the same
6538 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6539
6540 if (!restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6541 // Same domain map as source graph
6542 //
6543 // NOTE: This won't work for restrictComm (because the Import
6544 // doesn't know the restricted PIDs), though writing an
6545 // optimized version for that case would be easy (Import an
6546 // IntVector of the new PIDs). Might want to add this later.
6547 Import_Util::getPids(*MyImporter, SourcePids, false);
6548 } else if (restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6549 // Same domain map as source graph (restricted communicator)
6550 // We need one import from the domain to the column map
6551 ivector_type SourceDomain_pids(getDomainMap(), true);
6552 ivector_type SourceCol_pids(getColMap());
6553 // SourceDomain_pids contains the restricted pids
6554 SourceDomain_pids.putScalar(MyPID);
6555
6556 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6557 SourcePids.resize(getColMap()->getLocalNumElements());
6558 SourceCol_pids.get1dCopy(SourcePids());
6559 } else if (MyImporter.is_null() && bSameDomainMap) {
6560 // Graph has no off-process entries
6561 SourcePids.resize(getColMap()->getLocalNumElements());
6562 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6563 } else if (!MyImporter.is_null() &&
6564 !domainTransfer.is_null()) {
6565 // general implementation for rectangular matrices with
6566 // domain map different than SourceGraph domain map.
6567 // User has to provide a DomainTransfer object. We need
6568 // two communications (import/export).
6569
6570 // TargetDomain_pids lives on the rebalanced new domain map
6571 ivector_type TargetDomain_pids(domainMap);
6572 TargetDomain_pids.putScalar(MyPID);
6573
6574 // SourceDomain_pids lives on the non-rebalanced old domain map
6575 ivector_type SourceDomain_pids(getDomainMap());
6576
6577 // SourceCol_pids lives on the non-rebalanced old column map
6578 ivector_type SourceCol_pids(getColMap());
6579
6580 if (!reverseMode && !xferDomainAsImport.is_null()) {
6581 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6582 } else if (reverseMode && !xferDomainAsExport.is_null()) {
6583 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6584 } else if (!reverseMode && !xferDomainAsExport.is_null()) {
6585 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6586 } else if (reverseMode && !xferDomainAsImport.is_null()) {
6587 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6588 } else {
6589 TEUCHOS_TEST_FOR_EXCEPTION(
6590 true, std::logic_error,
6591 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6592 }
6593 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6594 SourcePids.resize(getColMap()->getLocalNumElements());
6595 SourceCol_pids.get1dCopy(SourcePids());
6596 } else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6597 getDomainMap()->isSameAs(*getRowMap())) {
6598 // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6599 ivector_type TargetRow_pids(domainMap);
6600 ivector_type SourceRow_pids(getRowMap());
6601 ivector_type SourceCol_pids(getColMap());
6602
6603 TargetRow_pids.putScalar(MyPID);
6604 if (!reverseMode && xferAsImport != nullptr) {
6605 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
6606 } else if (reverseMode && xferAsExport != nullptr) {
6607 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
6608 } else if (!reverseMode && xferAsExport != nullptr) {
6609 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
6610 } else if (reverseMode && xferAsImport != nullptr) {
6611 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
6612 } else {
6613 TEUCHOS_TEST_FOR_EXCEPTION(
6614 true, std::logic_error,
6615 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6616 }
6617 SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
6618 SourcePids.resize(getColMap()->getLocalNumElements());
6619 SourceCol_pids.get1dCopy(SourcePids());
6620 } else {
6621 TEUCHOS_TEST_FOR_EXCEPTION(
6622 true, std::invalid_argument,
6623 prefix << "This method only allows either domainMap == getDomainMap(), "
6624 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
6625 }
6626
6627 // Tpetra-specific stuff
6628 size_t constantNumPackets = destGraph->constantNumberOfPackets();
6629 if (constantNumPackets == 0) {
6630 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6631 RemoteLIDs.size());
6632 } else {
6633 // There are a constant number of packets per element. We
6634 // already know (from the number of "remote" (incoming)
6635 // elements) how many incoming elements we expect, so we can
6636 // resize the buffer accordingly.
6637 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6638 destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
6639 }
6640
6641 {
6642 // packAndPrepare* methods modify numExportPacketsPerLID_.
6643 destGraph->numExportPacketsPerLID_.modify_host();
6644 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6645 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6646
6647 // Pack & Prepare w/ owning PIDs
6648 packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
6649 numExportPacketsPerLID, ExportLIDs,
6650 SourcePids, constantNumPackets);
6651 }
6652
6653 // Do the exchange of remote data.
6654 MM = Teuchos::null;
6655 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC Transfer"));
6656
6657 if (communication_needed) {
6658 if (reverseMode) {
6659 if (constantNumPackets == 0) { // variable number of packets per LID
6660 // Make sure that host has the latest version, since we're
6661 // using the version on host. If host has the latest
6662 // version, syncing to host does nothing.
6663 destGraph->numExportPacketsPerLID_.sync_host();
6664 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6665 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6666 destGraph->numImportPacketsPerLID_.sync_host();
6667 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
6668 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6669
6670 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6671 destGraph->numImportPacketsPerLID_.view_host());
6672 size_t totalImportPackets = 0;
6673 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6674 totalImportPackets += numImportPacketsPerLID[i];
6675 }
6676
6677 // Reallocation MUST go before setting the modified flag,
6678 // because it may clear out the flags.
6679 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
6680 destGraph->imports_.modify_host();
6681 auto hostImports = destGraph->imports_.view_host();
6682 // This is a legacy host pack/unpack path, so use the host
6683 // version of exports_.
6684 destGraph->exports_.sync_host();
6685 auto hostExports = destGraph->exports_.view_host();
6686 Distor.doReversePostsAndWaits(hostExports,
6687 numExportPacketsPerLID,
6688 hostImports,
6689 numImportPacketsPerLID);
6690 } else { // constant number of packets per LI
6691 destGraph->imports_.modify_host();
6692 auto hostImports = destGraph->imports_.view_host();
6693 // This is a legacy host pack/unpack path, so use the host
6694 // version of exports_.
6695 destGraph->exports_.sync_host();
6696 auto hostExports = destGraph->exports_.view_host();
6697 Distor.doReversePostsAndWaits(hostExports,
6698 constantNumPackets,
6699 hostImports);
6700 }
6701 } else { // forward mode (the default)
6702 if (constantNumPackets == 0) { // variable number of packets per LID
6703 // Make sure that host has the latest version, since we're
6704 // using the version on host. If host has the latest
6705 // version, syncing to host does nothing.
6706 destGraph->numExportPacketsPerLID_.sync_host();
6707 destGraph->numImportPacketsPerLID_.sync_host();
6708 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6709 destGraph->numImportPacketsPerLID_.view_host());
6710
6711 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6712 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6713 size_t totalImportPackets = 0;
6714 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6715 totalImportPackets += numImportPacketsPerLID[i];
6716 }
6717
6718 // Reallocation MUST go before setting the modified flag,
6719 // because it may clear out the flags.
6720 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
6721 destGraph->imports_.modify_host();
6722 auto hostImports = destGraph->imports_.view_host();
6723 // This is a legacy host pack/unpack path, so use the host
6724 // version of exports_.
6725 destGraph->exports_.sync_host();
6726 auto hostExports = destGraph->exports_.view_host();
6727 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6728 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6729 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
6730 } else { // constant number of packets per LID
6731 destGraph->imports_.modify_host();
6732 auto hostImports = destGraph->imports_.view_host();
6733 // This is a legacy host pack/unpack path, so use the host
6734 // version of exports_.
6735 destGraph->exports_.sync_host();
6736 auto hostExports = destGraph->exports_.view_host();
6737 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
6738 }
6739 }
6740 }
6741
6742 /*********************************************************************/
6743 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
6744 /*********************************************************************/
6745
6746 MM = Teuchos::null;
6747 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC Unpack-1"));
6748
6749 // Backwards compatibility measure. We'll use this again below.
6750 destGraph->numImportPacketsPerLID_.sync_host();
6751 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6752 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6753 destGraph->imports_.sync_host();
6754 Teuchos::ArrayView<const packet_type> hostImports =
6755 getArrayViewFromDualView(destGraph->imports_);
6756 size_t mynnz =
6757 unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
6758 numImportPacketsPerLID,
6759 constantNumPackets, INSERT,
6760 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
6761 size_t N = BaseRowMap->getLocalNumElements();
6762
6763 // Allocations
6764 ArrayRCP<size_t> CSR_rowptr(N + 1);
6765 ArrayRCP<GO> CSR_colind_GID;
6766 ArrayRCP<LO> CSR_colind_LID;
6767 CSR_colind_GID.resize(mynnz);
6768
6769 // If LO and GO are the same, we can reuse memory when
6770 // converting the column indices from global to local indices.
6771 if (typeid(LO) == typeid(GO)) {
6772 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
6773 } else {
6774 CSR_colind_LID.resize(mynnz);
6775 }
6776
6777 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
6778 // unpackAndCombine method on a "CrsArrays" object? This passing
6779 // in a huge list of arrays is icky. Can't we have a bit of an
6780 // abstraction? Implementing a concrete DistObject subclass only
6781 // takes five methods.
6782 unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
6783 numImportPacketsPerLID, constantNumPackets,
6784 INSERT, NumSameIDs, PermuteToLIDs,
6785 PermuteFromLIDs, N, mynnz, MyPID,
6786 CSR_rowptr(), CSR_colind_GID(),
6787 SourcePids(), TargetPids);
6788
6789 /**************************************************************/
6790 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
6791 /**************************************************************/
6792 MM = Teuchos::null;
6793 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC Unpack-2"));
6794 // Call an optimized version of makeColMap that avoids the
6795 // Directory lookups (since the Import object knows who owns all
6796 // the GIDs).
6797 Teuchos::Array<int> RemotePids;
6798 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
6799 CSR_colind_LID(),
6800 CSR_colind_GID(),
6801 BaseDomainMap,
6802 TargetPids, RemotePids,
6803 MyColMap,
6804 params);
6805
6806 /*******************************************************/
6807 /**** 4) Second communicator restriction phase ****/
6808 /*******************************************************/
6809 if (restrictComm) {
6810 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
6811 MyColMap = ReducedColMap; // Reset the "my" maps
6812 }
6813
6814 // Replace the col map
6815 destGraph->replaceColMap(MyColMap);
6816
6817 // Short circuit if the processor is no longer in the communicator
6818 //
6819 // NOTE: Epetra modifies all "removed" processes so they
6820 // have a dummy (serial) Map that doesn't touch the original
6821 // communicator. Duplicating that here might be a good idea.
6822 if (ReducedComm.is_null()) {
6823 return;
6824 }
6825
6826 /***************************************************/
6827 /**** 5) Sort ****/
6828 /***************************************************/
6829 if ((!reverseMode && xferAsImport != nullptr) ||
6830 (reverseMode && xferAsExport != nullptr)) {
6831 Import_Util::sortCrsEntries(CSR_rowptr(),
6832 CSR_colind_LID());
6833 } else if ((!reverseMode && xferAsExport != nullptr) ||
6834 (reverseMode && xferAsImport != nullptr)) {
6835 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
6836 CSR_colind_LID());
6837 if (CSR_rowptr[N] != mynnz) {
6838 CSR_colind_LID.resize(CSR_rowptr[N]);
6839 }
6840 } else {
6841 TEUCHOS_TEST_FOR_EXCEPTION(
6842 true, std::logic_error,
6843 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6844 }
6845 /***************************************************/
6846 /**** 6) Reset the colmap and the arrays ****/
6847 /***************************************************/
6848
6849 // Call constructor for the new graph (restricted as needed)
6850 //
6851 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
6852
6853 /***************************************************/
6854 /**** 7) Build Importer & Call ESFC ****/
6855 /***************************************************/
6856 // Pre-build the importer using the existing PIDs
6857 Teuchos::ParameterList esfc_params;
6858 MM = Teuchos::null;
6859 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC CreateImporter"));
6860 RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
6861 MM = Teuchos::null;
6862 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC ESFC"));
6863
6864 if (!params.is_null())
6865 esfc_params.set("compute global constants", params->get("compute global constants", true));
6866
6867 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
6868 MyImport, Teuchos::null, rcp(&esfc_params, false));
6869}
6870
6871template <class LocalOrdinal, class GlobalOrdinal, class Node>
6874 const import_type& importer,
6875 const Teuchos::RCP<const map_type>& domainMap,
6876 const Teuchos::RCP<const map_type>& rangeMap,
6877 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6878 transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
6879}
6880
6881template <class LocalOrdinal, class GlobalOrdinal, class Node>
6884 const import_type& rowImporter,
6886 const Teuchos::RCP<const map_type>& domainMap,
6887 const Teuchos::RCP<const map_type>& rangeMap,
6888 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6889 transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
6890}
6891
6892template <class LocalOrdinal, class GlobalOrdinal, class Node>
6895 const export_type& exporter,
6896 const Teuchos::RCP<const map_type>& domainMap,
6897 const Teuchos::RCP<const map_type>& rangeMap,
6898 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6899 transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
6900}
6901
6902template <class LocalOrdinal, class GlobalOrdinal, class Node>
6905 const export_type& rowExporter,
6907 const Teuchos::RCP<const map_type>& domainMap,
6908 const Teuchos::RCP<const map_type>& rangeMap,
6909 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6910 transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
6911}
6912
6913template <class LocalOrdinal, class GlobalOrdinal, class Node>
6916 std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
6917
6918 std::swap(graph.rowMap_, this->rowMap_);
6919 std::swap(graph.colMap_, this->colMap_);
6920 std::swap(graph.rangeMap_, this->rangeMap_);
6921 std::swap(graph.domainMap_, this->domainMap_);
6922
6923 std::swap(graph.importer_, this->importer_);
6924 std::swap(graph.exporter_, this->exporter_);
6925
6926 std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
6927
6928 std::swap(graph.globalNumEntries_, this->globalNumEntries_);
6929 std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
6930
6931 std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
6932
6933 std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
6934 std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
6935
6936 std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
6937 std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
6938 std::swap(graph.packedUnpackedRowPtrsMatch_, this->packedUnpackedRowPtrsMatch_);
6939
6940 std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
6941
6942 std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
6943 std::swap(graph.gblInds_wdv, this->gblInds_wdv);
6944 std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
6945
6946 std::swap(graph.storageStatus_, this->storageStatus_);
6947
6948 std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
6949 std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
6950 std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
6951 std::swap(graph.fillComplete_, this->fillComplete_);
6952 std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
6953 std::swap(graph.noRedundancies_, this->noRedundancies_);
6954 std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
6955 std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
6956 std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
6957
6958 std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
6959
6960 std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
6961 std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
6962 std::swap(graph.nonlocals_, this->nonlocals_); // std::map
6963}
6964
6965template <class LocalOrdinal, class GlobalOrdinal, class Node>
6968 auto compare_nonlocals = [&](const nonlocals_type& m1, const nonlocals_type& m2) {
6969 bool output = true;
6970 output = m1.size() == m2.size() ? output : false;
6971 for (auto& it_m : m1) {
6972 size_t key = it_m.first;
6973 output = m2.find(key) != m2.end() ? output : false;
6974 if (output) {
6975 auto v1 = m1.find(key)->second;
6976 auto v2 = m2.find(key)->second;
6977 std::sort(v1.begin(), v1.end());
6978 std::sort(v2.begin(), v2.end());
6979
6980 output = v1.size() == v2.size() ? output : false;
6981 for (size_t i = 0; output && i < v1.size(); i++) {
6982 output = v1[i] == v2[i] ? output : false;
6983 }
6984 }
6985 }
6986 return output;
6987 };
6988
6989 bool output = true;
6990
6991 output = this->rowMap_->isSameAs(*(graph.rowMap_)) ? output : false;
6992 output = this->colMap_->isSameAs(*(graph.colMap_)) ? output : false;
6993 output = this->rangeMap_->isSameAs(*(graph.rangeMap_)) ? output : false;
6994 output = this->domainMap_->isSameAs(*(graph.domainMap_)) ? output : false;
6995
6996 output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
6997
6998 output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
6999 output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7000
7001 output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7002
7003 output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7004
7005 output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7006 output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7007 output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7008 output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7009 output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7010 output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7011 output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7012 output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7013 output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7014 output = this->sortGhostsAssociatedWithEachProcessor_ == graph.sortGhostsAssociatedWithEachProcessor_ ? output : false;
7015
7016 // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7017 // nonlocals_ isa std::map<GO, std::vector<GO> >
7018 output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7019
7020 // Compare k_numAllocPerRow_ isa Kokkos::View::host_mirror_type
7021 // - since this is a host_mirror_type type, it should be in host memory already
7022 output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7023 if (output && this->k_numAllocPerRow_.extent(0) > 0) {
7024 for (size_t i = 0; output && i < this->k_numAllocPerRow_.extent(0); i++)
7025 output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7026 }
7027
7028 // Compare k_numRowEntries_ isa Kokkos::View::host_mirror_type
7029 // - since this is a host_mirror_type type, it should be in host memory already
7030 output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7031 if (output && this->k_numRowEntries_.extent(0) > 0) {
7032 for (size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7033 output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7034 }
7035
7036 // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7037 {
7038 auto rowPtrsThis = this->getRowPtrsUnpackedHost();
7039 auto rowPtrsGraph = graph.getRowPtrsUnpackedHost();
7040 output = rowPtrsThis.extent(0) == rowPtrsGraph.extent(0) ? output : false;
7041 for (size_t i = 0; output && i < rowPtrsThis.extent(0); i++)
7042 output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7043 }
7044
7045 // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7046 output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7047 if (output && this->lclIndsUnpacked_wdv.extent(0) > 0) {
7048 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7049 auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7050 for (size_t i = 0; output && i < indThis.extent(0); i++)
7051 output = indThis(i) == indGraph(i) ? output : false;
7052 }
7053
7054 // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7055 output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7056 if (output && this->gblInds_wdv.extent(0) > 0) {
7057 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7058 auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7059 for (size_t i = 0; output && i < indtThis.extent(0); i++)
7060 output = indtThis(i) == indtGraph(i) ? output : false;
7061 }
7062
7063 // Check lclGraph_ isa
7064 // KokkosSparse::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7065 // KokkosSparse::StaticCrsGraph has 3 data members in it:
7066 // Kokkos::View<size_type*, ...> row_map
7067 // (local_graph_device_type::row_map_type)
7068 // Kokkos::View<data_type*, ...> entries
7069 // (local_graph_device_type::entries_type)
7070 // Kokkos::View<size_type*, ...> row_block_offsets
7071 // (local_graph_device_type::row_block_type)
7072 // There is currently no KokkosSparse::StaticCrsGraph comparison function
7073 // that's built-in, so we will just compare
7074 // the three data items here. This can be replaced if Kokkos ever
7075 // puts in its own comparison routine.
7076 local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7077 local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7078
7079 output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7080 ? output
7081 : false;
7082 if (output && thisLclGraph.row_map.extent(0) > 0) {
7085 for (size_t i = 0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7087 ? output
7088 : false;
7089 }
7090
7091 output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7092 ? output
7093 : false;
7094 if (output && thisLclGraph.entries.extent(0) > 0) {
7097 for (size_t i = 0; output && i < lclGraph_entries_host_this.extent(0); i++)
7099 ? output
7100 : false;
7101 }
7102
7103 output =
7104 thisLclGraph.row_block_offsets.extent(0) ==
7105 graphLclGraph.row_block_offsets.extent(0)
7106 ? output
7107 : false;
7108 if (output && thisLclGraph.row_block_offsets.extent(0) > 0) {
7109 auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7110 auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7111 for (size_t i = 0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7113 ? output
7114 : false;
7115 }
7116
7117 // For Importer and Exporter, we don't need to explicitly check them since
7118 // they will be consistent with the maps.
7119 // Note: importer_ isa Teuchos::RCP<const import_type>
7120 // exporter_ isa Teuchos::RCP<const export_type>
7121
7122 return output;
7123}
7124
7125template <class LocalOrdinal, class GlobalOrdinal, class Node>
7129 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
7130 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
7132 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
7133 using LO = LocalOrdinal;
7134 using GO = GlobalOrdinal;
7135 typedef typename crs_graph_type::global_inds_device_view_type::non_const_value_type global_inds_device_value_t;
7136 typedef typename Node::execution_space exec_space;
7137 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
7138
7139 const LocalOrdinal LINV = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
7140 const GlobalOrdinal GINV = Teuchos::OrdinalTraits<GlobalOrdinal>::invalid();
7141
7142 using local_map_type = typename crs_graph_type::map_type::local_map_type;
7143 local_map_type srcRowMapLocal = srcCrsGraph.getRowMap()->getLocalMap();
7144 local_map_type srcColMapLocal = srcCrsGraph.getColMap()->getLocalMap();
7145 local_map_type tgtRowMapLocal = tgtCrsGraph.getRowMap()->getLocalMap();
7146
7147 auto tgtLocalRowPtrsDevice = tgtCrsGraph.getRowPtrsUnpackedDevice();
7148 auto tgtGlobalColInds = tgtCrsGraph.gblInds_wdv.getDeviceView(Access::ReadWrite);
7149 auto srcLocalRowPtrsDevice = srcCrsGraph.getLocalRowPtrsDevice();
7150 auto srcLocalColIndsDevice = srcCrsGraph.lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly);
7151
7152 typename crs_graph_type::num_row_entries_type::non_const_type h_numRowEnt = tgtCrsGraph.k_numRowEntries_;
7153
7154 auto k_numRowEnt = Kokkos::create_mirror_view_and_copy(device_type(), h_numRowEnt);
7155
7156 const bool sorted = false;
7157
7158 bool hasMap = permuteFromLIDs.extent(0) > 0;
7159 auto permuteToLIDs_d = permuteToLIDs.view_device();
7160 auto permuteFromLIDs_d = permuteFromLIDs.view_device();
7161
7162#ifdef CRSGRAPH_INNER_ABORT
7163#undef CRSGRAPH_INNER_ABORT
7164#endif
7165
7166#ifdef KOKKOS_ENABLE_SYCL
7167#define CRSGRAPH_INNER_ABORT(lin) \
7168 do { \
7169 sycl::ext::oneapi::experimental::printf("ERROR: Tpetra_CrsGraph_def.hpp:%d", lin); \
7170 Kokkos::abort("error"); \
7171 } while (0)
7172#else
7173#define CRSGRAPH_INNER_ABORT(lin) \
7174 do { \
7175 printf("ERROR: Tpetra_CrsGraph_def.hpp:%d", lin); \
7176 Kokkos::abort("error"); \
7177 } while (0)
7178#endif
7179
7180 Kokkos::parallel_for(
7181 "Tpetra_CrsGraph::copyAndPermuteNew",
7182 range_type(0, loopEnd),
7183 KOKKOS_LAMBDA(const LO sourceLID) {
7184 auto srcLid = sourceLID;
7185 auto tgtLid = sourceLID;
7186 if (hasMap) {
7189 }
7190 auto srcGid = srcRowMapLocal.getGlobalElement(srcLid);
7191 if (srcGid == GINV) CRSGRAPH_INNER_ABORT(__LINE__);
7192 auto tgtGid = tgtRowMapLocal.getGlobalElement(tgtLid);
7193 auto tgtLocalRow = tgtRowMapLocal.getLocalElement(tgtGid);
7194 if (tgtLocalRow == LINV) CRSGRAPH_INNER_ABORT(__LINE__);
7195 if (tgtLocalRow != tgtLid) CRSGRAPH_INNER_ABORT(__LINE__);
7196 auto tgtNumEntries = k_numRowEnt(tgtLocalRow);
7197
7198 // FIXME no auto use
7199 auto start = srcLocalRowPtrsDevice(srcLid);
7200 auto end = srcLocalRowPtrsDevice(srcLid + 1);
7201 auto rowLength = (end - start);
7202
7203 auto tstart = tgtLocalRowPtrsDevice(tgtLocalRow);
7204 auto tend = tstart + tgtNumEntries;
7205 auto tend1 = tgtLocalRowPtrsDevice(tgtLocalRow + 1);
7206
7207 const size_t num_avail = (tend1 < tend) ? size_t(0) : tend1 - tend;
7208 size_t num_inserted = 0;
7209
7210 global_inds_device_value_t* tgtGlobalColIndsPtr = tgtGlobalColInds.data();
7211
7212 size_t hint = 0;
7213 for (size_t j = 0; j < rowLength; j++) {
7214 auto ci = srcLocalColIndsDevice(start + j);
7215 GO gi = srcColMapLocal.getGlobalElement(ci);
7216 if (gi == GINV) CRSGRAPH_INNER_ABORT(__LINE__);
7217 auto numInTgtRow = (tend - tstart);
7218
7219 const size_t offset = KokkosSparse::findRelOffset(
7220 tgtGlobalColIndsPtr + tstart, numInTgtRow, gi, hint, sorted);
7221
7222 if (offset == numInTgtRow) {
7223 if (num_inserted >= num_avail) { // not enough room
7224 Kokkos::abort("num_avail");
7225 }
7226 tgtGlobalColIndsPtr[tstart + offset] = gi;
7227 ++tend;
7228 hint = offset + 1;
7229 ++num_inserted;
7230 }
7231 }
7232 k_numRowEnt(tgtLocalRow) += num_inserted;
7233 return size_t(0);
7234 });
7235 Kokkos::deep_copy(tgtCrsGraph.k_numRowEntries_, k_numRowEnt);
7236 tgtCrsGraph.setLocallyModified();
7237}
7238
7239template <class LocalOrdinal, class GlobalOrdinal, class Node>
7240void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::copyAndPermuteNew(
7241 const row_graph_type& srcRowGraph,
7242 row_graph_type& tgtRowGraph,
7243 const size_t numSameIDs,
7244 const Kokkos::DualView<const local_ordinal_type*,
7245 buffer_device_type>& permuteToLIDs,
7246 const Kokkos::DualView<const local_ordinal_type*,
7247 buffer_device_type>& permuteFromLIDs,
7248 const CombineMode CM) {
7249 using std::endl;
7250 using LO = local_ordinal_type;
7251 using GO = global_ordinal_type;
7252 const char tfecfFuncName[] = "copyAndPermuteNew: ";
7253 const bool verbose = verbose_;
7254
7255 Details::ProfilingRegion regionCAP("Tpetra::CrsGraph::copyAndPermuteNew");
7256 std::unique_ptr<std::string> prefix;
7257 if (verbose) {
7258 prefix = this->createPrefix("CrsGraph", "copyAndPermuteNew");
7259 std::ostringstream os;
7260 os << *prefix << endl;
7261 std::cerr << os.str();
7262 }
7263
7264 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7265 permuteToLIDs.extent(0) != permuteFromLIDs.extent(0),
7266 std::runtime_error,
7267 "permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) << " != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) << ".");
7268
7269 if (verbose) {
7270 std::ostringstream os;
7271 os << *prefix << "Compute padding" << endl;
7272 std::cerr << os.str();
7273 }
7274
7275 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
7276 const crs_graph_type* srcCrsGraphPtr = dynamic_cast<const crs_graph_type*>(&srcRowGraph);
7277 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7278 !srcCrsGraphPtr, std::runtime_error, "error srcGraph type= " << typeid(srcRowGraph).name());
7279 const crs_graph_type& srcCrsGraph = *srcCrsGraphPtr;
7280
7281 crs_graph_type* tgtCrsGraphPtr = dynamic_cast<crs_graph_type*>(&tgtRowGraph);
7282 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7283 !tgtCrsGraphPtr, std::runtime_error, "error tgtGraph type= " << typeid(tgtRowGraph).name());
7284
7285 crs_graph_type& tgtCrsGraph = *tgtCrsGraphPtr;
7286 auto padding = tgtCrsGraph.computeCrsPadding(
7287 srcRowGraph, numSameIDs, permuteToLIDs, permuteFromLIDs, verbose);
7288 tgtCrsGraph.applyCrsPadding(*padding, verbose);
7289
7290 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
7291 const map_type& tgtRowMap = *(tgtRowGraph.getRowMap());
7292 const bool src_filled = srcRowGraph.isFillComplete();
7293 nonconst_global_inds_host_view_type row_copy;
7294 LO myid = 0;
7295
7296 //
7297 // "Copy" part of "copy and permute."
7298 //
7299 LO numSameIDs_as_LID = static_cast<LO>(numSameIDs);
7300
7301 if (src_filled || srcCrsGraphPtr == nullptr) {
7302 if (verbose) {
7303 std::ostringstream os;
7304 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
7305 std::cerr << os.str();
7306 }
7307 // If the source graph is fill complete, we can't use view mode,
7308 // because the data might be stored in a different format not
7309 // compatible with the expectations of view mode. Also, if the
7310 // source graph is not a CrsGraph, we can't use view mode,
7311 // because RowGraph only provides copy mode access to the data.
7312 Kokkos::DualView<const local_ordinal_type*, buffer_device_type> noPermute;
7313 insertGlobalIndicesDevice(srcCrsGraph, tgtCrsGraph,
7314 noPermute, noPermute,
7315 numSameIDs_as_LID);
7316 } else {
7317 if (verbose) {
7318 std::ostringstream os;
7319 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
7320 std::cerr << os.str();
7321 }
7322 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
7323 const GO gid = srcRowMap.getGlobalElement(myid);
7324 global_inds_host_view_type row;
7325 srcCrsGraph.getGlobalRowView(gid, row);
7326 tgtCrsGraph.insertGlobalIndices(gid, row.extent(0), row.data());
7327 }
7328 }
7329
7330 //
7331 // "Permute" part of "copy and permute."
7332 //
7333 auto permuteToLIDs_h = permuteToLIDs.view_host();
7334 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
7335 auto permuteToLIDs_d = permuteToLIDs.view_device();
7336 auto permuteFromLIDs_d = permuteFromLIDs.view_device();
7337
7338 if (src_filled || srcCrsGraphPtr == nullptr) {
7339 insertGlobalIndicesDevice(
7340 srcCrsGraph,
7341 tgtCrsGraph,
7342 permuteToLIDs,
7343 permuteFromLIDs, // note reversed arg order, tgt, then src
7344 static_cast<LO>(permuteToLIDs_h.extent(0)));
7345 } else {
7346 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
7347 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
7348 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
7349 global_inds_host_view_type row;
7350 srcCrsGraph.getGlobalRowView(srcgid, row);
7351 tgtCrsGraph.insertGlobalIndices(mygid, row.extent(0), row.data());
7352 }
7353 }
7354
7355 if (verbose) {
7356 std::ostringstream os;
7357 os << *prefix << "Done" << endl;
7358 std::cerr << os.str();
7359 }
7360}
7361
7362} // namespace Tpetra
7363
7364//
7365// Explicit instantiation macros
7366//
7367// Must be expanded from within the Tpetra namespace!
7368//
7369
// Expands to a `template <>` declaration of importAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>, in the form that takes one Import object
// (sourceGraph, importer, domainMap, rangeMap, params) and returns
// Teuchos::RCP<CrsGraph<LO, GO, NODE>>.
// The expansion ends with its own semicolon and uses unqualified names, so it
// must be expanded inside the Tpetra namespace.
#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
  template <> \
  Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
                                const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::node_type>& importer, \
                                const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
                                const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
                                const Teuchos::RCP<Teuchos::ParameterList>& params);
7384
// Expands to a `template <>` declaration of importAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>, in the form that takes two Import objects
// (sourceGraph, rowImporter, domainImporter, domainMap, rangeMap, params) and
// returns Teuchos::RCP<CrsGraph<LO, GO, NODE>>.
// The expansion ends with its own semicolon and uses unqualified names, so it
// must be expanded inside the Tpetra namespace.
#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
  template <> \
  Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
                                const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::node_type>& rowImporter, \
                                const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::node_type>& domainImporter, \
                                const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
                                const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
                                const Teuchos::RCP<Teuchos::ParameterList>& params);
7402
// Expands to a `template <>` declaration of exportAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>, in the form that takes one Export object
// (sourceGraph, exporter, domainMap, rangeMap, params) and returns
// Teuchos::RCP<CrsGraph<LO, GO, NODE>>.
// The expansion ends with its own semicolon and uses unqualified names, so it
// must be expanded inside the Tpetra namespace.
#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
  template <> \
  Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
                                const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::node_type>& exporter, \
                                const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
                                const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
                                const Teuchos::RCP<Teuchos::ParameterList>& params);
7417
// Expands to a `template <>` declaration of exportAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>, in the form that takes two Export objects
// (sourceGraph, rowExporter, domainExporter, domainMap, rangeMap, params) and
// returns Teuchos::RCP<CrsGraph<LO, GO, NODE>>.
// The expansion ends with its own semicolon and uses unqualified names, so it
// must be expanded inside the Tpetra namespace.
#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
  template <> \
  Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
                                const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::node_type>& rowExporter, \
                                const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                             CrsGraph<LO, GO, NODE>::node_type>& domainExporter, \
                                const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
                                const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::global_ordinal_type, \
                                                             CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
                                const Teuchos::RCP<Teuchos::ParameterList>& params);
7435
// Umbrella macro for one (LO, GO, NODE) combination: explicitly instantiates
// the CrsGraph<LO, GO, NODE> class template and then expands the four
// import/export "and fill complete" macros (one-object and two-object forms).
// No trailing semicolon is needed after the last expansion because each of
// those macros already ends with one.
// Must be expanded inside the Tpetra namespace.
#define TPETRA_CRSGRAPH_INSTANT(LO, GO, NODE) \
  template class CrsGraph<LO, GO, NODE>; \
  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE)
7442
7443#endif // TPETRA_CRSGRAPH_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Declaration of Tpetra::Details::iallreduce.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indices.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Create a cloned CrsGraph for a different Node type.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
bool hasColMap() const override
Whether the graph has a column Map.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
std::string description() const override
Return a one-line human-readable description of this object.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
typename local_graph_device_type::host_mirror_type local_graph_host_type
The type of the part of the sparse graph on each MPI process.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
typename Node::device_type device_type
This class' Kokkos device type.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indices.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void getLocalRowCopy(local_ordinal_type lclRow, nonconst_local_inds_host_view_type &lclColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. Built during fillCo...
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
Kokkos::View< const size_t *, device_type >::host_mirror_type k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Node node_type
This class' Kokkos Node type.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void setLocallyModified()
Report that we made a local modification to the graph's structure.
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
Teuchos::RCP< const map_type > domainMap
Domain map for original matrix.
Teuchos::RCP< const map_type > rowMap
Desired row map for "imported" version of the matrix.
Teuchos::RCP< const map_type > origRowMap
Original row map of matrix.
static bool useNewCopyAndPermute()
Use new implementation of copyAndPermute.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Sets up and executes a communication plan for a Tpetra DistObject.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
An abstract interface for graphs accessed by rows.
Abstract base class for objects that can be the source of an Import or Export operation.
Implementation details of Tpetra.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const CrsGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map, in which each global index is owned by only one process.
CombineMode
Rule for combining data in an Import or Export.
@ INSERT
Insert new values that don't currently exist.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.