Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_CRSGRAPH_DEF_HPP
11#define TPETRA_CRSGRAPH_DEF_HPP
12
15
16#ifdef KOKKOS_ENABLE_SYCL
17#include <sycl/sycl.hpp>
18#endif
19
24#include "Tpetra_Details_getGraphDiagOffsets.hpp"
25#include "Tpetra_Details_getGraphOffRankOffsets.hpp"
26#include "Tpetra_Details_makeColMap.hpp"
30#include "Tpetra_Distributor.hpp"
31#include "Teuchos_SerialDenseMatrix.hpp"
32#include "Tpetra_Vector.hpp"
35#include "Tpetra_Details_packCrsGraph.hpp"
36#include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
37#include "Tpetra_Details_CrsPadding.hpp"
38#include "Tpetra_Util.hpp"
39#include <algorithm>
40#include <limits>
41#include <map>
42#include <sstream>
43#include <string>
44#include <type_traits>
45#include <utility>
46#include <vector>
47
48namespace Tpetra {
49namespace Details {
50namespace Impl {
51
52template <class MapIter>
53void verbosePrintMap(std::ostream& out,
54 MapIter beg,
55 MapIter end,
56 const size_t numEnt,
57 const char mapName[]) {
58 using ::Tpetra::Details::Behavior;
59 using ::Tpetra::Details::verbosePrintArray;
60
61 out << mapName << ": {";
62 const size_t maxNumToPrint =
64 if (maxNumToPrint == 0) {
65 if (numEnt != 0) {
66 out << "...";
67 }
68 } else {
69 const size_t numToPrint = numEnt > maxNumToPrint ? maxNumToPrint : numEnt;
70 size_t count = 0;
71 for (MapIter it = beg; it != end; ++it) {
72 out << "(" << (*it).first << ", ";
73 verbosePrintArray(out, (*it).second, "gblColInds",
74 maxNumToPrint);
75 out << ")";
76 if (count + size_t(1) < numToPrint) {
77 out << ", ";
78 }
79 ++count;
80 }
81 if (count < numEnt) {
82 out << ", ...";
83 }
84 }
85 out << "}";
86}
87
88template <class LO, class GO, class Node>
89Teuchos::ArrayView<GO>
90getRowGraphGlobalRow(
91 std::vector<GO>& gblColIndsStorage,
92 const RowGraph<LO, GO, Node>& graph,
93 const GO gblRowInd) {
94 size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
95 if (gblColIndsStorage.size() < origNumEnt) {
96 gblColIndsStorage.resize(origNumEnt);
97 }
98 typename CrsGraph<LO, GO, Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
99 origNumEnt);
100 graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
101 Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(), origNumEnt);
102 return retval;
103}
104
105template <class LO, class GO, class DT, class OffsetType, class NumEntType>
106class ConvertColumnIndicesFromGlobalToLocal {
107 public:
108 ConvertColumnIndicesFromGlobalToLocal(const ::Kokkos::View<LO*, DT>& lclColInds,
109 const ::Kokkos::View<const GO*, DT>& gblColInds,
110 const ::Kokkos::View<const OffsetType*, DT>& ptr,
111 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
112 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
113 : lclColInds_(lclColInds)
114 , gblColInds_(gblColInds)
115 , ptr_(ptr)
116 , lclColMap_(lclColMap)
117 , numRowEnt_(numRowEnt) {}
118
119 KOKKOS_FUNCTION void
120 operator()(const LO& lclRow, OffsetType& curNumBad) const {
121 const OffsetType offset = ptr_(lclRow);
122 // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
123 // of entries in a row to LO, as long as the row doesn't have
124 // too many duplicate entries.
125 const LO numEnt = static_cast<LO>(numRowEnt_(lclRow));
126 for (LO j = 0; j < numEnt; ++j) {
127 const GO gid = gblColInds_(offset + j);
128 const LO lid = lclColMap_.getLocalElement(gid);
129 lclColInds_(offset + j) = lid;
130 if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid()) {
131 ++curNumBad;
132 }
133 }
134 }
135
136 static OffsetType
137 run(const ::Kokkos::View<LO*, DT>& lclColInds,
138 const ::Kokkos::View<const GO*, DT>& gblColInds,
139 const ::Kokkos::View<const OffsetType*, DT>& ptr,
140 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
141 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) {
142 typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
143 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
144
145 const LO lclNumRows = ptr.extent(0) == 0 ? static_cast<LO>(0) : static_cast<LO>(ptr.extent(0) - 1);
146 OffsetType numBad = 0;
147 // Count of "bad" column indices is a reduction over rows.
148 ::Kokkos::parallel_reduce(range_type(0, lclNumRows),
149 functor_type(lclColInds, gblColInds, ptr,
150 lclColMap, numRowEnt),
151 numBad);
152 return numBad;
153 }
154
155 private:
156 ::Kokkos::View<LO*, DT> lclColInds_;
157 ::Kokkos::View<const GO*, DT> gblColInds_;
158 ::Kokkos::View<const OffsetType*, DT> ptr_;
160 ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
161};
162
163} // namespace Impl
164
179template <class LO, class GO, class DT, class OffsetType, class NumEntType>
180OffsetType
182 const Kokkos::View<const GO*, DT>& gblColInds,
183 const Kokkos::View<const OffsetType*, DT>& ptr,
185 const Kokkos::View<const NumEntType*, DT>& numRowEnt) {
186 using Impl::ConvertColumnIndicesFromGlobalToLocal;
188 return impl_type::run(lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
189}
190
191template <class ViewType, class LO>
192class MaxDifference {
193 public:
194 MaxDifference(const ViewType& ptr)
195 : ptr_(ptr) {}
196
197 KOKKOS_INLINE_FUNCTION void init(LO& dst) const {
198 dst = 0;
199 }
200
201 KOKKOS_INLINE_FUNCTION void
202 join(LO& dst, const LO& src) const {
203 dst = (src > dst) ? src : dst;
204 }
205
206 KOKKOS_INLINE_FUNCTION void
207 operator()(const LO lclRow, LO& maxNumEnt) const {
208 const LO numEnt = static_cast<LO>(ptr_(lclRow + 1) - ptr_(lclRow));
209 maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
210 }
211
212 private:
213 typename ViewType::const_type ptr_;
214};
215
216template <class ViewType, class LO>
217typename ViewType::non_const_value_type
218maxDifference(const char kernelLabel[],
219 const ViewType& ptr,
220 const LO lclNumRows) {
221 if (lclNumRows == 0) {
222 // mfh 07 May 2018: Weirdly, I need this special case,
223 // otherwise I get the wrong answer.
224 return static_cast<LO>(0);
225 } else {
226 using execution_space = typename ViewType::execution_space;
227 using range_type = Kokkos::RangePolicy<execution_space, LO>;
228 LO theMaxNumEnt{0};
229 Kokkos::parallel_reduce(kernelLabel,
230 range_type(0, lclNumRows),
231 MaxDifference<ViewType, LO>(ptr),
232 theMaxNumEnt);
233 return theMaxNumEnt;
234 }
235}
236
237} // namespace Details
238
239template <class LocalOrdinal, class GlobalOrdinal, class Node>
240bool CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
241 getDebug() {
242 return Details::Behavior::debug("CrsGraph");
243}
244
245template <class LocalOrdinal, class GlobalOrdinal, class Node>
246bool CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
247 getVerbose() {
248 return Details::Behavior::verbose("CrsGraph");
249}
250
251template <class LocalOrdinal, class GlobalOrdinal, class Node>
253 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
254 const size_t maxNumEntriesPerRow,
255 const Teuchos::RCP<Teuchos::ParameterList>& params)
256 : dist_object_type(rowMap)
257 , rowMap_(rowMap)
258 , numAllocForAllRows_(maxNumEntriesPerRow) {
259 const char tfecfFuncName[] =
260 "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
261 staticAssertions();
262 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid(),
263 std::invalid_argument,
264 "The allocation hint maxNumEntriesPerRow must be "
265 "a valid size_t value, which in this case means it must not be "
266 "Teuchos::OrdinalTraits<size_t>::invalid().");
269}
270
271template <class LocalOrdinal, class GlobalOrdinal, class Node>
273 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
274 const Teuchos::RCP<const map_type>& colMap,
275 const size_t maxNumEntriesPerRow,
276 const Teuchos::RCP<Teuchos::ParameterList>& params)
277 : dist_object_type(rowMap)
278 , rowMap_(rowMap)
279 , colMap_(colMap)
280 , numAllocForAllRows_(maxNumEntriesPerRow) {
281 const char tfecfFuncName[] =
282 "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
283 staticAssertions();
285 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid(),
286 std::invalid_argument,
287 "The allocation hint maxNumEntriesPerRow must be "
288 "a valid size_t value, which in this case means it must not be "
289 "Teuchos::OrdinalTraits<size_t>::invalid().");
292}
293
294template <class LocalOrdinal, class GlobalOrdinal, class Node>
296 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
297 const Teuchos::ArrayView<const size_t>& numEntPerRow,
298 const Teuchos::RCP<Teuchos::ParameterList>& params)
299 : dist_object_type(rowMap)
300 , rowMap_(rowMap)
301 , numAllocForAllRows_(0) {
302 const char tfecfFuncName[] =
303 "CrsGraph(rowMap,numEntPerRow,params): ";
304 staticAssertions();
305
306 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
308 static_cast<size_t>(numEntPerRow.size()) != lclNumRows,
309 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size() << " != the local number of rows " << lclNumRows << " as specified by "
310 "the input row Map.");
311
312 if (debug_) {
313 for (size_t r = 0; r < lclNumRows; ++r) {
314 const size_t curRowCount = numEntPerRow[r];
315 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
316 std::invalid_argument, "numEntPerRow(" << r << ") "
317 "specifies an invalid number of entries "
318 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
319 }
320 }
321
322 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
323 // The latter is a const View, so we have to copy into a nonconst
324 // View first, then assign.
325 typedef decltype(k_numAllocPerRow_) out_view_type;
326 typedef typename out_view_type::non_const_type nc_view_type;
327 typedef Kokkos::View<const size_t*,
328 typename nc_view_type::array_layout,
329 Kokkos::HostSpace,
330 Kokkos::MemoryUnmanaged>
333 nc_view_type numAllocPerRowOut("Tpetra::CrsGraph::numAllocPerRow",
334 lclNumRows);
335 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
336 using exec_space = typename nc_view_type::execution_space;
337 Kokkos::deep_copy(exec_space(), numAllocPerRowOut, numAllocPerRowIn);
339
342}
343
344template <class LocalOrdinal, class GlobalOrdinal, class Node>
346 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
347 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
348 const Teuchos::RCP<Teuchos::ParameterList>& params)
349 : dist_object_type(rowMap)
350 , rowMap_(rowMap)
351 , k_numAllocPerRow_(numEntPerRow.view_host())
352 , numAllocForAllRows_(0) {
353 const char tfecfFuncName[] =
354 "CrsGraph(rowMap,numEntPerRow,params): ";
355 staticAssertions();
356
357 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
359 static_cast<size_t>(numEntPerRow.extent(0)) != lclNumRows,
360 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.extent(0) << " != the local number of rows " << lclNumRows << " as specified by "
361 "the input row Map.");
362
363 if (debug_) {
364 for (size_t r = 0; r < lclNumRows; ++r) {
365 const size_t curRowCount = numEntPerRow.view_host()(r);
366 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
367 std::invalid_argument, "numEntPerRow(" << r << ") "
368 "specifies an invalid number of entries "
369 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
370 }
371 }
372
375}
376
377template <class LocalOrdinal, class GlobalOrdinal, class Node>
379 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
380 const Teuchos::RCP<const map_type>& colMap,
381 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
382 const Teuchos::RCP<Teuchos::ParameterList>& params)
384 , rowMap_(rowMap)
385 , colMap_(colMap)
386 , k_numAllocPerRow_(numEntPerRow.view_host())
387 , numAllocForAllRows_(0) {
388 const char tfecfFuncName[] =
389 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
390 staticAssertions();
391
392 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
394 static_cast<size_t>(numEntPerRow.extent(0)) != lclNumRows,
395 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.extent(0) << " != the local number of rows " << lclNumRows << " as specified by "
396 "the input row Map.");
397
398 if (debug_) {
399 for (size_t r = 0; r < lclNumRows; ++r) {
400 const size_t curRowCount = numEntPerRow.view_host()(r);
401 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
402 std::invalid_argument, "numEntPerRow(" << r << ") "
403 "specifies an invalid number of entries "
404 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
405 }
406 }
407
408 resumeFill(params);
409 checkInternalState();
410}
411
412template <class LocalOrdinal, class GlobalOrdinal, class Node>
414 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
415 const Teuchos::RCP<const map_type>& colMap,
416 const Teuchos::ArrayView<const size_t>& numEntPerRow,
417 const Teuchos::RCP<Teuchos::ParameterList>& params)
418 : dist_object_type(rowMap)
419 , rowMap_(rowMap)
420 , colMap_(colMap)
421 , numAllocForAllRows_(0) {
422 const char tfecfFuncName[] =
423 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
424 staticAssertions();
426 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
428 static_cast<size_t>(numEntPerRow.size()) != lclNumRows,
429 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size() << " != the local number of rows " << lclNumRows << " as specified by "
430 "the input row Map.");
431
432 if (debug_) {
433 for (size_t r = 0; r < lclNumRows; ++r) {
434 const size_t curRowCount = numEntPerRow[r];
435 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
436 std::invalid_argument, "numEntPerRow(" << r << ") "
437 "specifies an invalid number of entries "
438 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
439 }
440 }
441
442 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
443 // The latter is a const View, so we have to copy into a nonconst
444 // View first, then assign.
445 typedef decltype(k_numAllocPerRow_) out_view_type;
446 typedef typename out_view_type::non_const_type nc_view_type;
447 typedef Kokkos::View<const size_t*,
448 typename nc_view_type::array_layout,
449 Kokkos::HostSpace,
450 Kokkos::MemoryUnmanaged>
451 in_view_type;
452 in_view_type numAllocPerRowIn(numEntPerRow.getRawPtr(), lclNumRows);
453 nc_view_type numAllocPerRowOut("Tpetra::CrsGraph::numAllocPerRow",
454 lclNumRows);
455 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
456 using exec_space = typename nc_view_type::execution_space;
457 Kokkos::deep_copy(exec_space(), numAllocPerRowOut, numAllocPerRowIn);
459
462}
463
// Constructor that builds a graph from the leading rows of an
// existing graph (originalGraph) and a new row Map.
//
// The column Map, allocation hint, storage status and the
// allocated / local / sorted index flags are copied from
// originalGraph.  The row offsets become subviews of originalGraph's
// unpacked and packed device row offsets covering entries
// [0, numRows + 1), where numRows is rowMap's local element count.
//
// NOTE(review): this listing appears to have dropped several lines
// (the class qualifier and first parameter, the body of the
// locally-indexed branch, and the statement before the closing
// brace) -- confirm against the upstream file before relying on it.
464template <class LocalOrdinal, class GlobalOrdinal, class Node>
467 const Teuchos::RCP<const map_type>& rowMap,
468 const Teuchos::RCP<Teuchos::ParameterList>& params)
469 : dist_object_type(rowMap)
470 , rowMap_(rowMap)
471 , colMap_(originalGraph.colMap_)
472 , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
473 , storageStatus_(originalGraph.storageStatus_)
474 , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
475 , indicesAreLocal_(originalGraph.indicesAreLocal_)
476 , indicesAreSorted_(originalGraph.indicesAreSorted_) {
477 staticAssertions();
478
// NOTE(review): getLocalNumElements() is used as a size_t elsewhere
// in this file; storing it in an int narrows -- confirm this is
// intended.
479 int numRows = rowMap->getLocalNumElements();
// Packed row offset at index numRows: the number of entries in the
// first numRows rows of originalGraph.
480 size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
481 auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows + 1);
482
483 this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
484 this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
485
486 if (indicesAreLocal_) {
489 } else {
// Globally indexed: wrap the first numNonZeros global indices of
// originalGraph.
490 gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
491 }
492
494}
495
496template <class LocalOrdinal, class GlobalOrdinal, class Node>
498 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
499 const Teuchos::RCP<const map_type>& colMap,
500 const typename local_graph_device_type::row_map_type& rowPointers,
501 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
502 const Teuchos::RCP<Teuchos::ParameterList>& params)
503 : dist_object_type(rowMap)
504 , rowMap_(rowMap)
505 , colMap_(colMap)
506 , numAllocForAllRows_(0)
507 , storageStatus_(Details::STORAGE_1D_PACKED)
508 , indicesAreAllocated_(true)
509 , indicesAreLocal_(true) {
510 staticAssertions();
511 if (!params.is_null() && params->isParameter("sorted") &&
512 !params->get<bool>("sorted")) {
513 indicesAreSorted_ = false;
514 } else {
515 indicesAreSorted_ = true;
516 }
517 setAllIndices(rowPointers, columnIndices);
519}
520
521template <class LocalOrdinal, class GlobalOrdinal, class Node>
523 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
524 const Teuchos::RCP<const map_type>& colMap,
525 const Teuchos::ArrayRCP<size_t>& rowPointers,
526 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices,
527 const Teuchos::RCP<Teuchos::ParameterList>& params)
528 : dist_object_type(rowMap)
529 , rowMap_(rowMap)
530 , colMap_(colMap)
531 , numAllocForAllRows_(0)
532 , storageStatus_(Details::STORAGE_1D_PACKED)
533 , indicesAreAllocated_(true)
534 , indicesAreLocal_(true) {
535 staticAssertions();
536 if (!params.is_null() && params->isParameter("sorted") &&
537 !params->get<bool>("sorted")) {
538 indicesAreSorted_ = false;
539 } else {
540 indicesAreSorted_ = true;
541 }
544}
545
546template <class LocalOrdinal, class GlobalOrdinal, class Node>
548 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
549 const Teuchos::RCP<const map_type>& colMap,
551 const Teuchos::RCP<Teuchos::ParameterList>& params)
553 rowMap,
554 colMap,
555 Teuchos::null,
556 Teuchos::null,
557 params) {}
558
559template <class LocalOrdinal, class GlobalOrdinal, class Node>
562 const Teuchos::RCP<const map_type>& rowMap,
563 const Teuchos::RCP<const map_type>& colMap,
564 const Teuchos::RCP<const map_type>& domainMap,
565 const Teuchos::RCP<const map_type>& rangeMap,
566 const Teuchos::RCP<Teuchos::ParameterList>& params)
568 , rowMap_(rowMap)
569 , colMap_(colMap)
570 , numAllocForAllRows_(0)
571 , storageStatus_(Details::STORAGE_1D_PACKED)
572 , indicesAreAllocated_(true)
573 , indicesAreLocal_(true) {
574 staticAssertions();
575 const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
576
578 colMap.is_null(), std::runtime_error,
579 ": The input column Map must be nonnull.");
581 k_local_graph_.numRows() != rowMap->getLocalNumElements(),
582 std::runtime_error,
583 ": The input row Map and the input local graph need to have the same "
584 "number of rows. The row Map claims "
585 << rowMap->getLocalNumElements()
586 << " row(s), but the local graph claims " << k_local_graph_.numRows()
587 << " row(s).");
588
589 // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
590 // rowMap_->getLocalNumElements(), but it doesn't have to.
591 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
592 // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
593 // ": The input row Map and the input local graph need to have the same "
594 // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
595 // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
597 lclIndsUnpacked_wdv.extent(0) != 0 || gblInds_wdv.extent(0) != 0, std::logic_error,
598 ": cannot have 1D data structures allocated.");
599
600 if (!params.is_null() && params->isParameter("sorted") &&
601 !params->get<bool>("sorted")) {
602 indicesAreSorted_ = false;
603 } else {
604 indicesAreSorted_ = true;
605 }
606
607 setDomainRangeMaps(domainMap.is_null() ? rowMap_ : domainMap,
608 rangeMap.is_null() ? rowMap_ : rangeMap);
609 Teuchos::Array<int> remotePIDs(0); // unused output argument
610 this->makeImportExport(remotePIDs, false);
611
612 lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
613 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
614 this->setRowPtrs(k_local_graph_.row_map);
615
616 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
617
618 const bool callComputeGlobalConstants = params.get() == nullptr ||
619 params->get("compute global constants", true);
620
622 this->computeGlobalConstants();
623 }
624 this->fillComplete_ = true;
625 this->checkInternalState();
626}
627
628template <class LocalOrdinal, class GlobalOrdinal, class Node>
631 const Teuchos::RCP<const map_type>& rowMap,
632 const Teuchos::RCP<const map_type>& colMap,
633 const Teuchos::RCP<const map_type>& domainMap,
634 const Teuchos::RCP<const map_type>& rangeMap,
635 const Teuchos::RCP<const import_type>& importer,
636 const Teuchos::RCP<const export_type>& exporter,
637 const Teuchos::RCP<Teuchos::ParameterList>& params)
639 , rowMap_(rowMap)
640 , colMap_(colMap)
641 , rangeMap_(rangeMap.is_null() ? rowMap : rangeMap)
642 , domainMap_(domainMap.is_null() ? rowMap : domainMap)
643 , importer_(importer)
644 , exporter_(exporter)
645 , numAllocForAllRows_(0)
646 , storageStatus_(Details::STORAGE_1D_PACKED)
647 , indicesAreAllocated_(true)
648 , indicesAreLocal_(true) {
649 staticAssertions();
650 const char tfecfFuncName[] =
651 "Tpetra::CrsGraph(local_graph_device_type,"
652 "Map,Map,Map,Map,Import,Export,params): ";
653
654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(colMap.is_null(), std::runtime_error,
655 "The input column Map must be nonnull.");
656
659 setRowPtrs(lclGraph.row_map);
660
661 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
662
663 if (!params.is_null() && params->isParameter("sorted") &&
664 !params->get<bool>("sorted")) {
665 indicesAreSorted_ = false;
666 } else {
667 indicesAreSorted_ = true;
668 }
669
670 const bool callComputeGlobalConstants =
671 params.get() == nullptr ||
672 params->get("compute global constants", true);
673 if (callComputeGlobalConstants) {
675 }
676 fillComplete_ = true;
678}
679
680template <class LocalOrdinal, class GlobalOrdinal, class Node>
682 CrsGraph(const row_ptrs_device_view_type& rowPointers,
684 const Teuchos::RCP<const map_type>& rowMap,
685 const Teuchos::RCP<const map_type>& colMap,
686 const Teuchos::RCP<const map_type>& domainMap,
687 const Teuchos::RCP<const map_type>& rangeMap,
688 const Teuchos::RCP<const import_type>& importer,
689 const Teuchos::RCP<const export_type>& exporter,
690 const Teuchos::RCP<Teuchos::ParameterList>& params)
692 , rowMap_(rowMap)
693 , colMap_(colMap)
694 , rangeMap_(rangeMap.is_null() ? rowMap : rangeMap)
695 , domainMap_(domainMap.is_null() ? rowMap : domainMap)
696 , importer_(importer)
697 , exporter_(exporter)
698 , numAllocForAllRows_(0)
699 , storageStatus_(Details::STORAGE_1D_PACKED)
700 , indicesAreAllocated_(true)
701 , indicesAreLocal_(true) {
702 staticAssertions();
703 const char tfecfFuncName[] =
704 "Tpetra::CrsGraph(row_ptrs_device_view_type,local_inds_wdv_type"
705 "Map,Map,Map,Map,Import,Export,params): ";
706
707 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(colMap.is_null(), std::runtime_error,
708 "The input column Map must be nonnull.");
709
710 lclIndsPacked_wdv = columnIndices;
711 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
712 setRowPtrs(rowPointers);
713
714 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
715
716 if (!params.is_null() && params->isParameter("sorted") &&
717 !params->get<bool>("sorted")) {
718 indicesAreSorted_ = false;
719 } else {
720 indicesAreSorted_ = true;
722
723 const bool callComputeGlobalConstants =
724 params.get() == nullptr ||
725 params->get("compute global constants", true);
727 this->computeGlobalConstants();
728 }
729 fillComplete_ = true;
730 checkInternalState();
732
733template <class LocalOrdinal, class GlobalOrdinal, class Node>
734Teuchos::RCP<const Teuchos::ParameterList>
736 getValidParameters() const {
737 using Teuchos::ParameterList;
738 using Teuchos::parameterList;
739 using Teuchos::RCP;
740
741 RCP<ParameterList> params = parameterList("Tpetra::CrsGraph");
742
743 // Make a sublist for the Import.
746 // FIXME (mfh 02 Apr 2012) We should really have the Import and
747 // Export objects fill in these lists. However, we don't want to
748 // create an Import or Export unless we need them. For now, we
749 // know that the Import and Export just pass the list directly to
750 // their Distributor, so we can create a Distributor here
751 // (Distributor's constructor is a lightweight operation) and have
752 // it fill in the list.
753
754 // Fill in Distributor default parameters by creating a
755 // Distributor and asking it to do the work.
756 Distributor distributor(rowMap_->getComm(), importSublist);
757 params->set("Import", *importSublist, "How the Import performs communication.");
759 // Make a sublist for the Export. For now, it's a clone of the
760 // Import sublist. It's not a shallow copy, though, since we
761 // might like the Import to do communication differently than the
762 // Export.
763 params->set("Export", *importSublist, "How the Export performs communication.");
764
765 return params;
766}
767
768template <class LocalOrdinal, class GlobalOrdinal, class Node>
770 setParameterList(const Teuchos::RCP<Teuchos::ParameterList>& params) {
771 Teuchos::RCP<const Teuchos::ParameterList> validParams =
772 getValidParameters();
773 params->validateParametersAndSetDefaults(*validParams);
774 this->setMyParamList(params);
775}
776
777template <class LocalOrdinal, class GlobalOrdinal, class Node>
780 getGlobalNumRows() const {
781 return rowMap_->getGlobalNumElements();
782}
783
784template <class LocalOrdinal, class GlobalOrdinal, class Node>
787 getGlobalNumCols() const {
788 const char tfecfFuncName[] = "getGlobalNumCols: ";
790 !isFillComplete() || getDomainMap().is_null(), std::runtime_error,
791 "The graph does not have a domain Map. You may not call this method in "
792 "that case.");
793 return getDomainMap()->getGlobalNumElements();
794}
795
796template <class LocalOrdinal, class GlobalOrdinal, class Node>
797size_t
799 getLocalNumRows() const {
800 return this->rowMap_.is_null() ? static_cast<size_t>(0) : this->rowMap_->getLocalNumElements();
801}
802
803template <class LocalOrdinal, class GlobalOrdinal, class Node>
804size_t
806 getLocalNumCols() const {
807 const char tfecfFuncName[] = "getLocalNumCols: ";
809 !hasColMap(), std::runtime_error,
810 "The graph does not have a column Map. You may not call this method "
811 "unless the graph has a column Map. This requires either that a custom "
812 "column Map was given to the constructor, or that fillComplete() has "
813 "been called.");
814 return colMap_.is_null() ? static_cast<size_t>(0) : colMap_->getLocalNumElements();
815}
816
817template <class LocalOrdinal, class GlobalOrdinal, class Node>
818Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
820 getRowMap() const {
821 return rowMap_;
822}
823
824template <class LocalOrdinal, class GlobalOrdinal, class Node>
825Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
827 getColMap() const {
828 return colMap_;
829}
830
831template <class LocalOrdinal, class GlobalOrdinal, class Node>
832Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
834 getDomainMap() const {
835 return domainMap_;
836}
837
838template <class LocalOrdinal, class GlobalOrdinal, class Node>
839Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
841 getRangeMap() const {
842 return rangeMap_;
843}
844
845template <class LocalOrdinal, class GlobalOrdinal, class Node>
846Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
848 getImporter() const {
849 return importer_;
850}
851
852template <class LocalOrdinal, class GlobalOrdinal, class Node>
853Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
855 getExporter() const {
856 return exporter_;
857}
858
859template <class LocalOrdinal, class GlobalOrdinal, class Node>
861 hasColMap() const {
862 return !colMap_.is_null();
863}
864
865template <class LocalOrdinal, class GlobalOrdinal, class Node>
867 isStorageOptimized() const {
868 // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
869 // getLocalNumRows() is zero?
870
871 const bool isOpt = indicesAreAllocated_ &&
872 k_numRowEntries_.extent(0) == 0 &&
873 getLocalNumRows() > 0;
874
875 return isOpt;
876}
877
878template <class LocalOrdinal, class GlobalOrdinal, class Node>
881 getGlobalNumEntries() const {
882 const char tfecfFuncName[] = "getGlobalNumEntries: ";
883 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_, std::logic_error,
884 "The graph does not have global constants computed, "
885 "but the user has requested them.");
886
887 return globalNumEntries_;
888}
889
// Number of graph entries stored on the calling process.
//
// Returns zero if indices are not allocated, if there are no local
// rows, or if the packed row offsets have fewer than lclNumRows + 1
// entries.  If k_numRowEntries_ has zero length, the result is the
// length of lclIndsPacked_wdv.  Otherwise the result comes from a
// Kokkos::parallel_reduce into nodeNumEnt.
//
// NOTE(review): this listing appears to have dropped several lines
// (the class qualifier, the definition of upperLoopBound, the body
// of the reduction lambda, and the statement after the reduction)
// -- confirm against the upstream file.
890template <class LocalOrdinal, class GlobalOrdinal, class Node>
891size_t
893 getLocalNumEntries() const {
894 const char tfecfFuncName[] = "getLocalNumEntries: ";
895 typedef LocalOrdinal LO;
897 Details::ProfilingRegion regionGLNE("Tpetra::CrsGraph::getLocalNumEntries");
898
899 if (this->indicesAreAllocated_) {
900 const LO lclNumRows = this->getLocalNumRows();
901 if (lclNumRows == 0) {
902 return static_cast<size_t>(0);
903 } else {
904 // Avoid the "*this capture" issue by creating a local Kokkos::View.
905 auto numEntPerRow = this->k_numRowEntries_;
906 const LO numNumEntPerRow = numEntPerRow.extent(0);
907 if (numNumEntPerRow == 0) {
// Row offsets too short to describe lclNumRows rows: report zero.
908 if (static_cast<LO>(this->getRowPtrsPackedDevice().extent(0)) <
909 static_cast<LO>(lclNumRows + 1)) {
910 return static_cast<size_t>(0);
911 } else {
912 // indices are allocated and k_numRowEntries_ is not allocated,
913 // so we have packed storage and the length of lclIndsPacked_wdv
914 // must be the number of local entries.
915 if (debug_) {
916 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getRowPtrsPackedHost()(lclNumRows) != lclIndsPacked_wdv.extent(0), std::logic_error,
917 "Final entry of packed host rowptrs doesn't match the length of lclIndsPacked");
918 }
919 return lclIndsPacked_wdv.extent(0);
920 }
921 } else { // k_numRowEntries_ is populated
922 // k_numRowEntries_ is actually a host View, so we run
923 // the sum in its native execution space. This also means
924 // that we can use explicit capture (which could perhaps
925 // improve build time) instead of KOKKOS_LAMBDA, and avoid
926 // any CUDA build issues with trying to run a __device__ -
927 // only function on host.
928 typedef typename num_row_entries_type::execution_space
929 host_exec_space;
930 typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
931
933 size_t nodeNumEnt = 0;
934 Kokkos::parallel_reduce(
935 "Tpetra::CrsGraph::getNumNodeEntries",
936 range_type(0, upperLoopBound),
937 [=](const LO& k, size_t& lclSum) {
939 },
940 nodeNumEnt);
942 }
943 }
944 } else { // nothing allocated on this process, so no entries
945 return static_cast<size_t>(0);
946 }
947}
948
949template <class LocalOrdinal, class GlobalOrdinal, class Node>
953 const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
954 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_, std::logic_error,
955 "The graph does not have global constants computed, "
956 "but the user has requested them.");
958 return globalMaxNumRowEntries_;
959}
960
961template <class LocalOrdinal, class GlobalOrdinal, class Node>
962size_t
967
968template <class LocalOrdinal, class GlobalOrdinal, class Node>
970 isFillComplete() const {
971 return fillComplete_;
972}
973
// Fill is active exactly when fillComplete_ is false; this is always the
// logical negation of isFillComplete().
974template <class LocalOrdinal, class GlobalOrdinal, class Node>
976 isFillActive() const {
977 return !fillComplete_;
978}
979
// Report the indicesAreLocal_ flag (set by allocateIndices when it is
// called with lg == LocalIndices).
980template <class LocalOrdinal, class GlobalOrdinal, class Node>
982 isLocallyIndexed() const {
983 return indicesAreLocal_;
984}
985
// Report the indicesAreGlobal_ flag (set by allocateIndices when it is
// called with lg == GlobalIndices).
986template <class LocalOrdinal, class GlobalOrdinal, class Node>
988 isGloballyIndexed() const {
989 return indicesAreGlobal_;
990}
991
// Return the length of the column-index storage currently allocated on
// this process.
//
// Result by case:
//  - indices not allocated: the invalid size_t flag value;
//  - no local rows, or a storage status that is neither 1-D packed nor
//    1-D unpacked: 0;
//  - 1-D packed: 0 if the packed row offsets have fewer than
//    lclNumRows + 1 entries, otherwise the extent of lclIndsPacked_wdv
//    (locally indexed) or gblInds_wdv (not locally indexed);
//  - 1-D unpacked: 0 if the unpacked row offsets are empty, otherwise the
//    extent of lclIndsUnpacked_wdv (locally indexed) or gblInds_wdv.
992template <class LocalOrdinal, class GlobalOrdinal, class Node>
993size_t
996 typedef LocalOrdinal LO;
997
998 if (this->indicesAreAllocated_) {
999 const LO lclNumRows = this->getLocalNumRows();
1000 if (lclNumRows == 0) {
1001 return static_cast<size_t>(0);
1002 } else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
// Row offsets too short to describe lclNumRows rows: report nothing allocated.
1003 if (static_cast<LO>(this->getRowPtrsPackedDevice().extent(0)) <
1004 static_cast<LO>(lclNumRows + 1)) {
1005 return static_cast<size_t>(0);
1006 } else {
1007 if (this->isLocallyIndexed())
1008 return lclIndsPacked_wdv.extent(0);
1009 else
1010 return gblInds_wdv.extent(0);
1011 }
1012 } else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1013 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1014 if (rowPtrsUnpacked_host.extent(0) == 0) {
1015 return static_cast<size_t>(0);
1016 } else {
1017 if (this->isLocallyIndexed())
1018 return lclIndsUnpacked_wdv.extent(0);
1019 else
1020 return gblInds_wdv.extent(0);
1021 }
1022 } else {
1023 return static_cast<size_t>(0);
1024 }
1025 } else {
// Nothing has been allocated yet, so there is no meaningful size to report.
1026 return Tpetra::Details::OrdinalTraits<size_t>::invalid();
1027 }
1028}
1029
// Return the communicator of the row Map, or Teuchos::null when rowMap_
// itself is null.
1030template <class LocalOrdinal, class GlobalOrdinal, class Node>
1031Teuchos::RCP<const Teuchos::Comm<int>>
1033 getComm() const {
1034 return this->rowMap_.is_null() ? Teuchos::null : this->rowMap_->getComm();
1035}
1036
// Return the index base reported by the row Map.
// NOTE(review): rowMap_ is dereferenced without a null check here, unlike
// getComm(); callers presumably must ensure the row Map is set -- confirm.
1037template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040 getIndexBase() const {
1041 return rowMap_->getIndexBase();
1042}
// Report the indicesAreAllocated_ flag (set to true at the end of
// allocateIndices).
1044template <class LocalOrdinal, class GlobalOrdinal, class Node>
1046 indicesAreAllocated() const {
1047 return indicesAreAllocated_;
1048}
1049
// Report the indicesAreSorted_ flag. The flag is cleared conservatively
// whenever the graph is marked as locally modified.
1050template <class LocalOrdinal, class GlobalOrdinal, class Node>
1052 isSorted() const {
1053 return indicesAreSorted_;
1054}
1055
// Report the noRedundancies_ flag. The flag is cleared conservatively
// whenever the graph is marked as locally modified.
1056template <class LocalOrdinal, class GlobalOrdinal, class Node>
1058 isMerged() const {
1059 return noRedundancies_;
1060}
// Mark the graph as modified on this process: conservatively clear the
// "sorted" and "no redundancies" flags and invalidate the local constants.
// NOTE(review): the signature line is not visible in this listing; the
// insertion routines in this file call this->setLocallyModified() after
// changing a row, which presumably refers to this function -- confirm.
1062template <class LocalOrdinal, class GlobalOrdinal, class Node>
1065 // FIXME (mfh 07 May 2013) How do we know that the change
1066 // introduced a redundancy, or even that it invalidated the sorted
1067 // order of indices? CrsGraph has always made this conservative
1068 // guess. It could be a bit costly to check at insertion time,
1069 // though.
1070 indicesAreSorted_ = false;
1071 noRedundancies_ = false;
1072
1073 // We've modified the graph, so we'll have to recompute local
1074 // constants like the number of diagonal entries on this process.
1075 haveLocalConstants_ = false;
1077
// Allocate this process's index storage in the "1-D unpacked" layout.
//
// Builds the row offsets (k_rowPtrs, numRows + 1 entries) from either the
// per-row allocation counts k_numAllocPerRow_ or the single per-row count
// numAllocForAllRows_, allocates lclIndsUnpacked_wdv (lg == LocalIndices)
// or gblInds_wdv (otherwise) with numInds entries and, when numRows > 0,
// installs a zero-filled k_numRowEntries_. It then sets storageStatus_ to
// STORAGE_1D_UNPACKED, records the index kind in indicesAreLocal_ /
// indicesAreGlobal_, releases the allocation specifications, sets
// indicesAreAllocated_, and finally runs checkInternalState().
//
// \param lg      Whether to allocate local or global column indices.
// \param verbose Whether to print progress messages to std::cerr.
//
// The precondition tests report std::logic_error when lg conflicts with
// the graph's current index kind or when indices are already allocated,
// and std::invalid_argument when k_numAllocPerRow_ has the wrong length
// or numAllocForAllRows_ holds the invalid flag value. Exceptions from
// checkInternalState() are caught and re-reported with added context.
1078template <class LocalOrdinal, class GlobalOrdinal, class Node>
1080 allocateIndices(const ELocalGlobal lg, const bool verbose) {
1081 using std::endl;
1082 using Teuchos::arcp;
1083 using Teuchos::Array;
1084 using Teuchos::ArrayRCP;
1085 typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1086 typedef typename local_graph_device_type::row_map_type::non_const_type
1088 const char tfecfFuncName[] = "allocateIndices: ";
1089 const char suffix[] =
1090 " Please report this bug to the Tpetra developers.";
1091
1092 Details::ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
// prefix is only created (and only dereferenced) when verbose is true.
1094 std::unique_ptr<std::string> prefix;
1095 if (verbose) {
1096 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1097 std::ostringstream os;
1098 os << *prefix << "Start: lg="
1099 << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1100 << ", numRows: " << this->getLocalNumRows() << endl;
1101 std::cerr << os.str();
1102 }
1103
1104 // This is a protected function, only callable by us. If it was
1105 // called incorrectly, it is our fault. That's why the tests
1106 // below throw std::logic_error instead of std::invalid_argument.
1107 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isLocallyIndexed() && lg == GlobalIndices, std::logic_error,
1108 ": The graph is locally indexed, but Tpetra code is calling "
1109 "this method with lg=GlobalIndices."
1110 << suffix);
1111 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed() && lg == LocalIndices, std::logic_error,
1112 ": The graph is globally indexed, but Tpetra code is calling "
1113 "this method with lg=LocalIndices."
1114 << suffix);
1115 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreAllocated(), std::logic_error,
1116 ": The graph's "
1117 "indices are already allocated, but Tpetra is calling "
1118 "allocateIndices again."
1119 << suffix);
1120 const size_t numRows = this->getLocalNumRows();
1121
1122 //
1123 // STATIC ALLOCATION PROFILE
1124 //
// numInds receives the total number of index slots to allocate, as
// returned by the offset computations below.
1125 size_type numInds = 0;
1127 if (verbose) {
1128 std::ostringstream os;
1129 os << *prefix << "Allocate k_rowPtrs: " << (numRows + 1) << endl;
1130 std::cerr << os.str();
1131 }
1132 non_const_row_map_type k_rowPtrs("Tpetra::CrsGraph::ptr", numRows + 1);
1133
// A nonempty k_numAllocPerRow_ means per-row allocation sizes were given;
// otherwise every row gets numAllocForAllRows_ slots.
1134 if (this->k_numAllocPerRow_.extent(0) != 0) {
1135 // It's OK to throw std::invalid_argument here, because we
1136 // haven't incurred any side effects yet. Throwing that
1137 // exception (and not, say, std::logic_error) implies that the
1138 // instance can recover.
1139 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->k_numAllocPerRow_.extent(0) != numRows,
1140 std::invalid_argument,
1141 "k_numAllocPerRow_ is allocated, that is, "
1142 "has nonzero length "
1143 << this->k_numAllocPerRow_.extent(0)
1144 << ", but its length != numRows = " << numRows << ".");
1145
1146 // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1147 // we want to compute here) lives on device. That's OK;
1148 // computeOffsetsFromCounts can handle this case.
1150
1151 // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1152 // doesn't attempt to check its input for "invalid" flag
1153 // values. For now, we omit that feature of the sequential
1154 // code disabled below.
1155 numInds = computeOffsetsFromCounts(k_rowPtrs, k_numAllocPerRow_);
1156 } else {
1157 // It's OK to throw std::invalid_argument here, because we
1158 // haven't incurred any side effects yet. Throwing that
1159 // exception (and not, say, std::logic_error) implies that the
1160 // instance can recover.
1161 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->numAllocForAllRows_ ==
1162 Tpetra::Details::OrdinalTraits<size_t>::invalid(),
1163 std::invalid_argument,
1164 "numAllocForAllRows_ has an invalid value, "
1165 "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = "
1166 << Tpetra::Details::OrdinalTraits<size_t>::invalid() << ".");
1167
1169 numInds = computeOffsetsFromConstantCount(k_rowPtrs, this->numAllocForAllRows_);
1171 // "Commit" the resulting row offsets.
1172 setRowPtrsUnpacked(k_rowPtrs);
1173 }
// Debug-only consistency check: the last committed row offset must equal
// the total number of index slots computed above.
1174 if (debug_) {
1175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numInds != size_type(this->getRowPtrsUnpackedHost()(numRows)), std::logic_error,
1176 ": Number of indices produced by computeOffsetsFrom[Constant]Counts "
1177 "does not match final entry of rowptrs unpacked");
1178 }
1179
1180 if (lg == LocalIndices) {
1181 if (verbose) {
1182 std::ostringstream os;
1183 os << *prefix << "Allocate local column indices "
1184 "lclIndsUnpacked_wdv: "
1185 << numInds << endl;
1186 std::cerr << os.str();
1187 }
1188 lclIndsUnpacked_wdv = local_inds_wdv_type(
1189 local_inds_dualv_type("Tpetra::CrsGraph::lclInd", numInds));
1190 } else {
1191 if (verbose) {
1192 std::ostringstream os;
1193 os << *prefix << "Allocate global column indices "
1194 "gblInds_wdv: "
1195 << numInds << endl;
1196 std::cerr << os.str();
1197 }
1198 gblInds_wdv = global_inds_wdv_type(
1199 global_inds_dualv_type("Tpetra::CrsGraph::gblInd", numInds));
1200 }
1201 storageStatus_ = Details::STORAGE_1D_UNPACKED;
1202
1203 this->indicesAreLocal_ = (lg == LocalIndices);
1204 this->indicesAreGlobal_ = (lg == GlobalIndices);
1205
1206 if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1207 using Kokkos::ViewAllocateWithoutInitializing;
1208 const char label[] = "Tpetra::CrsGraph::numRowEntries";
1209 if (verbose) {
1210 std::ostringstream os;
1211 os << *prefix << "Allocate k_numRowEntries_: " << numRows
1212 << endl;
1213 std::cerr << os.str();
1214 }
// Allocated uninitialized, then explicitly zero-filled by the deep_copy
// below; the fence completes that fill before the view is committed.
1215 num_row_entries_type numRowEnt(ViewAllocateWithoutInitializing(label), numRows);
1216 // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1217 Kokkos::deep_copy(execution_space(), numRowEnt, static_cast<size_t>(0)); // fill w/ 0s
1218 Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
1219 this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1220 }
1221
1222 // Once indices are allocated, CrsGraph needs to free this information.
1223 this->numAllocForAllRows_ = 0;
1224 this->k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
1225 this->indicesAreAllocated_ = true;
1226
// Re-report any failure of the state check with context identifying
// allocateIndices as the caller.
1227 try {
1228 this->checkInternalState();
1229 } catch (std::logic_error& e) {
1230 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
1231 "At end of allocateIndices, "
1232 "checkInternalState threw std::logic_error: "
1233 << e.what());
1234 } catch (std::exception& e) {
1235 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1236 "At end of allocateIndices, "
1237 "checkInternalState threw std::exception: "
1238 << e.what());
1239 } catch (...) {
1240 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1241 "At end of allocateIndices, "
1242 "checkInternalState threw an exception "
1243 "not a subclass of std::exception.");
1244 }
1245
1246 if (verbose) {
1247 std::ostringstream os;
1248 os << *prefix << "Done" << endl;
1249 std::cerr << os.str();
1250 }
1251}
1252
// Read-only host view of the local column indices allocated to one row.
// Returns an empty (default-constructed) view when the row has no
// allocation or when lclIndsUnpacked_wdv is empty; otherwise returns the
// sub-view of length rowinfo.allocSize starting at rowinfo.offset1D.
1253template <class LocalOrdinal, class GlobalOrdinal, class Node>
1254typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1255 local_inds_dualv_type::t_host::const_type
1258 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1259 return typename local_inds_dualv_type::t_host::const_type();
1260 else
1261 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1262 rowinfo.allocSize,
1263 Access::ReadOnly);
1264}
1265
// Writable (Access::ReadWrite) host view of the local column indices
// allocated to one row. Returns an empty view when the row has no
// allocation or when lclIndsUnpacked_wdv is empty; otherwise returns the
// sub-view of length rowinfo.allocSize starting at rowinfo.offset1D.
1266template <class LocalOrdinal, class GlobalOrdinal, class Node>
1268 local_inds_dualv_type::t_host
1271 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1272 return typename local_inds_dualv_type::t_host();
1273 else
1274 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1275 rowinfo.allocSize,
1276 Access::ReadWrite);
1277}
1278
// Read-only host view of the global column indices allocated to one row.
// Returns an empty view when the row has no allocation or when
// gblInds_wdv is empty; otherwise returns the sub-view of length
// rowinfo.allocSize starting at rowinfo.offset1D.
1279template <class LocalOrdinal, class GlobalOrdinal, class Node>
1281 global_inds_dualv_type::t_host::const_type
1284 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1285 return typename global_inds_dualv_type::t_host::const_type();
1286 else
1287 return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1288 rowinfo.allocSize,
1289 Access::ReadOnly);
1290}
1291
// Read-only device view of the local column indices allocated to one row.
// Returns an empty view when the row has no allocation or when
// lclIndsUnpacked_wdv is empty; otherwise returns the sub-view of length
// rowinfo.allocSize starting at rowinfo.offset1D.
1292template <class LocalOrdinal, class GlobalOrdinal, class Node>
1294 local_inds_dualv_type::t_dev::const_type
1297 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1298 return typename local_inds_dualv_type::t_dev::const_type();
1299 else
1300 return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1301 rowinfo.allocSize,
1302 Access::ReadOnly);
1303}
1304
// Read-only device view of the global column indices allocated to one
// row. Returns an empty view when the row has no allocation or when
// gblInds_wdv is empty; otherwise returns the sub-view of length
// rowinfo.allocSize starting at rowinfo.offset1D.
1305template <class LocalOrdinal, class GlobalOrdinal, class Node>
1307 global_inds_dualv_type::t_dev::const_type
1310 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1311 return typename global_inds_dualv_type::t_dev::const_type();
1312 else
1313 return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1314 rowinfo.allocSize,
1315 Access::ReadOnly);
1316}
1317
// Describe local row myRow as a RowInfo record (localRow, offset1D,
// allocSize, numEntries).
//
//  - rowMap_ is null, or myRow is not a local element of it: localRow and
//    offset1D are the invalid size_t flag and both sizes are 0.
//  - Indices are allocated: offset1D is read from the unpacked host row
//    offsets (0, with allocSize 0, when that view is empty); numEntries is
//    k_numRowEntries_(myRow), or allocSize when k_numRowEntries_ is empty.
//  - Indices are not yet allocated: allocSize is the requested allocation
//    (k_numAllocPerRow_(myRow) if present, else numAllocForAllRows_),
//    numEntries is 0 and offset1D is the invalid flag.
1318template <class LocalOrdinal, class GlobalOrdinal, class Node>
1319RowInfo
1321 getRowInfo(const LocalOrdinal myRow) const {
1322 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
1323 RowInfo ret;
1324 if (this->rowMap_.is_null() || !this->rowMap_->isNodeLocalElement(myRow)) {
1325 ret.localRow = STINV;
1326 ret.allocSize = 0;
1327 ret.numEntries = 0;
1328 ret.offset1D = STINV;
1329 return ret;
1330 }
1331
1332 ret.localRow = static_cast<size_t>(myRow);
1333 if (this->indicesAreAllocated()) {
1334 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1335 // Offsets tell us the allocation size in this case.
1336 if (rowPtrsUnpacked_host.extent(0) == 0) {
1337 ret.offset1D = 0;
1338 ret.allocSize = 0;
1339 } else {
1340 ret.offset1D = rowPtrsUnpacked_host(myRow);
1342 }
1343
// An empty k_numRowEntries_ is treated as "every row is full".
1344 ret.numEntries = (this->k_numRowEntries_.extent(0) == 0) ? ret.allocSize : this->k_numRowEntries_(myRow);
1345 } else { // haven't performed allocation yet; probably won't hit this code
1346 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1347 // allocate, rather than doing lazy allocation at first insert.
1348 // This will make k_numAllocPerRow_ obsolete.
1349 ret.allocSize = (this->k_numAllocPerRow_.extent(0) != 0) ? this->k_numAllocPerRow_(myRow) : // this is a host View
1350 this->numAllocForAllRows_;
1351 ret.numEntries = 0;
1352 ret.offset1D = STINV;
1353 }
1354
1355 return ret;
1356}
1357
// Same as getRowInfo, but the row is identified by its global index
// gblRow, which is first translated to a local row through rowMap_.
//
// Returns the "invalid" record (localRow and offset1D set to the invalid
// size_t flag, both sizes 0) when rowMap_ is null or when gblRow has no
// local index on this process. Otherwise the record is filled in exactly
// as in the allocated / not-yet-allocated branches of getRowInfo.
1358template <class LocalOrdinal, class GlobalOrdinal, class Node>
1359RowInfo
1362 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
1363 RowInfo ret;
1364 if (this->rowMap_.is_null()) {
1365 ret.localRow = STINV;
1366 ret.allocSize = 0;
1367 ret.numEntries = 0;
1368 ret.offset1D = STINV;
1369 return ret;
1370 }
1371 const LocalOrdinal myRow = this->rowMap_->getLocalElement(gblRow);
// getLocalElement signals "not on this process" with the invalid
// LocalOrdinal flag.
1372 if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
1373 ret.localRow = STINV;
1374 ret.allocSize = 0;
1375 ret.numEntries = 0;
1376 ret.offset1D = STINV;
1377 return ret;
1378 }
1380 ret.localRow = static_cast<size_t>(myRow);
1381 if (this->indicesAreAllocated()) {
1382 // graph data structures have the info that we need
1383 //
1384 // if static graph, offsets tell us the allocation size
1385 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1386 if (rowPtrsUnpacked_host.extent(0) == 0) {
1387 ret.offset1D = 0;
1388 ret.allocSize = 0;
1389 } else {
1390 ret.offset1D = rowPtrsUnpacked_host(myRow);
1393
// An empty k_numRowEntries_ is treated as "every row is full".
1394 ret.numEntries = (this->k_numRowEntries_.extent(0) == 0) ? ret.allocSize : this->k_numRowEntries_(myRow);
1395 } else { // haven't performed allocation yet; probably won't hit this code
1396 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1397 // allocate, rather than doing lazy allocation at first insert.
1398 // This will make k_numAllocPerRow_ obsolete.
1399 ret.allocSize = (this->k_numAllocPerRow_.extent(0) != 0) ? this->k_numAllocPerRow_(myRow) : // this is a host View
1400 this->numAllocForAllRows_;
1401 ret.numEntries = 0;
1402 ret.offset1D = STINV;
1403 }
1405 return ret;
1406}
// Verify the size assumptions CrsGraph makes about its ordinal types.
//
// The sizeof relationships (GlobalOrdinal >= LocalOrdinal,
// size_t >= LocalOrdinal, global_size_t >= size_t) are enforced at compile
// time with static_assert. The relationships between the types' maximum
// values are compared at run time and reported as std::runtime_error with
// the shared message `msg`.
1408template <class LocalOrdinal, class GlobalOrdinal, class Node>
1410 staticAssertions() const {
1411 using Teuchos::OrdinalTraits;
1412 typedef LocalOrdinal LO;
1413 typedef GlobalOrdinal GO;
1414 typedef global_size_t GST;
1415
1416 // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1417 // This is so that we can store local indices in the memory
1418 // formerly occupied by global indices.
1419 static_assert(sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal),
1420 "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1421 // Assumption: max(size_t) >= max(LocalOrdinal)
1422 // This is so that we can represent any LocalOrdinal as a size_t.
1423 static_assert(sizeof(size_t) >= sizeof(LocalOrdinal),
1424 "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1425 static_assert(sizeof(GST) >= sizeof(size_t),
1426 "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1428 // FIXME (mfh 30 Sep 2015) We're not using
1429 // Teuchos::CompileTimeAssert any more. Can we do these checks
1430 // with static_assert?
1431
1432 // can't call max() with CompileTimeAssert, because it isn't a
1433 // constant expression; will need to make this a runtime check
1434 const char msg[] =
1435 "Tpetra::CrsGraph: Object cannot be created with the "
1436 "given template arguments: size assumptions are not valid.";
// Each condition below is an error condition: max(LO) > max(size_t),
// max(LO) > max(GO), max(GO) > max(GST), and max(size_t) > max(GST).
1438 static_cast<size_t>(Teuchos::OrdinalTraits<LO>::max()) > Teuchos::OrdinalTraits<size_t>::max(),
1439 std::runtime_error, msg);
1441 static_cast<GST>(Teuchos::OrdinalTraits<LO>::max()) > static_cast<GST>(Teuchos::OrdinalTraits<GO>::max()),
1442 std::runtime_error, msg);
1444 static_cast<size_t>(Teuchos::OrdinalTraits<GO>::max()) > Teuchos::OrdinalTraits<GST>::max(),
1445 std::runtime_error, msg);
1447 Teuchos::OrdinalTraits<size_t>::max() > Teuchos::OrdinalTraits<GST>::max(),
1448 std::runtime_error, msg);
1450
// Append new column indices to the end of one row's current entries.
//
// \param newInds Input indices; the ginds or linds member is read
//                depending on lg.
// \param lg      Kind of the input indices (GlobalIndices / LocalIndices).
// \param I       Kind of indices to store in the graph.
//
// Supported combinations: global -> global (copied), global -> local
// (each index converted with colMap_->getLocalElement), and
// local -> local (copied). Local input with global storage reports
// std::logic_error. No capacity check is made outside debug mode.
//
// On return rowinfo.numEntries and k_numRowEntries_(rowinfo.localRow) have
// both been increased by the number of inserted indices, and the graph
// has been marked as locally modified.
//
// \return The number of indices written.
1451template <class LocalOrdinal, class GlobalOrdinal, class Node>
1452size_t
1455 const SLocalGlobalViews& newInds,
1456 const ELocalGlobal lg,
1457 const ELocalGlobal I) {
1458 using Teuchos::ArrayView;
1459 typedef LocalOrdinal LO;
1460 typedef GlobalOrdinal GO;
1461 const char tfecfFuncName[] = "insertIndices: ";
1462
// oldNumEnt is only captured (and only checked at the end) in debug mode.
1463 size_t oldNumEnt = 0;
1464 if (debug_) {
1465 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1466 "lg must be either GlobalIndices or LocalIndices.");
1467 oldNumEnt = this->getNumEntriesInLocalRow(rowinfo.localRow);
1468 }
1469
1470 size_t numNewInds = 0;
1471 if (lg == GlobalIndices) { // input indices are global
1473 numNewInds = new_ginds.size();
1474 if (I == GlobalIndices) { // store global indices
1475 auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
// NOTE(review): this compares against the size of the whole index view,
// not against this row's allocation -- confirm that is the intended check.
1476 if (debug_) {
1477 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(gind_view.size()) <
1478 rowinfo.numEntries + numNewInds,
1479 std::logic_error,
1480 "gind_view.size() = " << gind_view.size()
1481 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1482 << ") + numNewInds (= " << numNewInds << ").");
1483 }
// Write position: first unused slot of this row.
1484 GO* const gblColInds_out = gind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1485 for (size_t k = 0; k < numNewInds; ++k) {
1487 }
1488 } else if (I == LocalIndices) { // store local indices
1489 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1490 if (debug_) {
1491 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(lind_view.size()) <
1492 rowinfo.numEntries + numNewInds,
1493 std::logic_error,
1494 "lind_view.size() = " << lind_view.size()
1495 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1496 << ") + numNewInds (= " << numNewInds << ").");
1497 }
1498 LO* const lclColInds_out = lind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
// Convert each global input index to a local one through the column Map.
1499 for (size_t k = 0; k < numNewInds; ++k) {
1500 lclColInds_out[k] = colMap_->getLocalElement(new_ginds[k]);
1501 }
1503 } else if (lg == LocalIndices) { // input indices are local
1504 ArrayView<const LO> new_linds = newInds.linds;
1505 numNewInds = new_linds.size();
1506 if (I == LocalIndices) { // store local indices
1507 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1508 if (debug_) {
1509 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(lind_view.size()) <
1510 rowinfo.numEntries + numNewInds,
1511 std::logic_error,
1512 "lind_view.size() = " << lind_view.size()
1513 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1514 << ") + numNewInds (= " << numNewInds << ").");
1515 }
1516 LO* const lclColInds_out = lind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1517 for (size_t k = 0; k < numNewInds; ++k) {
1518 lclColInds_out[k] = new_linds[k];
1519 }
1520 } else if (I == GlobalIndices) {
1521 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
1522 "The case where the input indices are local "
1523 "and the indices to write are global (lg=LocalIndices, I="
1524 "GlobalIndices) is not implemented, because it does not make sense."
1525 << std::endl
1526 << "If you have correct local column indices, that "
1527 "means the graph has a column Map. In that case, you should be "
1528 "storing local indices.");
1529 }
1530 }
1531
// Keep the caller's RowInfo and the stored per-row count in sync.
1532 rowinfo.numEntries += numNewInds;
1533 this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1534 this->setLocallyModified();
1535
1536 if (debug_) {
1537 const size_t chkNewNumEnt =
1538 this->getNumEntriesInLocalRow(rowinfo.localRow);
1540 "chkNewNumEnt = " << chkNewNumEnt
1541 << " != oldNumEnt (= " << oldNumEnt
1542 << ") + numNewInds (= " << numNewInds << ").");
1543 }
1544
1545 return numNewInds;
1546}
1547
// Convenience overload: looks up the RowInfo for local row lclRow with
// getRowInfo() and forwards to the RowInfo-based insertGlobalIndicesImpl
// overload, returning its result.
1548template <class LocalOrdinal, class GlobalOrdinal, class Node>
1549size_t
1553 const size_t numInputInds) {
1554 return this->insertGlobalIndicesImpl(this->getRowInfo(lclRow),
1556}
1557
// Insert global column indices into the row described by rowInfo.
//
// The insertion itself is delegated to Details::insertCrsIndices, which is
// given the unpacked host row offsets, a writable host view of
// gblInds_wdv, the row's current entry count, the input indices and `fun`.
// `fun` is passed through unchanged; its call contract is defined by
// insertCrsIndices and is not visible here.
//
// If insertCrsIndices returns the invalid size_t flag, the insertion is
// treated as a capacity failure: a diagnostic message listing the input
// and the row's current global indices is built and reported as
// std::runtime_error. Otherwise k_numRowEntries_(lclRow) is increased by
// the number inserted and the graph is marked as locally modified.
//
// \return The number of indices reported as inserted by insertCrsIndices.
1558template <class LocalOrdinal, class GlobalOrdinal, class Node>
1559size_t
1563 const size_t numInputInds,
1564 std::function<void(const size_t, const size_t, const size_t)> fun) {
1566 using Kokkos::MemoryUnmanaged;
1567 using Kokkos::subview;
1568 using Kokkos::View;
1569 using Teuchos::ArrayView;
1570 using LO = LocalOrdinal;
1571 using GO = GlobalOrdinal;
1572
1573 const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1574 const LO lclRow = static_cast<LO>(rowInfo.localRow);
1575
1576 auto numEntries = rowInfo.numEntries;
1579 size_t numInserted;
// The host view is confined to this scope so that it is released before
// the code below touches the graph again.
1580 {
1581 auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1582 numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1584 numEntries, inputInds, fun);
1585 }
1586
1587 const bool insertFailed =
1588 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1589 if (insertFailed) {
1590 constexpr size_t ONE(1);
1591 const int myRank = this->getComm()->getRank();
1592 std::ostringstream os;
1593
1594 os << "Proc " << myRank << ": Not enough capacity to insert "
1595 << numInputInds
1596 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1597 << " into local row " << lclRow << ", which currently has "
1598 << rowInfo.numEntries
1599 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1600 << " and total allocation size " << rowInfo.allocSize
1601 << ". ";
1602 const size_t maxNumToPrint =
1605 numInputInds);
1606 verbosePrintArray(os, inputGblColIndsView,
1607 "Input global "
1608 "column indices",
1610 os << ", ";
1611 auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1613 rowInfo.numEntries);
1614 verbosePrintArray(os, curGblColIndsView,
1615 "Current global "
1616 "column indices",
1618 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
1619 }
1620
1621 this->k_numRowEntries_(lclRow) += numInserted;
1622
1623 this->setLocallyModified();
1624 return numInserted;
1625}
1626
// Insert local column indices into local row myRow.
//
// The row's RowInfo is looked up with getRowInfo(myRow), the input
// ArrayView is wrapped in an unmanaged host view, and the insertion is
// delegated to Details::insertCrsIndices on a writable host view of
// lclIndsUnpacked_wdv. `fun` is passed through unchanged to that routine.
//
// If insertCrsIndices returns the invalid size_t flag, the insertion is
// treated as a capacity failure and reported as std::runtime_error.
// Otherwise k_numRowEntries_(myRow) is increased by the number inserted
// and the graph is marked as locally modified. In debug mode the row's
// entry count is re-read and compared against newNumEntries.
1627template <class LocalOrdinal, class GlobalOrdinal, class Node>
1630 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1631 std::function<void(const size_t, const size_t, const size_t)> fun) {
1632 using Kokkos::MemoryUnmanaged;
1633 using Kokkos::subview;
1634 using Kokkos::View;
1635 using LO = LocalOrdinal;
1636
// NOTE(review): the string below spells "Locall" with a double 'l'; it is
// used as the function-name prefix of this function's exception messages.
1637 const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1638
1639 const RowInfo rowInfo = this->getRowInfo(myRow);
1640
1641 size_t numNewInds = 0;
1642 size_t newNumEntries = 0;
1643
1644 auto numEntries = rowInfo.numEntries;
1645 // Note: Teuchos::ArrayViews are in HostSpace
1647 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1648 size_t numInserted = 0;
// The host view is confined to this scope so that it is released before
// the code below touches the graph again.
1649 {
1650 auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1651 numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1652 numEntries, inputInds, fun);
1653 }
1654
1655 const bool insertFailed =
1656 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1657 if (insertFailed) {
1658 constexpr size_t ONE(1);
1659 const size_t numInputInds(indices.size());
1660 const int myRank = this->getComm()->getRank();
1661 std::ostringstream os;
1662 os << "On MPI Process " << myRank << ": Not enough capacity to "
1663 "insert "
1664 << numInputInds
1665 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1666 << " into local row " << myRow << ", which currently has "
1667 << rowInfo.numEntries
1668 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1669 << " and total allocation size " << rowInfo.allocSize << ".";
1670 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
1671 }
1672 numNewInds = numInserted;
1674
1675 this->k_numRowEntries_(myRow) += numNewInds;
1676 this->setLocallyModified();
1677
1678 if (debug_) {
1679 const size_t chkNewNumEntries = this->getNumEntriesInLocalRow(myRow);
1681 "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1682 << " != newNumEntries = " << newNumEntries
1683 << ". Please report this bug to the Tpetra developers.");
1684 }
1685}
1686
// Look up the given global column indices in the row described by rowInfo.
//
// The input ArrayView is wrapped in an unmanaged host view and the search
// is delegated to the Details::findCrsIndices family:
//  - locally indexed graph: global inputs are translated through the
//    column Map (colMap.getLocalElement) and searched in
//    lclIndsUnpacked_wdv, using findCrsIndicesSorted when isSorted() is
//    true and findCrsIndices otherwise;
//  - globally indexed graph: inputs are searched directly in gblInds_wdv.
// `fun` is passed through unchanged to the search routine.
//
// \return The count reported by the search routine; the invalid size_t
//         flag if the graph is locally indexed but has no column Map; 0 if
//         the graph is neither locally nor globally indexed.
1687template <class LocalOrdinal, class GlobalOrdinal, class Node>
1688size_t
1691 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1692 std::function<void(const size_t, const size_t, const size_t)> fun) const {
1693 using GO = GlobalOrdinal;
1694 using Kokkos::MemoryUnmanaged;
1695 using Kokkos::View;
1697 auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1698
1700 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1701
1702 size_t numFound = 0;
1703 LocalOrdinal lclRow = rowInfo.localRow;
1704 if (this->isLocallyIndexed()) {
// Without a column Map, global inputs cannot be translated to local ones.
1705 if (this->colMap_.is_null())
1706 return invalidCount;
1707 const auto& colMap = *(this->colMap_);
1708 auto map = [&](GO const gblInd) { return colMap.getLocalElement(gblInd); };
1709 if (this->isSorted()) {
1710 numFound = Details::findCrsIndicesSorted(
1711 lclRow,
1712 this->getRowPtrsUnpackedHost(),
1713 rowInfo.numEntries,
1714 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly),
1715 inputInds,
1716 map,
1717 fun);
1718 } else {
1719 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1720 rowInfo.numEntries,
1721 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1722 }
1723 } else if (this->isGloballyIndexed()) {
1724 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1725 rowInfo.numEntries,
1726 gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1727 }
1728 return numFound;
1729}
1730
// Install new domain and range Maps.
//
// Each Map is compared with the stored one by pointer only. When the
// domain Map changes, importer_ is reset to null; when the range Map
// changes, exporter_ is reset to null. A Map that is pointer-identical to
// the stored one leaves the corresponding member untouched.
1731template <class LocalOrdinal, class GlobalOrdinal, class Node>
1733 setDomainRangeMaps(const Teuchos::RCP<const map_type>& domainMap,
1734 const Teuchos::RCP<const map_type>& rangeMap) {
1735 // simple pointer comparison for equality
1736 if (domainMap_ != domainMap) {
1737 domainMap_ = domainMap;
1738 importer_ = Teuchos::null;
1739 }
1740 if (rangeMap_ != rangeMap) {
1741 rangeMap_ = rangeMap;
1742 exporter_ = Teuchos::null;
1743 }
1744}
1745
// Invalidate the cached global constants: globalNumEntries_ and
// globalMaxNumRowEntries_ are set to the invalid global_size_t flag and
// haveGlobalConstants_ is cleared. This is the state checkInternalState()
// requires whenever haveGlobalConstants_ is false.
1746template <class LocalOrdinal, class GlobalOrdinal, class Node>
1749 const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1750
1751 globalNumEntries_ = INV;
1752 globalMaxNumRowEntries_ = INV;
1753 haveGlobalConstants_ = false;
1754}
1755
1756template <class LocalOrdinal, class GlobalOrdinal, class Node>
1758 checkInternalState() const {
1759 if (debug_) {
1760 using std::endl;
1761 const char tfecfFuncName[] = "checkInternalState: ";
1762 const char suffix[] = " Please report this bug to the Tpetra developers.";
1763
1764 std::unique_ptr<std::string> prefix;
1765 if (verbose_) {
1766 prefix = this->createPrefix("CrsGraph", "checkInternalState");
1767 std::ostringstream os;
1768 os << *prefix << "Start" << endl;
1769 std::cerr << os.str();
1770 }
1771
1772 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid();
1773 // const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1774 // check the internal state of this data structure
1775 // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1776 // always remains in a valid state
1777
1778 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->rowMap_.is_null(), std::logic_error,
1779 "Row Map is null." << suffix);
1780 // This may access the row Map, so we need to check first (above)
1781 // whether the row Map is null.
1782 const LocalOrdinal lclNumRows =
1783 static_cast<LocalOrdinal>(this->getLocalNumRows());
1784
1785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillActive() == this->isFillComplete(), std::logic_error,
1786 "Graph cannot be both fill active and fill complete." << suffix);
1787 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete() &&
1788 (this->colMap_.is_null() ||
1789 this->rangeMap_.is_null() ||
1790 this->domainMap_.is_null()),
1791 std::logic_error,
1792 "Graph is full complete, but at least one of {column, range, domain} "
1793 "Map is null."
1794 << suffix);
1795 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized() && !this->indicesAreAllocated(),
1796 std::logic_error,
1797 "Storage is optimized, but indices are not "
1798 "allocated, not even trivially."
1799 << suffix);
1800
1801 size_t nodeAllocSize = 0;
1802 try {
1803 nodeAllocSize = this->getLocalAllocationSize();
1804 } catch (std::logic_error& e) {
1805 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1806 "getLocalAllocationSize threw "
1807 "std::logic_error: "
1808 << e.what());
1809 } catch (std::exception& e) {
1810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1811 "getLocalAllocationSize threw an "
1812 "std::exception: "
1813 << e.what());
1814 } catch (...) {
1815 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1816 "getLocalAllocationSize threw an exception "
1817 "not a subclass of std::exception.");
1818 }
1819
1820 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized() &&
1821 nodeAllocSize != this->getLocalNumEntries(),
1822 std::logic_error,
1823 "Storage is optimized, but "
1824 "this->getLocalAllocationSize() = "
1825 << nodeAllocSize
1826 << " != this->getLocalNumEntries() = " << this->getLocalNumEntries()
1827 << "." << suffix);
1828 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_ &&
1829 (this->globalNumEntries_ != GSTI ||
1830 this->globalMaxNumRowEntries_ != GSTI),
1831 std::logic_error,
1832 "Graph claims not to have global constants, but "
1833 "some of the global constants are not marked as invalid."
1834 << suffix);
1835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->haveGlobalConstants_ &&
1836 (this->globalNumEntries_ == GSTI ||
1837 this->globalMaxNumRowEntries_ == GSTI),
1838 std::logic_error,
1839 "Graph claims to have global constants, but "
1840 "some of them are marked as invalid."
1841 << suffix);
1842 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->haveGlobalConstants_ &&
1843 (this->globalNumEntries_ < this->getLocalNumEntries() ||
1844 this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
1845 std::logic_error,
1846 "Graph claims to have global constants, and "
1847 "all of the values of the global constants are valid, but "
1848 "some of the local constants are greater than "
1849 "their corresponding global constants."
1850 << suffix);
1851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreAllocated() &&
1852 (this->numAllocForAllRows_ != 0 ||
1853 this->k_numAllocPerRow_.extent(0) != 0),
1854 std::logic_error,
1855 "The graph claims that its indices are allocated, but "
1856 "either numAllocForAllRows_ (= "
1857 << this->numAllocForAllRows_ << ") is "
1858 "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
1859 "the graph is supposed to release its \"allocation specifications\" "
1860 "when it allocates its indices."
1861 << suffix);
1862 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1863 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
1865 std::logic_error,
1866 "The host and device views of k_rowPtrs_ have "
1867 "different sizes; rowPtrsUnpacked_host_ has size "
1868 << rowPtrsUnpacked_host.extent(0)
1869 << ", but rowPtrsUnpacked_dev_ has size "
1870 << rowPtrsUnpacked_dev.extent(0)
1871 << "." << suffix);
1872 if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
1874 std::logic_error,
1875 "The graph is globally indexed and "
1876 "k_rowPtrs has nonzero size "
1877 << rowPtrsUnpacked_host.extent(0)
1878 << ", but that size does not equal lclNumRows+1 = "
1879 << (lclNumRows + 1) << "." << suffix);
1881 std::logic_error,
1882 "The graph is globally indexed and "
1883 "k_rowPtrs_ has nonzero size "
1884 << rowPtrsUnpacked_host.extent(0)
1885 << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
1887 << " != gblInds_wdv.extent(0)="
1888 << gblInds_wdv.extent(0) << "." << suffix);
1889 }
1890 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() &&
1891 rowPtrsUnpacked_host.extent(0) != 0 &&
1892 (static_cast<size_t>(rowPtrsUnpacked_host.extent(0)) !=
1893 static_cast<size_t>(lclNumRows + 1) ||
1895 static_cast<size_t>(this->lclIndsUnpacked_wdv.extent(0))),
1896 std::logic_error,
1897 "If k_rowPtrs_ has nonzero size and "
1898 "the graph is locally indexed, then "
1899 "k_rowPtrs_ must have N+1 rows, and "
1900 "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)."
1901 << suffix);
1902
1903 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreAllocated() &&
1904 nodeAllocSize > 0 &&
1905 this->lclIndsUnpacked_wdv.extent(0) == 0 &&
1906 this->gblInds_wdv.extent(0) == 0,
1907 std::logic_error,
1908 "Graph is allocated nontrivially, but "
1909 "but 1-D allocations are not present."
1910 << suffix);
1911
1912 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->indicesAreAllocated() &&
1913 ((rowPtrsUnpacked_host.extent(0) != 0 ||
1914 this->k_numRowEntries_.extent(0) != 0) ||
1915 this->lclIndsUnpacked_wdv.extent(0) != 0 ||
1916 this->gblInds_wdv.extent(0) != 0),
1917 std::logic_error,
1918 "If indices are not allocated, "
1919 "then none of the buffers should be."
1920 << suffix);
1921 // indices may be local or global only if they are allocated
1922 // (numAllocated is redundant; could simply be indicesAreLocal_ ||
1923 // indicesAreGlobal_)
1924 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
1925 !this->indicesAreAllocated_,
1926 std::logic_error,
1927 "Indices may be local or global only if they are "
1928 "allocated."
1929 << suffix);
1930 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreLocal_ && this->indicesAreGlobal_,
1931 std::logic_error, "Indices may not be both local and global." << suffix);
1932 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreLocal_ && gblInds_wdv.extent(0) != 0,
1933 std::logic_error,
1934 "Indices are local, but "
1935 "gblInds_wdv.extent(0) (= "
1936 << gblInds_wdv.extent(0)
1937 << ") != 0. In other words, if indices are local, then "
1938 "allocations of global indices should not be present."
1939 << suffix);
1940 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreGlobal_ && lclIndsUnpacked_wdv.extent(0) != 0,
1941 std::logic_error,
1942 "Indices are global, but "
1943 "lclIndsUnpacked_wdv.extent(0) (= "
1944 << lclIndsUnpacked_wdv.extent(0)
1945 << ") != 0. In other words, if indices are global, "
1946 "then allocations for local indices should not be present."
1947 << suffix);
1949 lclIndsUnpacked_wdv.extent(0) == 0 && getLocalNumRows() > 0,
1950 std::logic_error,
1951 "Indices are local and "
1952 "getLocalAllocationSize() = "
1953 << nodeAllocSize << " > 0, but "
1954 "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
1955 << getLocalNumRows() << " > 0." << suffix);
1956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreGlobal_ && nodeAllocSize > 0 &&
1957 gblInds_wdv.extent(0) == 0 && getLocalNumRows() > 0,
1958 std::logic_error,
1959 "Indices are global and "
1960 "getLocalAllocationSize() = "
1961 << nodeAllocSize << " > 0, but "
1962 "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
1963 << getLocalNumRows() << " > 0." << suffix);
1964 // check the actual allocations
1965 if (this->indicesAreAllocated() &&
1966 rowPtrsUnpacked_host.extent(0) != 0) {
1967 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(rowPtrsUnpacked_host.extent(0)) !=
1968 this->getLocalNumRows() + 1,
1969 std::logic_error,
1970 "Indices are allocated and "
1971 "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
1972 << rowPtrsUnpacked_host.extent(0) << " != getLocalNumRows()+1 = "
1973 << (this->getLocalNumRows() + 1) << "." << suffix);
1974 const size_t actualNumAllocated =
1975 rowPtrsUnpacked_host(this->getLocalNumRows());
1976 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() &&
1977 static_cast<size_t>(this->lclIndsUnpacked_wdv.extent(0)) != actualNumAllocated,
1978 std::logic_error,
1979 "Graph is locally indexed, indices are "
1980 "are allocated, and k_rowPtrs_ has nonzero length, but "
1981 "lclIndsUnpacked_wdv.extent(0) = "
1982 << this->lclIndsUnpacked_wdv.extent(0)
1983 << " != actualNumAllocated = " << actualNumAllocated << suffix);
1984 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed() &&
1985 static_cast<size_t>(this->gblInds_wdv.extent(0)) != actualNumAllocated,
1986 std::logic_error,
1987 "Graph is globally indexed, indices "
1988 "are allocated, and k_rowPtrs_ has nonzero length, but "
1989 "gblInds_wdv.extent(0) = "
1990 << this->gblInds_wdv.extent(0)
1991 << " != actualNumAllocated = " << actualNumAllocated << suffix);
1992 }
1993
1994 if (verbose_) {
1995 std::ostringstream os;
1996 os << *prefix << "Done" << endl;
1997 std::cerr << os.str();
1998 }
1999 }
2001
// Looks up the row with global index `globalRow` and returns its
// rowInfo.numEntries, or Teuchos::OrdinalTraits<size_t>::invalid() when the
// lookup reports an invalid local row.
// NOTE(review): the embedded listing numbers jump from 2003 to 2006, so the
// line(s) carrying the qualified function name and parameter list are missing
// from this view; presumably this is getNumEntriesInGlobalRow -- confirm.
2002template <class LocalOrdinal, class GlobalOrdinal, class Node>
2003size_t
2006 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(globalRow);
2007 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
// Invalid row: hand the "invalid" sentinel back to the caller instead of a count.
2008 return Teuchos::OrdinalTraits<size_t>::invalid();
2009 } else {
2010 return rowInfo.numEntries;
2011 }
2012}
2013
// Returns rowInfo.numEntries for the row with local index `localRow`, or
// Teuchos::OrdinalTraits<size_t>::invalid() when getRowInfo() reports an
// invalid local row.
// NOTE(review): listing line 2016 (the class qualifier of the definition) is
// missing from this view.
2014template <class LocalOrdinal, class GlobalOrdinal, class Node>
2015size_t
2017 getNumEntriesInLocalRow(LocalOrdinal localRow) const {
2018 const RowInfo rowInfo = this->getRowInfo(localRow);
2019 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
// Invalid row: return the sentinel rather than a count.
2020 return Teuchos::OrdinalTraits<size_t>::invalid();
2021 } else {
2022 return rowInfo.numEntries;
2023 }
2024}
2025
// Looks up the row with global index `globalRow` and returns its
// rowInfo.allocSize, or Teuchos::OrdinalTraits<size_t>::invalid() when the
// lookup reports an invalid local row.
// NOTE(review): listing lines 2028-2029 (qualified name and parameter list)
// are missing from this view; presumably this is
// getNumAllocatedEntriesInGlobalRow -- confirm against the class declaration.
2026template <class LocalOrdinal, class GlobalOrdinal, class Node>
2027size_t
2030 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(globalRow);
2031 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2032 return Teuchos::OrdinalTraits<size_t>::invalid();
2033 } else {
// Note: this is the allocation size of the row, not its current entry count.
2034 return rowInfo.allocSize;
2035 }
2036}
2037
// Returns rowInfo.allocSize for the row with local index `localRow`, or
// Teuchos::OrdinalTraits<size_t>::invalid() when getRowInfo() reports an
// invalid local row.
// NOTE(review): listing lines 2040-2041 (qualified name and parameter list)
// are missing from this view; presumably this is
// getNumAllocatedEntriesInLocalRow -- confirm against the class declaration.
2038template <class LocalOrdinal, class GlobalOrdinal, class Node>
2039size_t
2042 const RowInfo rowInfo = this->getRowInfo(localRow);
2043 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2044 return Teuchos::OrdinalTraits<size_t>::invalid();
2045 } else {
// Note: this is the allocation size of the row, not its current entry count.
2046 return rowInfo.allocSize;
2047 }
2048}
2049
// Returns the host view produced by getRowPtrsPackedHost(), i.e. the packed
// (not the unpacked) row pointers.
// NOTE(review): listing lines 2052 (class qualifier) and 2055 (presumably the
// closing brace) are missing from this view.
2050template <class LocalOrdinal, class GlobalOrdinal, class Node>
2051typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2053 getLocalRowPtrsHost() const {
2054 return getRowPtrsPackedHost();
2056
// Returns the device view produced by getRowPtrsPackedDevice(), i.e. the
// packed (not the unpacked) row pointers.
// NOTE(review): listing line 2059 (class qualifier) is missing from this view.
2057template <class LocalOrdinal, class GlobalOrdinal, class Node>
2058typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2060 getLocalRowPtrsDevice() const {
2061 return getRowPtrsPackedDevice();
2062}
2063
// Returns a read-only host view of the packed local column indices
// (lclIndsPacked_wdv).
// NOTE(review): listing lines 2066 (class qualifier) and 2069 (presumably the
// closing brace) are missing from this view.
2064template <class LocalOrdinal, class GlobalOrdinal, class Node>
2065typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2067 getLocalIndicesHost() const {
2068 return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2070
// Returns a read-only device view of the packed local column indices
// (lclIndsPacked_wdv).
// NOTE(review): listing lines 2072-2073 (return type and class qualifier) are
// missing from this view.
2071template <class LocalOrdinal, class GlobalOrdinal, class Node>
2074 getLocalIndicesDevice() const {
2075 return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2076}
2077
// Copies the column indices of local row `localRow` into `indices`, expressed
// as local column indices, and writes the row's entry count to `numEntries`.
//
// Throws std::runtime_error when the graph is globally indexed without a
// column Map, or when `indices` is smaller than the row's entry count. The
// size check runs before `numEntries` is written, so a failed call leaves
// both outputs untouched.
//
// NOTE(review): listing lines 2079-2080 (return type, qualified name, first
// parameter) and 2086 (presumably the opening of a
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC call) are missing from this view.
2078template <class LocalOrdinal, class GlobalOrdinal, class Node>
2081 nonconst_local_inds_host_view_type& indices,
2082 size_t& numEntries) const {
// NOTE(review): ArrayView is not referenced in the visible body; the
// using-declaration may be a leftover.
2083 using Teuchos::ArrayView;
2084 const char tfecfFuncName[] = "getLocalRowCopy: ";
2085
2087 isGloballyIndexed() && !hasColMap(), std::runtime_error,
2088 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2089 "does not have a column Map yet. That means we don't have local indices "
2090 "for columns yet, so it doesn't make sense to call this method. If the "
2091 "graph doesn't have a column Map yet, you should call fillComplete on "
2092 "it first.");
2093
2094 // This does the right thing (reports an empty row) if the input
2095 // row is invalid.
2096 const RowInfo rowinfo = this->getRowInfo(localRow);
2097 // No side effects on error.
2098 const size_t theNumEntries = rowinfo.numEntries;
2099 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) < theNumEntries, std::runtime_error,
2100 "Specified storage (size==" << indices.size() << ") does not suffice "
2101 "to hold all "
2102 << theNumEntries << " entry/ies for this row.");
2103 numEntries = theNumEntries;
2104
2105 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
2106 if (isLocallyIndexed()) {
// Indices are already local: plain element-wise copy.
2107 auto lclInds = getLocalIndsViewHost(rowinfo);
2108 for (size_t j = 0; j < theNumEntries; ++j) {
2109 indices[j] = lclInds(j);
2110 }
2111 } else if (isGloballyIndexed()) {
// Indices are stored as global: translate each one through the column Map.
// The guard above ensures a column Map exists on this path.
2112 auto gblInds = getGlobalIndsViewHost(rowinfo);
2113 for (size_t j = 0; j < theNumEntries; ++j) {
2114 indices[j] = colMap_->getLocalElement(gblInds(j));
2115 }
2116 }
2117 }
2118}
2119
// Copies the column indices of the row with global index `globalRow` into
// `indices`, expressed as global column indices, and writes the row's entry
// count to `numEntries`.
//
// Throws std::runtime_error when `indices` is smaller than the row's entry
// count, or when the column Map's getGlobalElements() call reports an error
// on the locally indexed path.
//
// NOTE(review): listing lines 2121-2122 (return type, qualified name, first
// parameter) and 2132 (presumably the opening of a
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC call) are missing from this view.
2120template <class LocalOrdinal, class GlobalOrdinal, class Node>
2123 nonconst_global_inds_host_view_type& indices,
2124 size_t& numEntries) const {
// NOTE(review): ArrayView is not referenced in the visible body.
2125 using Teuchos::ArrayView;
2126 const char tfecfFuncName[] = "getGlobalRowCopy: ";
2127
2128 // This does the right thing (reports an empty row) if the input
2129 // row is invalid.
2130 const RowInfo rowinfo = getRowInfoFromGlobalRowIndex(globalRow);
2131 const size_t theNumEntries = rowinfo.numEntries;
2133 static_cast<size_t>(indices.size()) < theNumEntries, std::runtime_error,
2134 "Specified storage (size==" << indices.size() << ") does not suffice "
2135 "to hold all "
2136 << theNumEntries << " entry/ies for this row.");
2137 numEntries = theNumEntries; // first side effect
2138
2139 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
2140 if (isLocallyIndexed()) {
// Indices are stored as local: let the column Map convert the whole row to
// global indices in one call; a true return value is treated as failure.
2141 auto lclInds = getLocalIndsViewHost(rowinfo);
2142 bool err = colMap_->getGlobalElements(lclInds.data(), theNumEntries, indices.data());
2143 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(err, std::runtime_error, "getGlobalElements error");
2144 } else if (isGloballyIndexed()) {
// Indices are already global: raw byte copy of theNumEntries elements.
// NOTE(review): std::memcpy is declared in <cstring>, which is not among the
// visible #include lines -- confirm it arrives transitively.
2145 auto gblInds = getGlobalIndsViewHost(rowinfo);
2146 std::memcpy(
2147 (void*)indices.data(),
2148 (const void*)gblInds.data(),
2149 theNumEntries * sizeof(*indices.data()));
2150 }
2151 }
2152}
2153
// Sets `indices` to a read-only host view of the local column indices of
// local row `localRow`, without copying.
//
// Throws std::runtime_error when the graph is globally indexed. If the row is
// invalid or has no entries, `indices` is set to a default-constructed
// (empty) view.
//
// NOTE(review): listing lines 2155-2156 (return type and qualified name) are
// missing from this view.
2154template <class LocalOrdinal, class GlobalOrdinal, class Node>
2157 const LocalOrdinal localRow,
2158 local_inds_host_view_type& indices) const {
2159 const char tfecfFuncName[] = "getLocalRowView: ";
2160
2161 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed(), std::runtime_error,
2162 "The graph's indices are "
2163 "currently stored as global indices, so we cannot return a view with "
2164 "local column indices, whether or not the graph has a column Map. If "
2165 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2166
2167 const RowInfo rowInfo = getRowInfo(localRow);
2168 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
2169 rowInfo.numEntries > 0) {
// The view covers numEntries elements of the unpacked storage starting at
// the row's offset1D.
2170 indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2171 rowInfo.numEntries,
2172 Access::ReadOnly);
2173 } else {
2174 // This does the right thing (reports an empty row) if the input
2175 // row is invalid.
2176 indices = local_inds_host_view_type();
2177 }
2178
// Debug-only consistency check between the returned view and the row count.
// NOTE(review): for an invalid row getNumEntriesInLocalRow() returns
// OrdinalTraits<size_t>::invalid() while indices.size() is 0, so this check
// appears to throw std::logic_error in debug mode for an invalid row --
// confirm whether that is intended.
2179 if (debug_) {
2180 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
2181 getNumEntriesInLocalRow(localRow),
2182 std::logic_error,
2183 "indices.size() "
2184 "= " << indices.extent(0)
2185 << " != getNumEntriesInLocalRow(localRow=" << localRow << ") = " << getNumEntriesInLocalRow(localRow) << ". Please report this bug to the Tpetra developers.");
2186 }
2187}
2188
// Sets `indices` to a read-only host view of the global column indices of
// the row with global index `globalRow`, without copying.
//
// Throws std::runtime_error when the graph is locally indexed. If the row is
// invalid or has no entries, `indices` is set to a default-constructed
// (empty) view.
//
// NOTE(review): listing lines 2190-2192 (return type, qualified name, first
// parameter) are missing from this view.
2189template <class LocalOrdinal, class GlobalOrdinal, class Node>
2193 global_inds_host_view_type& indices) const {
2194 const char tfecfFuncName[] = "getGlobalRowView: ";
2195
2196 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isLocallyIndexed(), std::runtime_error,
2197 "The graph's indices are "
2198 "currently stored as local indices, so we cannot return a view with "
2199 "global column indices. Use getGlobalRowCopy() instead.");
2200
2201 // This does the right thing (reports an empty row) if the input
2202 // row is invalid.
2203 const RowInfo rowInfo = getRowInfoFromGlobalRowIndex(globalRow);
2204 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
2205 rowInfo.numEntries > 0) {
// The view covers numEntries elements of gblInds_wdv starting at the row's
// offset1D.
2206 indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2207 rowInfo.numEntries,
2208 Access::ReadOnly);
2209 } else {
2210 indices = typename global_inds_dualv_type::t_host::const_type();
2211 }
// Debug-only consistency check between the returned view and the row count.
// NOTE(review): if getNumEntriesInGlobalRow() returns the invalid() sentinel
// for a row not on this process, this check would throw std::logic_error in
// debug mode even though an empty view was returned -- confirm.
2212 if (debug_) {
2213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
2214 getNumEntriesInGlobalRow(globalRow),
2215 std::logic_error, "indices.size() = " << indices.extent(0) << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = " << getNumEntriesInGlobalRow(globalRow) << ". Please report this bug to the Tpetra developers.");
2216 }
2217}
2218
// Inserts the local column indices `indices` into local row `localRow`.
//
// Throws std::runtime_error when fill is not active, when the graph is
// globally indexed, when there is no column Map, or when `localRow` is not in
// the row Map on this process. In debug mode it additionally throws
// std::invalid_argument when any input index is not in the column Map.
// Index storage is allocated (as local indices) on first use.
//
// NOTE(review): listing lines 2220 (return type and class qualifier) and 2249
// (presumably the declaration of `badColInds`) are missing from this view.
2219template <class LocalOrdinal, class GlobalOrdinal, class Node>
2221 insertLocalIndices(const LocalOrdinal localRow,
2222 const Teuchos::ArrayView<const LocalOrdinal>& indices) {
2223 const char tfecfFuncName[] = "insertLocalIndices: ";
2224
2225 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error, "Fill must be active.");
2226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed(), std::runtime_error,
2227 "Graph indices are global; use insertGlobalIndices().");
2228 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error,
2229 "Cannot insert local indices without a column Map.");
2230 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!rowMap_->isNodeLocalElement(localRow), std::runtime_error,
2231 "Local row index " << localRow << " is not in the row Map "
2232 "on the calling process.");
2233 if (!indicesAreAllocated()) {
2234 allocateIndices(LocalIndices, verbose_);
2235 }
2236
2237 if (debug_) {
2238 // In debug mode, if the graph has a column Map, test whether any
2239 // of the given column indices are not in the column Map. Keep
2240 // track of the invalid column indices so we can tell the user
2241 // about them.
// NOTE(review): hasColMap() was already required by the check above, so this
// inner test looks redundant.
2242 if (hasColMap()) {
2243 using std::endl;
2244 using Teuchos::Array;
2245 using Teuchos::toString;
2246 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2247
2248 const map_type& colMap = *colMap_;
2250 bool allInColMap = true;
2251 for (size_type k = 0; k < indices.size(); ++k) {
2252 if (!colMap.isNodeLocalElement(indices[k])) {
2253 allInColMap = false;
2254 badColInds.push_back(indices[k]);
2255 }
2256 }
2257 if (!allInColMap) {
// Build a message listing both the full input and the offending indices.
2258 std::ostringstream os;
2259 os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2260 "entries in owned row "
2261 << localRow << ", at the following column "
2262 "indices: "
2263 << toString(indices) << "." << endl;
2264 os << "Of those, the following indices are not in the column Map on "
2265 "this process: "
2266 << toString(badColInds) << "." << endl
2267 << "Since "
2268 "the graph has a column Map already, it is invalid to insert entries "
2269 "at those locations.";
// The condition is always true here (we are inside `if (!allInColMap)`).
2270 TEUCHOS_TEST_FOR_EXCEPTION(!allInColMap, std::invalid_argument, os.str());
2271 }
2272 }
2273 }
2274
2275 insertLocalIndicesImpl(localRow, indices);
2276
// Debug-only post-condition: storage is allocated and locally indexed.
2277 if (debug_) {
2278 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!indicesAreAllocated() || !isLocallyIndexed(), std::logic_error,
2279 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2280 "! isLocallyIndexed() is true. Please report this bug to the "
2281 "Tpetra developers.");
2282 }
2283}
2284
// Raw-array convenience overload: wraps `inds[0 .. numEnt)` in a
// non-owning Teuchos::ArrayView and forwards to the ArrayView overload of
// insertLocalIndices, which performs all validation.
// NOTE(review): listing line 2286 (return type and class qualifier) is
// missing from this view.
2285template <class LocalOrdinal, class GlobalOrdinal, class Node>
2287 insertLocalIndices(const LocalOrdinal localRow,
2288 const LocalOrdinal numEnt,
2289 const LocalOrdinal inds[]) {
2290 Teuchos::ArrayView<const LocalOrdinal> indsT(inds, numEnt);
2291 this->insertLocalIndices(localRow, indsT);
2292}
2293
// Inserts `numInputInds` global column indices from `inputGblColInds` into
// the row with global index `gblRow`.
//
// Throws std::runtime_error when the graph is locally indexed or when fill is
// not active. Index storage is allocated (as global indices) on first use.
// If `gblRow` is in the row Map on this process, the indices go through
// insertGlobalIndicesImpl; otherwise they are handed to
// insertGlobalIndicesIntoNonownedRows. In debug mode, with a column Map
// present, an owned-row insert of any index not in the column Map throws
// std::invalid_argument.
//
// NOTE(review): listing lines 2295-2298 (return type, qualified name,
// parameter list) and 2301 are missing from this view; parameter names above
// are taken from their uses in the body.
2294template <class LocalOrdinal, class GlobalOrdinal, class Node>
2299 typedef LocalOrdinal LO;
2300 const char tfecfFuncName[] = "insertGlobalIndices: ";
2302 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed(), std::runtime_error,
2303 "graph indices are local; use insertLocalIndices().");
2304 // This can't really be satisfied for now, because if we are
2305 // fillComplete(), then we are local. In the future, this may
2306 // change. However, the rule that modification require active
2307 // fill will not change.
2308 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
2309 "You are not allowed to call this method if fill is not active. "
2310 "If fillComplete has been called, you must first call resumeFill "
2311 "before you may insert indices.");
2312 if (!indicesAreAllocated()) {
2313 allocateIndices(GlobalIndices, verbose_);
2314 }
// An invalid local index means the row is not owned by this process.
2315 const LO lclRow = this->rowMap_->getLocalElement(gblRow);
2316 if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2317 if (debug_) {
2318 if (this->hasColMap()) {
2319 using std::endl;
2320 const map_type& colMap = *(this->colMap_);
2321 // In a debug build, keep track of the nonowned ("bad") column
2322 // indices, so that we can display them in the exception
2323 // message. In a release build, just ditch the loop early if
2324 // we encounter a nonowned column index.
// NOTE(review): the visible code only has this debug_ path; no early-exit
// "release" variant of the loop is present in this function.
2325 std::vector<GlobalOrdinal> badColInds;
2326 bool allInColMap = true;
2327 for (LO k = 0; k < numInputInds; ++k) {
2328 if (!colMap.isNodeGlobalElement(inputGblColInds[k])) {
2329 allInColMap = false;
2330 badColInds.push_back(inputGblColInds[k]);
2331 }
2332 }
2333 if (!allInColMap) {
// Format "[a,b,c]" lists of all inputs and of the rejected inputs.
2334 std::ostringstream os;
2335 os << "You attempted to insert entries in owned row " << gblRow
2336 << ", at the following column indices: [";
2337 for (LO k = 0; k < numInputInds; ++k) {
2338 os << inputGblColInds[k];
2339 if (k + static_cast<LO>(1) < numInputInds) {
2340 os << ",";
2341 }
2342 }
2343 os << "]." << endl
2344 << "Of those, the following indices are not in "
2345 "the column Map on this process: [";
2346 for (size_t k = 0; k < badColInds.size(); ++k) {
2347 os << badColInds[k];
2348 if (k + size_t(1) < badColInds.size()) {
2349 os << ",";
2350 }
2351 }
// NOTE(review): the message below says "matrix" although this is a graph.
2352 os << "]." << endl
2353 << "Since the matrix has a column Map already, "
2354 "it is invalid to insert entries at those locations.";
2355 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, os.str());
2356 }
2357 }
2358 } // debug_
2359 this->insertGlobalIndicesImpl(lclRow, inputGblColInds, numInputInds);
2360 } else { // a nonlocal row
2361 this->insertGlobalIndicesIntoNonownedRows(gblRow, inputGblColInds,
2362 numInputInds);
2363 }
2364}
2365
// ArrayView convenience overload: forwards the view's size and raw pointer to
// the (gblRow, count, pointer) overload of insertGlobalIndices, which performs
// all validation.
// NOTE(review): listing lines 2367-2368 (return type, qualified name, first
// parameter) are missing from this view.
2366template <class LocalOrdinal, class GlobalOrdinal, class Node>
2369 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds) {
2370 this->insertGlobalIndices(gblRow, inputGblColInds.size(),
2371 inputGblColInds.getRawPtr());
2372}
2373
// Inserts global column indices into local row `lclRow`, silently dropping
// any index that is not in the column Map on this process. Without a column
// Map, all `numGblColInds` indices are inserted unfiltered.
//
// Throws std::runtime_error when the graph is locally indexed or when fill is
// not active. Index storage is allocated (as global indices) on first use.
//
// NOTE(review): listing lines 2375-2376 and 2378 (return type, qualified
// name, and the `lclRow` / `numGblColInds` parameters) are missing from this
// view; those names are taken from their uses in the body.
2374template <class LocalOrdinal, class GlobalOrdinal, class Node>
2377 const GlobalOrdinal gblColInds[],
2379 typedef LocalOrdinal LO;
2380 typedef GlobalOrdinal GO;
2381 const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2382
2383 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed(), std::runtime_error,
2384 "Graph indices are local; use insertLocalIndices().");
2385 // This can't really be satisfied for now, because if we are
2386 // fillComplete(), then we are local. In the future, this may
2387 // change. However, the rule that modification require active
2388 // fill will not change.
2389 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
2390 "You are not allowed to call this method if fill is not active. "
2391 "If fillComplete has been called, you must first call resumeFill "
2392 "before you may insert indices.");
2393 if (!indicesAreAllocated()) {
2394 allocateIndices(GlobalIndices, verbose_);
2395 }
2396
// Non-owning view over the input; only used on the no-column-Map path below.
2397 Teuchos::ArrayView<const GO> gblColInds_av(gblColInds, numGblColInds);
2398 // If we have a column Map, use it to filter the entries.
2399 if (!colMap_.is_null()) {
2400 const map_type& colMap = *(this->colMap_);
2401
2402 LO curOffset = 0;
2403 while (curOffset < numGblColInds) {
2404 // Find a sequence of input indices that are in the column Map
2405 // on the calling process. Doing a sequence at a time,
2406 // instead of one at a time, amortizes some overhead.
2407 LO endOffset = curOffset;
2408 for (; endOffset < numGblColInds; ++endOffset) {
2409 const LO lclCol = colMap.getLocalElement(gblColInds[endOffset]);
2410 if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2411 break; // first entry, in current sequence, not in the column Map
2412 }
2413 }
2414 // curOffset, endOffset: half-exclusive range of indices in
2415 // the column Map on the calling process. If endOffset ==
2416 // curOffset, the range is empty.
2417 const LO numIndInSeq = (endOffset - curOffset);
2418 if (numIndInSeq != 0) {
2419 this->insertGlobalIndicesImpl(lclRow, gblColInds + curOffset,
2420 numIndInSeq);
2421 }
2422 // Invariant before this line: Either endOffset ==
2423 // numGblColInds, or gblColInds[endOffset] is not in the
2424 // column Map on the calling process.
// Skip past the rejected entry (or past the end, which terminates the loop).
2425 curOffset = endOffset + 1;
2426 }
2427 } else {
2428 this->insertGlobalIndicesImpl(lclRow, gblColInds_av.getRawPtr(),
2429 gblColInds_av.size());
2430 }
2431}
2432
// Appends `numGblColInds` global column indices from `gblColInds` to the
// entry of `nonlocals_` keyed by `gblRow`, creating that entry if needed.
// No deduplication or sorting happens here.
// NOTE(review): listing lines 2434-2435 and 2437 (return type, qualified
// name, and the `gblRow` / `numGblColInds` parameters) are missing from this
// view; the function name is presumed from the call site in
// insertGlobalIndices -- confirm.
2433template <class LocalOrdinal, class GlobalOrdinal, class Node>
2436 const GlobalOrdinal gblColInds[],
2438 // This creates the std::vector if it doesn't exist yet.
2439 // std::map's operator[] does a lookup each time, so it's better
2440 // to pull nonlocals_[grow] out of the loop.
2441 std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2442 for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2443 // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2444 // order to avoid duplicates. globalAssemble() sorts these
2445 // anyway.
2446 nonlocalRow.push_back(gblColInds[k]);
2447 }
2448}
2449
// Empties local row `lrow` by resetting its entry count to zero; the row's
// storage itself is not touched in the visible code.
//
// The visible check arguments throw std::runtime_error when fill is not
// active, when storage is optimized, when the graph is globally indexed, or
// when `lrow` is not in the row Map on this process. Index storage is
// allocated (as local indices) on first use.
//
// NOTE(review): listing lines 2451-2452 (return type, qualified name,
// parameter list) and 2454, 2456, 2459, 2461 (presumably the openings of
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC calls) are missing from this view.
2450template <class LocalOrdinal, class GlobalOrdinal, class Node>
2453 const char tfecfFuncName[] = "removeLocalIndices: ";
2455 !isFillActive(), std::runtime_error, "requires that fill is active.");
2457 isStorageOptimized(), std::runtime_error,
2458 "cannot remove indices after optimizeStorage() has been called.");
2460 isGloballyIndexed(), std::runtime_error, "graph indices are global.");
2462 !rowMap_->isNodeLocalElement(lrow), std::runtime_error,
2463 "Local row " << lrow << " is not in the row Map on the calling process.");
2464 if (!indicesAreAllocated()) {
2465 allocateIndices(LocalIndices, verbose_);
2466 }
2467
// Only write the count when the per-row count array actually exists.
2468 if (k_numRowEntries_.extent(0) != 0) {
2469 this->k_numRowEntries_(lrow) = 0;
2470 }
2471
// Debug-only post-conditions: row is empty, storage allocated, locally indexed.
2472 if (debug_) {
2473 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(getNumEntriesInLocalRow(lrow) != 0 ||
2474 !indicesAreAllocated() ||
2475 !isLocallyIndexed(),
2476 std::logic_error,
2477 "Violated stated post-conditions. Please contact Tpetra team.");
2478 }
2479}
2480
// Installs caller-provided row offsets (`rowPointers`) and local column
// indices (`columnIndices`) as the graph's entire local structure.
//
// Requires a column Map (std::runtime_error otherwise) and a `rowPointers`
// length consistent with the local row count. In debug mode the column
// indices are checked against [0, getLocalNumCols()), and, if the graph
// currently reports isSorted(), for sortedness within each row; both checks
// are all-reduced across the communicator and throw std::invalid_argument.
// On success the graph is marked allocated, locally indexed, sorted, free of
// redundancies, and 1-D packed.
//
// NOTE(review): listing lines 2482, 2488, 2493, 2495, 2499, 2511, 2515 and
// 2520 are missing from this view (class qualifier, macro openings, the
// `rowPtrLen` declaration, the first lambda header, the first reduction
// result argument, and the `if` guarding the out-of-bounds throw).
2481template <class LocalOrdinal, class GlobalOrdinal, class Node>
2483 setAllIndices(const typename local_graph_device_type::row_map_type& rowPointers,
2484 const typename local_graph_device_type::entries_type::non_const_type& columnIndices) {
2485 using ProfilingRegion = Details::ProfilingRegion;
2486 ProfilingRegion region("Tpetra::CrsGraph::setAllIndices");
2487 const char tfecfFuncName[] = "setAllIndices: ";
2489 !hasColMap() || getColMap().is_null(), std::runtime_error,
2490 "The graph must have a column Map before you may call this method.");
2491 LocalOrdinal numLocalRows = this->getLocalNumRows();
2492 {
// With zero local rows, a row-pointer array of length 0 or 1 is accepted;
// otherwise the length must be exactly numLocalRows + 1.
2494 if (numLocalRows == 0) {
2496 rowPtrLen != 0 && rowPtrLen != 1,
2497 std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2498 } else {
2500 rowPtrLen != numLocalRows + 1,
2501 std::runtime_error, "rowPointers.size() = " << rowPtrLen << " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2502 }
2503 }
2504
2505 if (debug_) {
// Count column indices outside [0, numLocalCols) with a parallel reduction.
2506 using exec_space = typename local_graph_device_type::execution_space;
2507 int columnsOutOfBounds = 0;
2508 local_ordinal_type numLocalCols = this->getLocalNumCols();
2509 Kokkos::parallel_reduce(
2510 Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2512 if (columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2513 lOutOfBounds++;
2514 },
// Take the max over all processes so every rank reaches the same decision.
2516 int globalColsOutOfBounds = 0;
2517 auto comm = this->getComm();
2518 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2519 Teuchos::outArg(globalColsOutOfBounds));
2521 std::string message;
2522 if (columnsOutOfBounds) {
2523 // Only print message from ranks with the problem
2524 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2525 }
2526 Details::gathervPrint(std::cout, message, *comm);
2527 throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2528 }
2529 }
2530
2531 if (debug_ && this->isSorted()) {
2532 // Verify that the local indices are actually sorted
2533 int notSorted = 0;
2534 using exec_space = typename local_graph_device_type::execution_space;
2535 using size_type = typename local_graph_device_type::size_type;
2536 Kokkos::parallel_reduce(
2537 Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2538 KOKKOS_LAMBDA(const LocalOrdinal i, int& lNotSorted) {
2539 size_type rowBegin = rowPointers(i);
2540 size_type rowEnd = rowPointers(i + 1);
// Strictly-decreasing neighbours flag the row; equal neighbours are accepted.
2541 for (size_type j = rowBegin + 1; j < rowEnd; j++) {
2542 if (columnIndices(j - 1) > columnIndices(j)) {
2543 lNotSorted = 1;
2544 }
2545 }
2546 },
2547 notSorted);
2548 // All-reduce notSorted to avoid rank divergence
2549 int globalNotSorted = 0;
2550 auto comm = this->getComm();
2551 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX, notSorted,
2552 Teuchos::outArg(globalNotSorted));
2553 if (globalNotSorted) {
2554 std::string message;
2555 if (notSorted) {
2556 // Only print message from ranks with the problem
2557 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2558 }
2559 Details::gathervPrint(std::cout, message, *comm);
2560 throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2561 }
2562 }
2563
// NOTE(review): indicesAreSorted_ and noRedundancies_ are set to true
// unconditionally; sortedness is only verified above when debug_ is on and
// the graph already reported isSorted(), and duplicates are never checked
// here. The caller is presumably responsible for both -- confirm.
2564 indicesAreAllocated_ = true;
2565 indicesAreLocal_ = true;
2566 indicesAreSorted_ = true;
2567 noRedundancies_ = true;
// Packed and unpacked index storage are made to refer to the same wrapper.
2568 lclIndsPacked_wdv = local_inds_wdv_type(columnIndices);
2569 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2570 setRowPtrs(rowPointers);
2571
2572 set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2573
2574 // Storage MUST be packed, since the interface doesn't give any
2575 // way to indicate any extra space at the end of each row.
2576 storageStatus_ = Details::STORAGE_1D_PACKED;
2577
2578 // These normally get cleared out at the end of allocateIndices.
2579 // It makes sense to clear them out here, because at the end of
2580 // this method, the graph is allocated on the calling process.
2581 numAllocForAllRows_ = 0;
2582 k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
2583
2584 checkInternalState();
2585}
2586
// ArrayRCP convenience overload: copies the host-side `rowPointers` and
// `columnIndices` into freshly allocated Kokkos views and forwards them to
// the view-based setAllIndices overload, which performs all validation.
//
// Row offsets are converted from size_t to the local graph's row offset type
// when the two differ.
//
// NOTE(review): listing lines 2588 (return type and class qualifier), 2619
// and 2633 (presumably the casting-copy calls into ptr_rot) are missing from
// this view.
2587template <class LocalOrdinal, class GlobalOrdinal, class Node>
2589 setAllIndices(const Teuchos::ArrayRCP<size_t>& rowPointers,
2590 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices) {
2591 using Kokkos::View;
2592 typedef typename local_graph_device_type::row_map_type row_map_type;
2593 typedef typename row_map_type::array_layout layout_type;
2594 typedef typename row_map_type::non_const_value_type row_offset_type;
2595 typedef View<size_t*, layout_type, Kokkos::HostSpace,
2596 Kokkos::MemoryUnmanaged>
2597 input_view_type;
2598 typedef typename row_map_type::non_const_type nc_row_map_type;
2599
2600 const size_t size = static_cast<size_t>(rowPointers.size());
2601 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
// Unmanaged host view aliasing the caller's array; no copy is made here.
2602 input_view_type ptr_in(rowPointers.getRawPtr(), size);
2603
// Destination for the row offsets, in the graph's own offset type.
2604 nc_row_map_type ptr_rot("Tpetra::CrsGraph::ptr", size);
2605
2606 if constexpr (same) { // size_t == row_offset_type
2607 using lexecution_space = typename device_type::execution_space;
2608 Kokkos::deep_copy(lexecution_space(),
2609 ptr_rot,
2610 ptr_in);
2611 } else { // size_t != row_offset_type
2612 // CudaUvmSpace != HostSpace, so this will be false in that case.
2613 constexpr bool inHostMemory =
2614 std::is_same<typename row_map_type::memory_space,
2615 Kokkos::HostSpace>::value;
// NOTE(review): inHostMemory is constexpr but is tested with a plain `if`,
// so both branches are instantiated.
2616 if (inHostMemory) {
2617 // Copy (with cast from size_t to row_offset_type, with bounds
2618 // checking if necessary) to ptr_rot.
2620 } else { // Copy input row offsets to device first.
2621 //
2622 // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2623 // execution space would avoid the double copy.
2624 //
2625 View<size_t*, layout_type, device_type> ptr_st("Tpetra::CrsGraph::ptr", size);
2626
2627 // DEEP_COPY REVIEW - NOT TESTED
2628 Kokkos::deep_copy(ptr_st, ptr_in);
2629 // Copy on device (casting from size_t to row_offset_type,
2630 // with bounds checking if necessary) to ptr_rot. This
2631 // executes in the output View's execution space, which is the
2632 // same as execution_space.
2634 }
2635 }
2636
// Deep-copy the column indices into a view on device_type, then delegate.
2637 Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2638 Kokkos::Compat::getKokkosViewDeepCopy<device_type>(columnIndices());
2639 setAllIndices(ptr_rot, k_ind);
2640}
2641
2642template <class LocalOrdinal, class GlobalOrdinal, class Node>
2645 using std::endl;
2646 using Teuchos::Comm;
2647 using Teuchos::outArg;
2648 using Teuchos::RCP;
2649 using Teuchos::rcp;
2650 using Teuchos::REDUCE_MAX;
2651 using Teuchos::REDUCE_MIN;
2652 using Teuchos::reduceAll;
2653 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2654 using LO = local_ordinal_type;
2655 using GO = global_ordinal_type;
2656 using size_type = typename Teuchos::Array<GO>::size_type;
2657
2658 const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2659
2660 Details::ProfilingRegion regionGA("Tpetra::CrsGraph::globalAssemble");
2661
2662 std::unique_ptr<std::string> prefix;
2663 if (verbose_) {
2664 prefix = this->createPrefix("CrsGraph", "globalAssemble");
2665 std::ostringstream os;
2666 os << *prefix << "Start" << endl;
2667 std::cerr << os.str();
2668 }
2669 RCP<const Comm<int>> comm = getComm();
2670
2671 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error,
2672 "Fill must be active before "
2673 "you may call this method.");
2674
2675 const size_t myNumNonlocalRows = this->nonlocals_.size();
2676
2677 // If no processes have nonlocal rows, then we don't have to do
2678 // anything. Checking this is probably cheaper than constructing
2679 // the Map of nonlocal rows (see below) and noticing that it has
2680 // zero global entries.
2681 {
2682 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2683 int someoneHasNonlocalRows = 0;
2686 if (someoneHasNonlocalRows == 0) {
2687 if (verbose_) {
2688 std::ostringstream os;
2689 os << *prefix << "Done: No nonlocal rows" << endl;
2690 std::cerr << os.str();
2691 }
2692 return;
2693 } else if (verbose_) {
2694 std::ostringstream os;
2695 os << *prefix << "At least 1 process has nonlocal rows"
2696 << endl;
2697 std::cerr << os.str();
2698 }
2699 }
2700
2701 // 1. Create a list of the "nonlocal" rows on each process. this
2702 // requires iterating over nonlocals_, so while we do this,
2703 // deduplicate the entries and get a count for each nonlocal
2704 // row on this process.
2705 // 2. Construct a new row Map corresponding to those rows. This
2706 // Map is likely overlapping. We know that the Map is not
2707 // empty on all processes, because the above all-reduce and
2708 // return exclude that case.
2709
2711 // Keep this for CrsGraph's constructor.
2712 Teuchos::Array<size_t> numEntPerNonlocalRow(myNumNonlocalRows);
2713 {
2714 Teuchos::Array<GO> myNonlocalGblRows(myNumNonlocalRows);
2715 size_type curPos = 0;
2716 for (auto mapIter = this->nonlocals_.begin();
2717 mapIter != this->nonlocals_.end();
2718 ++mapIter, ++curPos) {
2720 std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
2721 std::sort(gblCols.begin(), gblCols.end());
2722 auto vecLast = std::unique(gblCols.begin(), gblCols.end());
2723 gblCols.erase(vecLast, gblCols.end());
2725 }
2726
2727 // Currently, Map requires that its indexBase be the global min
2728 // of all its global indices. Map won't compute this for us, so
2729 // we must do it. If our process has no nonlocal rows, set the
2730 // "min" to the max possible GO value. This ensures that if
2731 // some process has at least one nonlocal row, then it will pick
2732 // that up as the min. We know that at least one process has a
2733 // nonlocal row, since the all-reduce and return at the top of
2734 // this method excluded that case.
2735 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max();
2736 {
2737 auto iter = std::min_element(myNonlocalGblRows.begin(),
2738 myNonlocalGblRows.end());
2739 if (iter != myNonlocalGblRows.end()) {
2741 }
2742 }
2743 GO gblMinNonlocalGblRow = 0;
2747 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
2749 }
2750
2751 if (verbose_) {
2752 std::ostringstream os;
2753 os << *prefix << "nonlocalRowMap->getIndexBase()="
2754 << nonlocalRowMap->getIndexBase() << endl;
2755 std::cerr << os.str();
2756 }
2757
2758 // 3. Use the column indices for each nonlocal row, as stored in
2759 // nonlocals_, to construct a CrsGraph corresponding to
2760 // nonlocal rows. We need, but we have, exact counts of the
2761 // number of entries in each nonlocal row.
2762
2764 rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
2765 {
2766 size_type curPos = 0;
2767 for (auto mapIter = this->nonlocals_.begin();
2768 mapIter != this->nonlocals_.end();
2769 ++mapIter, ++curPos) {
2770 const GO gblRow = mapIter->first;
2771 std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
2772 const LO numEnt = static_cast<LO>(numEntPerNonlocalRow[curPos]);
2773 nonlocalGraph->insertGlobalIndices(gblRow, numEnt, gblCols.data());
2774 }
2775 }
2776 if (verbose_) {
2777 std::ostringstream os;
2778 os << *prefix << "Built nonlocal graph" << endl;
2779 std::cerr << os.str();
2780 }
2781 // There's no need to fill-complete the nonlocals graph.
2782 // We just use it as a temporary container for the Export.
2783
2784 // 4. If the original row Map is one to one, then we can Export
2785 // directly from nonlocalGraph into this. Otherwise, we have
2786 // to create a temporary graph with a one-to-one row Map,
2787 // Export into that, then Import from the temporary graph into
2788 // *this.
2789
2790 auto origRowMap = this->getRowMap();
2791 const bool origRowMapIsOneToOne = origRowMap->isOneToOne();
2792
2794 if (verbose_) {
2795 std::ostringstream os;
2796 os << *prefix << "Original row Map is 1-to-1" << endl;
2797 std::cerr << os.str();
2798 }
2800 this->doExport(*nonlocalGraph, exportToOrig, Tpetra::INSERT);
2801 // We're done at this point!
2802 } else {
2803 if (verbose_) {
2804 std::ostringstream os;
2805 os << *prefix << "Original row Map is NOT 1-to-1" << endl;
2806 std::cerr << os.str();
2807 }
2808 // If you ask a Map whether it is one to one, it does some
2809 // communication and stashes intermediate results for later use
2810 // by createOneToOne. Thus, calling createOneToOne doesn't cost
2811 // much more then the original cost of calling isOneToOne.
2814
2815 // Create a temporary graph with the one-to-one row Map.
2816 //
2817 // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
2818 // row, to avoid reallocation during the Export operation.
2819 crs_graph_type oneToOneGraph(oneToOneRowMap, 0);
2820
2821 // Export from graph of nonlocals into the temp one-to-one graph.
2822 if (verbose_) {
2823 std::ostringstream os;
2824 os << *prefix << "Export nonlocal graph" << endl;
2825 std::cerr << os.str();
2826 }
2828
2829 // We don't need the graph of nonlocals anymore, so get rid of
2830 // it, to keep the memory high-water mark down.
2831 nonlocalGraph = Teuchos::null;
2832
2833 // Import from the one-to-one graph to the original graph.
2835 if (verbose_) {
2836 std::ostringstream os;
2837 os << *prefix << "Import nonlocal graph" << endl;
2838 std::cerr << os.str();
2839 }
2840 this->doImport(oneToOneGraph, importToOrig, Tpetra::INSERT);
2841 }
2842
2843 // It's safe now to clear out nonlocals_, since we've already
2844 // committed side effects to *this. The standard idiom for
2845 // clearing a Container like std::map, is to swap it with an empty
2846 // Container and let the swapped Container fall out of scope.
2847 decltype(this->nonlocals_) newNonlocals;
2848 std::swap(this->nonlocals_, newNonlocals);
2849
2850 checkInternalState();
2851 if (verbose_) {
2852 std::ostringstream os;
2853 os << *prefix << "Done" << endl;
2854 std::cerr << os.str();
2855 }
2856}
2857
2858template <class LocalOrdinal, class GlobalOrdinal, class Node>
2860 resumeFill(const Teuchos::RCP<Teuchos::ParameterList>& params) {
2861 clearGlobalConstants();
2862 if (params != Teuchos::null) this->setParameterList(params);
2863 // either still sorted/merged or initially sorted/merged
2864 indicesAreSorted_ = true;
2865 noRedundancies_ = true;
2866 fillComplete_ = false;
2867}
2868
2869template <class LocalOrdinal, class GlobalOrdinal, class Node>
2871 fillComplete(const Teuchos::RCP<Teuchos::ParameterList>& params) {
2872 // If the graph already has domain and range Maps, don't clobber
2873 // them. If it doesn't, use the current row Map for both the
2874 // domain and range Maps.
2875 //
2876 // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
2877 // column Map, and column indices are inserted which are not in
2878 // the row Map on any process, this will cause troubles. However,
2879 // that is not a common case for most applications that we
2880 // encounter, and checking for it might require more
2881 // communication.
2882 Teuchos::RCP<const map_type> domMap = this->getDomainMap();
2883 if (domMap.is_null()) {
2884 domMap = this->getRowMap();
2885 }
2886 Teuchos::RCP<const map_type> ranMap = this->getRangeMap();
2887 if (ranMap.is_null()) {
2888 ranMap = this->getRowMap();
2889 }
2890 this->fillComplete(domMap, ranMap, params);
2891}
2892
2893template <class LocalOrdinal, class GlobalOrdinal, class Node>
2895 fillComplete(const Teuchos::RCP<const map_type>& domainMap,
2896 const Teuchos::RCP<const map_type>& rangeMap,
2897 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2898 using std::endl;
2899
2900 const char tfecfFuncName[] = "fillComplete: ";
2901 const bool verbose = verbose_;
2902
2903 Details::ProfilingRegion regionFC("Tpetra::CrsGraph::fillComplete");
2904
2905 std::unique_ptr<std::string> prefix;
2906 if (verbose) {
2907 prefix = this->createPrefix("CrsGraph", "fillComplete");
2908 std::ostringstream os;
2909 os << *prefix << "Start" << endl;
2910 std::cerr << os.str();
2911 }
2912
2913 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive() || isFillComplete(), std::runtime_error,
2914 "Graph fill state must be active (isFillActive() "
2915 "must be true) before calling fillComplete().");
2916
2917 const int numProcs = getComm()->getSize();
2918
2919 //
2920 // Read and set parameters
2921 //
2922
2923 // Does the caller want to sort remote GIDs (within those owned by
2924 // the same process) in makeColMap()?
2925 if (!params.is_null()) {
2926 if (params->isParameter("sort column map ghost gids")) {
2927 sortGhostsAssociatedWithEachProcessor_ =
2928 params->get<bool>("sort column map ghost gids",
2929 sortGhostsAssociatedWithEachProcessor_);
2930 } else if (params->isParameter("Sort column Map ghost GIDs")) {
2931 sortGhostsAssociatedWithEachProcessor_ =
2932 params->get<bool>("Sort column Map ghost GIDs",
2933 sortGhostsAssociatedWithEachProcessor_);
2934 }
2935 }
2936
2937 // If true, the caller promises that no process did nonlocal
2938 // changes since the last call to fillComplete.
2939 bool assertNoNonlocalInserts = false;
2940 if (!params.is_null()) {
2942 params->get<bool>("No Nonlocal Changes", assertNoNonlocalInserts);
2943 }
2944
2945 //
2946 // Allocate indices, if they haven't already been allocated
2947 //
2948 if (!indicesAreAllocated()) {
2949 if (hasColMap()) {
2950 // We have a column Map, so use local indices.
2951 allocateIndices(LocalIndices, verbose);
2952 } else {
2953 // We don't have a column Map, so use global indices.
2954 allocateIndices(GlobalIndices, verbose);
2955 }
2956 }
2957
2958 //
2959 // Do global assembly, if requested and if the communicator
2960 // contains more than one process.
2961 //
2964 // This first checks if we need to do global assembly.
2965 // The check costs a single all-reduce.
2966 globalAssemble();
2967 } else {
2968 const size_t numNonlocals = nonlocals_.size();
2969 if (verbose) {
2970 std::ostringstream os;
2971 os << *prefix << "Do not need to call globalAssemble; "
2972 "assertNoNonlocalInserts="
2973 << (assertNoNonlocalInserts ? "true" : "false")
2974 << "numProcs=" << numProcs
2975 << ", nonlocals_.size()=" << numNonlocals << endl;
2976 std::cerr << os.str();
2977 }
2978 const int lclNeededGlobalAssemble =
2979 (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
2980 if (lclNeededGlobalAssemble != 0 && verbose) {
2981 std::ostringstream os;
2982 os << *prefix;
2983 Details::Impl::verbosePrintMap(
2984 os, nonlocals_.begin(), nonlocals_.end(),
2985 nonlocals_.size(), "nonlocals_");
2986 std::cerr << os.str() << endl;
2987 }
2988
2989 if (debug_) {
2990 auto map = this->getMap();
2991 auto comm = map.is_null() ? Teuchos::null : map->getComm();
2993 if (!comm.is_null()) {
2994 using Teuchos::REDUCE_MAX;
2995 using Teuchos::reduceAll;
2997 Teuchos::outArg(gblNeededGlobalAssemble));
2998 }
3000 "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3001 "least one process in the CrsGraph's communicator. This "
3002 "means either that you incorrectly set the "
3003 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3004 "or that you inserted invalid entries. "
3005 "Rerun with the environment variable TPETRA_VERBOSE="
3006 "CrsGraph set to see the entries of nonlocals_ on every "
3007 "MPI process (WARNING: lots of output).");
3008 } else {
3010 "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3011 "calling process. This means either that you incorrectly "
3012 "set the \"No Nonlocal Changes\" fillComplete parameter "
3013 "to true, or that you inserted invalid entries. "
3014 "Rerun with the environment "
3015 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3016 "of nonlocals_ on every MPI process (WARNING: lots of "
3017 "output).");
3018 }
3019 }
3020
3021 // Set domain and range Map. This may clear the Import / Export
3022 // objects if the new Maps differ from any old ones.
3023 setDomainRangeMaps(domainMap, rangeMap);
3024
3025 // If the graph does not already have a column Map (either from
3026 // the user constructor calling the version of the constructor
3027 // that takes a column Map, or from a previous fillComplete call),
3028 // then create it.
3029 Teuchos::Array<int> remotePIDs(0);
3030 const bool mustBuildColMap = !this->hasColMap();
3031 if (mustBuildColMap) {
3032 this->makeColMap(remotePIDs); // resized on output
3033 }
3034
3035 // Make indices local, if they aren't already.
3036 // The method doesn't do any work if the indices are already local.
3037 const std::pair<size_t, std::string> makeIndicesLocalResult =
3038 this->makeIndicesLocal(verbose);
3039
3040 if (debug_) {
3042 using Teuchos::outArg;
3043 using Teuchos::RCP;
3044 using Teuchos::REDUCE_MIN;
3045 using Teuchos::reduceAll;
3046
3047 RCP<const map_type> map = this->getMap();
3049 if (!map.is_null()) {
3050 comm = map->getComm();
3051 }
3052 if (comm.is_null()) {
3054 makeIndicesLocalResult.second);
3055 } else {
3056 const int lclSuccess = (makeIndicesLocalResult.first == 0);
3057 int gblSuccess = 0; // output argument
3059 if (gblSuccess != 1) {
3060 std::ostringstream os;
3061 gathervPrint(os, makeIndicesLocalResult.second, *comm);
3062 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
3063 }
3064 }
3065 } else {
3066 // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3067 // the error state to makeImportExport or
3068 // computeGlobalConstants, which may do all-reduces and thus may
3069 // have the opportunity to communicate that error state.
3071 makeIndicesLocalResult.second);
3072 }
3073
3074 // If this process has no indices, then CrsGraph considers it
3075 // already trivially sorted and merged. Thus, this method need
3076 // not be called on all processes in the row Map's communicator.
3077 this->sortAndMergeAllIndices(this->isSorted(), this->isMerged());
3078
3079 // Make Import and Export objects, if they haven't been made
3080 // already. If we made a column Map above, reuse information from
3081 // that process to avoid communiation in the Import setup.
3082 this->makeImportExport(remotePIDs, mustBuildColMap);
3083
3084 // Create the KokkosSparse::StaticCrsGraph, if it doesn't already exist.
3085 this->fillLocalGraph(params);
3086
3087 const bool callComputeGlobalConstants = params.get() == nullptr ||
3088 params->get("compute global constants", true);
3090 this->computeGlobalConstants();
3091 } else {
3092 this->computeLocalConstants();
3093 }
3094 this->fillComplete_ = true;
3095 this->checkInternalState();
3096
3097 if (verbose) {
3098 std::ostringstream os;
3099 os << *prefix << "Done" << endl;
3100 std::cerr << os.str();
3101 }
3102}
3103
3104template <class LocalOrdinal, class GlobalOrdinal, class Node>
3106 expertStaticFillComplete(const Teuchos::RCP<const map_type>& domainMap,
3107 const Teuchos::RCP<const map_type>& rangeMap,
3108 const Teuchos::RCP<const import_type>& importer,
3109 const Teuchos::RCP<const export_type>& exporter,
3110 const Teuchos::RCP<Teuchos::ParameterList>& params) {
3111 const char tfecfFuncName[] = "expertStaticFillComplete: ";
3112 auto MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-Setup"));
3113
3115 domainMap.is_null() || rangeMap.is_null(),
3116 std::runtime_error, "The input domain Map and range Map must be nonnull.");
3118 isFillComplete() || !hasColMap(), std::runtime_error,
3119 "You may not "
3120 "call this method unless the graph has a column Map.");
3121 auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedDevice().extent(0);
3123 getLocalNumRows() > 0 && rowPtrsUnpackedLength == 0,
3124 std::runtime_error, "The calling process has getLocalNumRows() = " << getLocalNumRows() << " > 0 rows, but the row offsets array has not "
3125 "been set.");
3127 static_cast<size_t>(rowPtrsUnpackedLength) != getLocalNumRows() + 1,
3128 std::runtime_error, "The row offsets array has length " << rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " << (getLocalNumRows() + 1) << ".");
3129
3130 // Note: We don't need to do the following things which are normally done in fillComplete:
3131 // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3132
3133 // Constants from allocateIndices
3134 //
3135 // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3136 // away once the graph is allocated. expertStaticFillComplete
3137 // either presumes that the graph is allocated, or "allocates" it.
3138 //
3139 // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3140 // version of CrsGraph is to allocate in the constructor, not
3141 // lazily on first insert. That will make both
3142 // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3143 numAllocForAllRows_ = 0;
3144 k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
3145 indicesAreAllocated_ = true;
3146
3147 // Constants from makeIndicesLocal
3148 //
3149 // The graph has a column Map, so its indices had better be local.
3150 indicesAreLocal_ = true;
3151 indicesAreGlobal_ = false;
3152
3153 // set domain/range map: may clear the import/export objects
3154 MM = Teuchos::null;
3155 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-Maps"));
3156 setDomainRangeMaps(domainMap, rangeMap);
3157
3158 // Presume the user sorted and merged the arrays first
3159 indicesAreSorted_ = true;
3160 noRedundancies_ = true;
3161
3162 // makeImportExport won't create a new importer/exporter if I set one here first.
3163 MM = Teuchos::null;
3164 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-mIXcheckI"));
3165
3166 importer_ = Teuchos::null;
3167 exporter_ = Teuchos::null;
3168 if (importer != Teuchos::null) {
3170 !importer->getSourceMap()->isSameAs(*getDomainMap()) ||
3171 !importer->getTargetMap()->isSameAs(*getColMap()),
3172 std::invalid_argument, ": importer does not match matrix maps.");
3173 importer_ = importer;
3174 }
3175
3176 MM = Teuchos::null;
3177 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-mIXcheckE"));
3178
3179 if (exporter != Teuchos::null) {
3181 !exporter->getSourceMap()->isSameAs(*getRowMap()) ||
3182 !exporter->getTargetMap()->isSameAs(*getRangeMap()),
3183 std::invalid_argument, ": exporter does not match matrix maps.");
3184 exporter_ = exporter;
3185 }
3186
3187 MM = Teuchos::null;
3188 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-mIXmake"));
3189 Teuchos::Array<int> remotePIDs(0); // unused output argument
3190 this->makeImportExport(remotePIDs, false);
3191
3192 MM = Teuchos::null;
3193 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-fLG"));
3194 this->fillLocalGraph(params);
3195
3196 const bool callComputeGlobalConstants = params.get() == nullptr ||
3197 params->get("compute global constants", true);
3198
3200 MM = Teuchos::null;
3201 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-cGC (const)"));
3202 this->computeGlobalConstants();
3203 } else {
3204 MM = Teuchos::null;
3205 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-cGC (noconst)"));
3206 this->computeLocalConstants();
3207 }
3208
3209 fillComplete_ = true;
3210
3211 MM = Teuchos::null;
3212 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra ESFC-G-cIS"));
3213 checkInternalState();
3214}
3215
3216template <class LocalOrdinal, class GlobalOrdinal, class Node>
3218 fillLocalGraph(const Teuchos::RCP<Teuchos::ParameterList>& params) {
3219 using ::Tpetra::Details::computeOffsetsFromCounts;
3220 typedef typename local_graph_device_type::row_map_type row_map_type;
3221 typedef typename row_map_type::non_const_type non_const_row_map_type;
3222 typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3223 const char tfecfFuncName[] =
3224 "fillLocalGraph (called from fillComplete or "
3225 "expertStaticFillComplete): ";
3226 const size_t lclNumRows = this->getLocalNumRows();
3227
3228 Details::ProfilingRegion regionFLG("Tpetra::CrsGraph::fillLocalGraph");
3229
3230 // This method's goal is to fill in the two arrays (compressed
3231 // sparse row format) that define the sparse graph's structure.
3232
3233 bool requestOptimizedStorage = true;
3234 if (!params.is_null() && !params->get("Optimize Storage", true)) {
3236 }
3237
3238 // The graph's column indices are currently stored in a 1-D
3239 // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3240 // in k_lclInds1D_.
3241
3242 if (debug_) {
3243 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3244 // The graph's array of row offsets must already be allocated.
3245 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) == 0, std::logic_error,
3246 "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3247 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) != lclNumRows + 1, std::logic_error,
3248 "rowPtrsUnpacked_host_.extent(0) = "
3249 << rowPtrsUnpacked.extent(0) << " != (lclNumRows + 1) = "
3250 << (lclNumRows + 1) << ".");
3251 const size_t numOffsets = rowPtrsUnpacked.extent(0);
3252 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3253 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numOffsets != 0 &&
3254 lclIndsUnpacked_wdv.extent(0) != valToCheck,
3255 std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3256 " and lclIndsUnpacked_wdv.extent(0)="
3257 << lclIndsUnpacked_wdv.extent(0) << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck << ".");
3258 }
3259
3260 size_t allocSize = 0;
3261 try {
3262 allocSize = this->getLocalAllocationSize();
3263 } catch (std::logic_error& e) {
3264 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
3265 "getLocalAllocationSize threw "
3266 "std::logic_error: "
3267 << e.what());
3268 } catch (std::runtime_error& e) {
3269 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3270 "getLocalAllocationSize threw "
3271 "std::runtime_error: "
3272 << e.what());
3273 } catch (std::exception& e) {
3274 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3275 "getLocalAllocationSize threw "
3276 "std::exception: "
3277 << e.what());
3278 } catch (...) {
3279 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3280 "getLocalAllocationSize threw "
3281 "an exception not a subclass of std::exception.");
3282 }
3283
3284 if (this->getLocalNumEntries() != allocSize) {
3285 // Use the nonconst version of row_map_type for ptr_d, because
3286 // the latter is const and we need to modify ptr_d here.
3287 non_const_row_map_type ptr_d;
3288 row_map_type ptr_d_const;
3289
3290 // The graph's current 1-D storage is "unpacked." This means
3291 // the row offsets may differ from what the final row offsets
3292 // should be. This could happen, for example, if the user set
3293 // an upper bound on the number of entries in each row, but
3294 // didn't fill all those entries.
3295
3296 if (debug_) {
3297 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3298 if (rowPtrsUnpacked.extent(0) != 0) {
3299 const size_t numOffsets =
3300 static_cast<size_t>(rowPtrsUnpacked.extent(0));
3301 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3302 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3303 std::logic_error,
3304 "(Unpacked branch) Before allocating "
3305 "or packing, k_rowPtrs_("
3306 << (numOffsets - 1) << ")="
3307 << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3308 << lclIndsUnpacked_wdv.extent(0) << ".");
3309 }
3310 }
3311
3312 // Pack the row offsets into ptr_d, by doing a sum-scan of the
3313 // array of valid entry counts per row (k_numRowEntries_).
3314
3315 // Total number of entries in the matrix on the calling
3316 // process. We will compute this in the loop below. It's
3317 // cheap to compute and useful as a sanity check.
3318 size_t lclTotalNumEntries = 0;
3319 {
3320 // Allocate the packed row offsets array.
3321 ptr_d =
3322 non_const_row_map_type("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3323 ptr_d_const = ptr_d;
3324
3325 // It's ok that k_numRowEntries_ is a host View; the
3326 // function can handle this.
3327 typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3328 if (debug_) {
3329 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(size_t(numRowEnt_h.extent(0)) != lclNumRows,
3330 std::logic_error,
3331 "(Unpacked branch) "
3332 "numRowEnt_h.extent(0)="
3333 << numRowEnt_h.extent(0)
3334 << " != getLocalNumRows()=" << lclNumRows << "");
3335 }
3336
3337 lclTotalNumEntries = computeOffsetsFromCounts(ptr_d, numRowEnt_h);
3338
3339 if (debug_) {
3340 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(ptr_d.extent(0)) != lclNumRows + 1,
3341 std::logic_error,
3342 "(Unpacked branch) After allocating "
3343 "ptr_d, ptr_d.extent(0) = "
3344 << ptr_d.extent(0)
3345 << " != lclNumRows+1 = " << (lclNumRows + 1) << ".");
3346 const auto valToCheck =
3347 ::Tpetra::Details::getEntryOnHost(ptr_d, lclNumRows);
3348 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != lclTotalNumEntries, std::logic_error,
3349 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3350 "after filling ptr_d, ptr_d(lclNumRows="
3351 << lclNumRows
3352 << ") = " << valToCheck << " != total number of entries "
3353 "on the calling process = "
3354 << lclTotalNumEntries
3355 << ".");
3356 }
3357 }
3358
3359 // Allocate the array of packed column indices.
3360 lclinds_1d_type ind_d =
3361 lclinds_1d_type("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3362
3363 // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3364 // them, using the packed row offsets array ptr_d that we
3365 // created above.
3366 //
3367 // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3368 // CrsMatrix?), we need to keep around the unpacked row
3369 // offsets and column indices.
3370
3371 // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3372 // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3373 typedef pack_functor<
3374 typename local_graph_device_type::entries_type::non_const_type,
3375 typename local_inds_dualv_type::t_dev::const_type,
3376 row_map_type,
3377 typename local_graph_device_type::row_map_type>
3378 inds_packer_type;
3379 inds_packer_type f(ind_d,
3380 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3381 ptr_d, this->getRowPtrsUnpackedDevice());
3382 {
3383 typedef typename decltype(ind_d)::execution_space exec_space;
3384 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3385 Kokkos::parallel_for(range_type(0, lclNumRows), f);
3386 }
3387
3388 if (debug_) {
3389 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(ptr_d.extent(0) == 0, std::logic_error,
3390 "(\"Optimize Storage\"=true branch) After packing, "
3391 "ptr_d.extent(0)=0.");
3392 if (ptr_d.extent(0) != 0) {
3393 const size_t numOffsets = static_cast<size_t>(ptr_d.extent(0));
3394 const auto valToCheck =
3395 ::Tpetra::Details::getEntryOnHost(ptr_d, numOffsets - 1);
3396 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) != ind_d.extent(0),
3397 std::logic_error,
3398 "(\"Optimize Storage\"=true branch) "
3399 "After packing, ptr_d("
3400 << (numOffsets - 1) << ")="
3401 << valToCheck << " != ind_d.extent(0)="
3402 << ind_d.extent(0) << ".");
3403 }
3404 }
3405 // Build the local graph.
3406 if (requestOptimizedStorage)
3407 setRowPtrs(ptr_d_const);
3408 else
3409 setRowPtrsPacked(ptr_d_const);
3410 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3411 } else { // We don't have to pack, so just set the pointers.
3412 // Set both packed and unpacked rowptrs to this
3413 this->setRowPtrs(rowPtrsUnpacked_dev_);
3414 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3415
3416 if (debug_) {
3417 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3418 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3419 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsPacked_dev.extent(0) == 0, std::logic_error,
3420 "(\"Optimize Storage\"=false branch) "
3421 "rowPtrsPacked_dev_.extent(0) = 0.");
3422 if (rowPtrsPacked_dev.extent(0) != 0) {
3423 const size_t numOffsets =
3424 static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3425 const size_t valToCheck =
3426 rowPtrsPacked_host(numOffsets - 1);
3427 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != size_t(lclIndsPacked_wdv.extent(0)),
3428 std::logic_error,
3429 "(\"Optimize Storage\"=false branch) "
3430 "rowPtrsPacked_dev_("
3431 << (numOffsets - 1) << ")="
3432 << valToCheck
3433 << " != lclIndsPacked_wdv.extent(0)="
3434 << lclIndsPacked_wdv.extent(0) << ".");
3435 }
3436 }
3437 }
3438
3439 if (debug_) {
3440 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3441 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3442 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(rowPtrsPacked_dev.extent(0)) != lclNumRows + 1,
3443 std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " << rowPtrsPacked_dev.extent(0) << " != lclNumRows+1 = " << (lclNumRows + 1) << ".");
3444 if (rowPtrsPacked_dev.extent(0) != 0) {
3445 const size_t numOffsets = static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3446 const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3447 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) != lclIndsPacked_wdv.extent(0),
3448 std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets - 1) << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = " << lclIndsPacked_wdv.extent(0) << ".");
3449 }
3450 }
3451
3452 if (requestOptimizedStorage) {
3453 // With optimized storage, we don't need to store
3454 // the array of row entry counts.
3455
3456 // Free graph data structures that are only needed for
3457 // unpacked 1-D storage.
3458 k_numRowEntries_ = num_row_entries_type();
3459
3460 // Keep the new 1-D packed allocations.
3461 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3462
3463 storageStatus_ = Details::STORAGE_1D_PACKED;
3464 }
3465
3466 set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3467}
3468
3469template <class LocalOrdinal, class GlobalOrdinal, class Node>
3471 replaceColMap(const Teuchos::RCP<const map_type>& newColMap) {
3472 // NOTE: This safety check matches the code, but not the documentation of Crsgraph
3473 //
3474 // FIXME (mfh 18 Aug 2014) This will break if the calling process
3475 // has no entries, because in that case, currently it is neither
3476 // locally nor globally indexed. This will change once we get rid
3477 // of lazy allocation (so that the constructor allocates indices
3478 // and therefore commits to local vs. global).
3479 const char tfecfFuncName[] = "replaceColMap: ";
3481 isLocallyIndexed() || isGloballyIndexed(), std::runtime_error,
3482 "Requires matching maps and non-static graph.");
3483 colMap_ = newColMap;
3484}
3485
3486template <class LocalOrdinal, class GlobalOrdinal, class Node>
3488 reindexColumns(const Teuchos::RCP<const map_type>& newColMap,
3489 const Teuchos::RCP<const import_type>& newImport,
3490 const bool sortIndicesInEachRow) {
3491 using Teuchos::RCP;
3492 using Teuchos::REDUCE_MIN;
3493 using Teuchos::reduceAll;
3494 typedef GlobalOrdinal GO;
3495 typedef LocalOrdinal LO;
3496 using col_inds_type_dev = typename local_inds_dualv_type::t_dev;
3497 const char tfecfFuncName[] = "reindexColumns: ";
3498
3500 isFillComplete(), std::runtime_error,
3501 "The graph is fill complete "
3502 "(isFillComplete() returns true). You must call resumeFill() before "
3503 "you may call this method.");
3504
3505 // mfh 19 Aug 2014: This method does NOT redistribute data; it
3506 // doesn't claim to do the work of an Import or Export. This
3507 // means that for all processes, the calling process MUST own all
3508 // column indices, in both the old column Map (if it exists) and
3509 // the new column Map. We check this via an all-reduce.
3510 //
3511 // Some processes may be globally indexed, others may be locally
3512 // indexed, and others (that have no graph entries) may be
3513 // neither. This method will NOT change the graph's current
3514 // state. If it's locally indexed, it will stay that way, and
3515 // vice versa. It would easy to add an option to convert indices
3516 // from global to local, so as to save a global-to-local
3517 // conversion pass. However, we don't do this here. The intended
3518 // typical use case is that the graph already has a column Map and
3519 // is locally indexed, and this is the case for which we optimize.
3520
3521 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
3522
3523 // Attempt to convert indices to the new column Map's version of
3524 // local. This will fail if on the calling process, the graph has
3525 // indices that are not on that process in the new column Map.
3526 // After the local conversion attempt, we will do an all-reduce to
3527 // see if any processes failed.
3528
3529 // If this is false, then either the graph contains a column index
3530 // which is invalid in the CURRENT column Map, or the graph is
3531 // locally indexed but currently has no column Map. In either
3532 // case, there is no way to convert the current local indices into
3533 // global indices, so that we can convert them into the new column
3534 // Map's local indices. It's possible for this to be true on some
3535 // processes but not others, due to replaceColMap.
3536 bool allCurColIndsValid = true;
3537 // On the calling process, are all valid current column indices
3538 // also in the new column Map on the calling process? In other
3539 // words, does local reindexing suffice, or should the user have
3540 // done an Import or Export instead?
3541 bool localSuffices = true;
3542
3543 {
3544 // Final arrays for the local indices. We will allocate exactly
3545 // one of these ONLY if the graph is locally indexed on the
3546 // calling process, and ONLY if the graph has one or more entries
3547 // (is not empty) on the calling process. In that case, we
3548 // allocate the first (1-D storage) if the graph has a static
3549 // profile, else we allocate the second (2-D storage).
3551
3552 // If indices aren't allocated, that means the calling process
3553 // owns no entries in the graph. Thus, there is nothing to
3554 // convert, and it trivially succeeds locally.
3555 if (indicesAreAllocated()) {
3556 if (isLocallyIndexed()) {
3557 if (hasColMap()) { // locally indexed, and currently has a column Map
3558 const map_type& oldColMap = *(getColMap());
3559
3560 // Allocate storage for the new local indices.
3561 const size_t allocSize = this->getLocalAllocationSize();
3562 auto oldLclInds1D = lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly);
3563 newLclInds1D_dev = col_inds_type_dev("Tpetra::CrsGraph::lclIndsReindexed",
3564 allocSize);
3565 auto oldLclColMap = oldColMap.getLocalMap();
3566 auto newLclColMap = newColMap->getLocalMap();
3567
3568 const auto LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3569 const auto GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
3570
3571 const int NOT_ALL_LOCAL_INDICES_ARE_VALID = 1;
3572 const int LOCAL_DOES_NOT_SUFFICE = 2;
3573 int errorStatus = 0;
3574 Kokkos::parallel_reduce(
3575 "Tpetra::CrsGraph::reindexColumns",
3576 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, allocSize),
3577 KOKKOS_LAMBDA(const LocalOrdinal k, int& result) {
3579 if (oldLclCol == LO_INVALID) {
3581 } else {
3582 const GO gblCol = oldLclColMap.getGlobalElement(oldLclCol);
3583 if (gblCol == GO_INVALID) {
3585 } else {
3586 const LocalOrdinal newLclCol = newLclColMap.getLocalElement(gblCol);
3587 if (newLclCol == LO_INVALID) {
3589 } else {
3591 }
3592 }
3593 }
3594 },
3595 Kokkos::LOr<int>(errorStatus));
3598 } else { // locally indexed, but no column Map
3599 // This case is only possible if replaceColMap() was called
3600 // with a null argument on the calling process. It's
3601 // possible, but it means that this method can't possibly
3602 // succeed, since we have no way of knowing how to convert
3603 // the current local indices to global indices.
3604 allCurColIndsValid = false;
3605 }
3606 } else { // globally indexed
3607 // If the graph is globally indexed, we don't need to save
3608 // local indices, but we _do_ need to know whether the current
3609 // global indices are valid in the new column Map. We may
3610 // need to do a getRemoteIndexList call to find this out.
3611 //
3612 // In this case, it doesn't matter whether the graph currently
3613 // has a column Map. We don't need the old column Map to
3614 // convert from global indices to the _new_ column Map's local
3615 // indices. Furthermore, we can use the same code, whether
3616 // the graph is static or dynamic profile.
3617
3618 // Test whether the current global indices are in the new
3619 // column Map on the calling process.
3620 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3621 const RowInfo rowInfo = this->getRowInfo(lclRow);
3622 auto oldGblRowView = this->getGlobalIndsViewHost(rowInfo);
3623 for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3624 const GO gblCol = oldGblRowView(k);
3625 if (!newColMap->isNodeGlobalElement(gblCol)) {
3626 localSuffices = false;
3627 break; // Stop at the first invalid index
3628 }
3629 } // for each entry in the current row
3630 } // for each locally owned row
3631 } // locally or globally indexed
3632 } // whether indices are allocated
3633
3634 // Do an all-reduce to check both possible error conditions.
3635 int lclSuccess[2];
3636 lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3637 lclSuccess[1] = localSuffices ? 1 : 0;
3638 int gblSuccess[2];
3639 gblSuccess[0] = 0;
3640 gblSuccess[1] = 0;
3642 getRowMap().is_null() ? Teuchos::null : getRowMap()->getComm();
3643 if (!comm.is_null()) {
3645 }
3646
3648 gblSuccess[0] == 0, std::runtime_error,
3649 "It is not possible to continue."
3650 " The most likely reason is that the graph is locally indexed, but the "
3651 "column Map is missing (null) on some processes, due to a previous call "
3652 "to replaceColMap().");
3653
3655 gblSuccess[1] == 0, std::runtime_error,
3656 "On some process, the graph "
3657 "contains column indices that are in the old column Map, but not in the "
3658 "new column Map (on that process). This method does NOT redistribute "
3659 "data; it does not claim to do the work of an Import or Export operation."
3660 " This means that for all processess, the calling process MUST own all "
3661 "column indices, in both the old column Map and the new column Map. In "
3662 "this case, you will need to do an Import or Export operation to "
3663 "redistribute data.");
3664
3665 // Commit the results.
3666 if (isLocallyIndexed()) {
3667 lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3668 }
3669 // end of scope for newLclInds1D_dev
3670 // sortAndMergeAllIndices needs host access
3671 }
3672
3673 if (isLocallyIndexed()) {
3674 // We've reindexed, so we don't know if the indices are sorted.
3675 //
3676 // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3677 // since we're already going through all the indices above. We
3678 // could also sort each row in place; that way, we would only
3679 // have to make one pass over the rows.
3680 indicesAreSorted_ = false;
3682 // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3683 // order to call this method.
3684 //
3685 // FIXME (mfh 17 Sep 2014) This violates the strong exception
3686 // guarantee. It would be better to sort the new index arrays
3687 // before committing them.
3688 const bool sorted = false; // need to resort
3689 const bool merged = true; // no need to merge, since no dups
3690 this->sortAndMergeAllIndices(sorted, merged);
3691 }
3692 }
3693 colMap_ = newColMap;
3694
3695 if (newImport.is_null()) {
3696 // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3697 // check whether the input Import is null on any process.
3698 //
3699 // If the domain Map hasn't been set yet, we can't compute a new
3700 // Import object. Leave it what it is; it should be null, but
3701 // it doesn't matter. If the domain Map _has_ been set, then
3702 // compute a new Import object if necessary.
3703 if (!domainMap_.is_null()) {
3704 if (!domainMap_->isSameAs(*newColMap)) {
3705 importer_ = Teuchos::rcp(new import_type(domainMap_, newColMap));
3706 } else {
3707 importer_ = Teuchos::null; // don't need an Import
3708 }
3709 }
3710 } else {
3711 // The caller gave us an Import object. Assume that it's valid.
3712 importer_ = newImport;
3713 }
3714}
3715
// replaceDomainMap: install a new domain Map on the graph, building a
// matching Import (source = newDomainMap, target = colMap_) when one is
// needed, then delegating to replaceDomainMapAndImporter().
//
// \param newDomainMap [in] The new domain Map; checked for null below.
//
// The two checks below pass std::invalid_argument as the exception type:
// one for a null colMap_, one for a null newDomainMap.
//
// NOTE(review): this listing skips original lines 3717, 3720 and 3723 --
// presumably the class-qualified return-type line and the opening of the
// two exception-test macro calls. Confirm against the real source.
3716 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3718 replaceDomainMap(const Teuchos::RCP<const map_type>& newDomainMap) {
3719 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
3721 colMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3722 "this method unless the graph already has a column Map.");
3724 newDomainMap.is_null(), std::invalid_argument,
3725 prefix << "The new domain Map must be nonnull.");
3726
3727 // Create a new importer, if needed
// The importer stays null when the new domain Map is the very same RCP as
// colMap_, or when isSameAs() reports the two Maps as the same.
3728 Teuchos::RCP<const import_type> newImporter = Teuchos::null;
3729 if (newDomainMap != colMap_ && (!newDomainMap->isSameAs(*colMap_))) {
3730 newImporter = rcp(new import_type(newDomainMap, colMap_));
3731 }
3732 this->replaceDomainMapAndImporter(newDomainMap, newImporter);
3733}
3734
// replaceDomainMapAndImporter: overwrite domainMap_ and importer_ with the
// caller-supplied objects.
//
// \param newDomainMap [in] The new domain Map; checked for null below.
// \param newImporter [in] The new Import, or null.
//
// The first two checks pass std::invalid_argument as the exception type
// (null colMap_, null newDomainMap). When debug_ is set, the consistency
// of the (Map, Import) pair is also verified before anything is stored.
//
// NOTE(review): this listing skips original lines 3736, 3740 and 3743 --
// presumably the class-qualified return-type line and the opening of the
// two exception-test macro calls. Confirm against the real source.
3735 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3737 replaceDomainMapAndImporter(const Teuchos::RCP<const map_type>& newDomainMap,
3738 const Teuchos::RCP<const import_type>& newImporter) {
3739 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
3741 colMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3742 "this method unless the graph already has a column Map.");
3744 newDomainMap.is_null(), std::invalid_argument,
3745 prefix << "The new domain Map must be nonnull.");
3746
// Debug-only validation: these checks call isSameAs(), so they are only
// performed when debug_ is true.
3747 if (debug_) {
3748 if (newImporter.is_null()) {
3749 // It's not a good idea to put expensive operations in a macro
3750 // clause, even if they are side effect - free, because macros
3751 // don't promise that they won't evaluate their arguments more
3752 // than once. It's polite for them to do so, but not required.
3753 const bool colSameAsDom = colMap_->isSameAs(*newDomainMap);
3754 TEUCHOS_TEST_FOR_EXCEPTION(!colSameAsDom, std::invalid_argument,
3755 "If the new Import is null, "
3756 "then the new domain Map must be the same as the current column Map.");
3757 } else {
// A nonnull Import must map newDomainMap (source) to colMap_ (target).
3758 const bool colSameAsTgt =
3759 colMap_->isSameAs(*(newImporter->getTargetMap()));
3760 const bool newDomSameAsSrc =
3761 newDomainMap->isSameAs(*(newImporter->getSourceMap()));
3762 TEUCHOS_TEST_FOR_EXCEPTION(!colSameAsTgt || !newDomSameAsSrc, std::invalid_argument,
3763 "If the "
3764 "new Import is nonnull, then the current column Map must be the same "
3765 "as the new Import's target Map, and the new domain Map must be the "
3766 "same as the new Import's source Map.");
3767 }
3768 }
3769
// Commit. The const qualifier on the Import is cast away because
// importer_ is assigned an RCP to a non-const import_type here.
3770 domainMap_ = newDomainMap;
3771 importer_ = Teuchos::rcp_const_cast<import_type>(newImporter);
3772}
3773
// replaceRangeMap: install a new range Map on the graph, building a
// matching Export (source = rowMap_, target = newRangeMap) when one is
// needed, then delegating to replaceRangeMapAndExporter().
//
// \param newRangeMap [in] The new range Map; checked for null below.
//
// The two checks below pass std::invalid_argument as the exception type:
// one for a null rowMap_, one for a null newRangeMap.
//
// NOTE(review): this listing skips original lines 3775, 3778 and 3781 --
// presumably the class-qualified return-type line and the opening of the
// two exception-test macro calls. Confirm against the real source.
3774 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3776 replaceRangeMap(const Teuchos::RCP<const map_type>& newRangeMap) {
3777 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
3779 rowMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3780 "this method unless the graph already has a row Map.");
3782 newRangeMap.is_null(), std::invalid_argument,
3783 prefix << "The new range Map must be nonnull.");
3784
3785 // Create a new exporter, if needed
// The exporter stays null when the new range Map is the very same RCP as
// rowMap_, or when isSameAs() reports the two Maps as the same.
3786 Teuchos::RCP<const export_type> newExporter = Teuchos::null;
3787 if (newRangeMap != rowMap_ && (!newRangeMap->isSameAs(*rowMap_))) {
3788 newExporter = rcp(new export_type(rowMap_, newRangeMap));
3789 }
3790 this->replaceRangeMapAndExporter(newRangeMap, newExporter);
3791}
3792
3793template <class LocalOrdinal, class GlobalOrdinal, class Node>
3795 replaceRangeMapAndExporter(const Teuchos::RCP<const map_type>& newRangeMap,
3796 const Teuchos::RCP<const export_type>& newExporter) {
3797 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
3799 rowMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3800 "this method unless the graph already has a column Map.");
3802 newRangeMap.is_null(), std::invalid_argument,
3803 prefix << "The new domain Map must be nonnull.");
3804
3805 if (debug_) {
3806 if (newExporter.is_null()) {
3807 // It's not a good idea to put expensive operations in a macro
3808 // clause, even if they are side effect - free, because macros
3809 // don't promise that they won't evaluate their arguments more
3810 // than once. It's polite for them to do so, but not required.
3811 const bool rowSameAsRange = rowMap_->isSameAs(*newRangeMap);
3812 TEUCHOS_TEST_FOR_EXCEPTION(!rowSameAsRange, std::invalid_argument,
3813 "If the new Export is null, "
3814 "then the new range Map must be the same as the current row Map.");
3815 } else {
3816 const bool newRangeSameAsTgt =
3817 newRangeMap->isSameAs(*(newExporter->getTargetMap()));
3818 const bool rowSameAsSrc =
3819 rowMap_->isSameAs(*(newExporter->getSourceMap()));
3820 TEUCHOS_TEST_FOR_EXCEPTION(!rowSameAsSrc || !newRangeSameAsTgt, std::invalid_argument,
3821 "If the "
3822 "new Export is nonnull, then the current row Map must be the same "
3823 "as the new Export's source Map, and the new range Map must be the "
3824 "same as the new Export's target Map.");
3825 }
3826 }
3827
3828 rangeMap_ = newRangeMap;
3829 exporter_ = Teuchos::rcp_const_cast<export_type>(newExporter);
3830}
3831
// getLocalGraphDevice: const accessor whose visible arguments are the
// device view of the packed local column indices (requested with
// Access::ReadWrite) and the packed row pointers on device.
//
// NOTE(review): this listing skips original lines 3833, 3834 and 3836 --
// presumably the return type, the class qualifier, and a
// `return local_graph_device_type(` opener mirroring getLocalGraphHost().
// Confirm against the real source.
3832 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3835 getLocalGraphDevice() const {
3837 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
3838 this->getRowPtrsPackedDevice());
3839}
3840
// getLocalGraphHost: const accessor that returns a local_graph_host_type
// built from the host view of the packed local column indices (requested
// with Access::ReadWrite) and the packed row pointers on host.
//
// NOTE(review): this listing skips original lines 3842 and 3843 --
// presumably the return type and the class qualifier. Confirm against the
// real source.
3841 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3844 getLocalGraphHost() const {
3845 return local_graph_host_type(
3846 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
3847 this->getRowPtrsPackedHost());
3848}
3849
// computeGlobalConstants (name taken from the profiling-region label
// below): first calls computeLocalConstants(); then, unless
// haveGlobalConstants_ is already set, performs two all-reduces over the
// graph's communicator:
//   - REDUCE_SUM of getLocalNumEntries()  -> globalNumEntries_
//   - REDUCE_MAX of nodeMaxNumRowEntries_ -> globalMaxNumRowEntries_
// and finally sets haveGlobalConstants_.
//
// NOTE(review): this listing skips original lines 3851 and 3852 --
// presumably the class-qualified return-type line and the function-name
// line. Confirm against the real source.
3850 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3853 using Teuchos::ArrayView;
3854 using Teuchos::outArg;
3855 using Teuchos::reduceAll;
3856 using ::Tpetra::Details::ProfilingRegion;
3857 typedef global_size_t GST;
3858
3859 ProfilingRegion regionCGC("Tpetra::CrsGraph::computeGlobalConstants");
3860
3861 this->computeLocalConstants();
3862
3863 // Compute global constants from local constants. Processes that
3864 // already have local constants still participate in the
3865 // all-reduces, using their previously computed values.
3866 if (!this->haveGlobalConstants_) {
// this->getComm() is dereferenced without a null check here.
3867 const Teuchos::Comm<int>& comm = *(this->getComm());
3868 // Promote all the nodeNum* and nodeMaxNum* quantities from
3869 // size_t to global_size_t, when doing the all-reduces for
3870 // globalNum* / globalMaxNum* results.
3871 //
3872 // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
3873 // this in two all-reduces (one for the sum and the other for
3874 // the max), or use a custom MPI_Op that combines the sum and
3875 // the max. The latter might even be slower than two
3876 // all-reduces on modern network hardware. It would also be a
3877 // good idea to use nonblocking all-reduces (MPI 3), so that we
3878 // don't have to wait around for the first one to finish before
3879 // starting the second one.
3880 GST lcl, gbl;
3881 lcl = static_cast<GST>(this->getLocalNumEntries());
3882
// First all-reduce: global sum of the per-process entry counts.
3883 reduceAll<int, GST>(comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
3884 this->globalNumEntries_ = gbl;
3885
// Second all-reduce: global max of the per-process max row length,
// written directly into globalMaxNumRowEntries_.
3886 const GST lclMaxNumRowEnt = static_cast<GST>(this->nodeMaxNumRowEntries_);
3887 reduceAll<int, GST>(comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
3888 outArg(this->globalMaxNumRowEntries_));
3889 this->haveGlobalConstants_ = true;
3890 }
3891}
3892
// computeLocalConstants (name taken from the profiling-region label
// below): computes nodeMaxNumRowEntries_ from the packed row pointers and
// sets haveLocalConstants_. Returns immediately if haveLocalConstants_ is
// already set.
//
// NOTE(review): this listing skips original lines 3894 and 3895 --
// presumably the class-qualified return-type line and the function-name
// line. Confirm against the real source.
3893 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3896 using ::Tpetra::Details::ProfilingRegion;
3897
3898 ProfilingRegion regionCLC("Tpetra::CrsGraph::computeLocalConstants");
3899 if (this->haveLocalConstants_) {
3900 return;
3901 }
3902
3903 // Reset local properties
3904 this->nodeMaxNumRowEntries_ =
3905 Teuchos::OrdinalTraits<size_t>::invalid();
3906
3907 using LO = local_ordinal_type;
3908
// A row-pointer array of extent 0 is treated as zero rows; otherwise the
// row count is extent(0) - 1.
3909 auto ptr = this->getRowPtrsPackedDevice();
3910 const LO lclNumRows = ptr.extent(0) == 0 ? static_cast<LO>(0) : (static_cast<LO>(ptr.extent(0)) - static_cast<LO>(1));
3911
// NOTE(review): maxDifference presumably returns the largest difference
// between consecutive row offsets, i.e. the longest row -- confirm
// against its documentation.
3912 const LO lclMaxNumRowEnt =
3913 ::Tpetra::Details::maxDifference("Tpetra::CrsGraph: nodeMaxNumRowEntries",
3914 ptr, lclNumRows);
3915 this->nodeMaxNumRowEntries_ = static_cast<size_t>(lclMaxNumRowEnt);
3916 this->haveLocalConstants_ = true;
3917}
3918
3919template <class LocalOrdinal, class GlobalOrdinal, class Node>
3920std::pair<size_t, std::string>
3922 makeIndicesLocal(const bool verbose) {
3924 using std::endl;
3925 using Teuchos::arcp;
3926 using Teuchos::Array;
3927 typedef LocalOrdinal LO;
3928 typedef GlobalOrdinal GO;
3929 typedef device_type DT;
3930 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
3931 typedef typename num_row_entries_type::non_const_value_type num_ent_type;
3932 const char tfecfFuncName[] = "makeIndicesLocal: ";
3933 ProfilingRegion regionMakeIndicesLocal("Tpetra::CrsGraph::makeIndicesLocal");
3934
3935 std::unique_ptr<std::string> prefix;
3936 if (verbose) {
3937 prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
3938 std::ostringstream os;
3939 os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
3940 std::cerr << os.str();
3941 }
3942
3943 // These are somewhat global properties, so it's safe to have
3944 // exception checks for them, rather than returning an error code.
3945 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::logic_error,
3946 "The graph does not have a "
3947 "column Map yet. This method should never be called in that case. "
3948 "Please report this bug to the Tpetra developers.");
3949 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getColMap().is_null(), std::logic_error,
3950 "The graph claims "
3951 "that it has a column Map, because hasColMap() returns true. However, "
3952 "the result of getColMap() is null. This should never happen. Please "
3953 "report this bug to the Tpetra developers.");
3954
3955 // Return value 1: The number of column indices (counting
3956 // duplicates) that could not be converted to local indices,
3957 // because they were not in the column Map on the calling process.
3958 size_t lclNumErrs = 0;
3959 std::ostringstream errStrm; // for return value 2 (error string)
3960
3961 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
3962 const map_type& colMap = *(this->getColMap());
3963
3964 if (this->isGloballyIndexed() && lclNumRows != 0) {
3965 // This is a host-accessible View.
3966 typename num_row_entries_type::const_type h_numRowEnt =
3967 this->k_numRowEntries_;
3968
3969 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
3970
3971 // Allocate space for local indices.
3972 if (rowPtrsUnpacked_host.extent(0) == 0) {
3973 errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
3974 "happen here. Please report this bug to the Tpetra developers."
3975 << endl;
3976 // Need to return early.
3977 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid(),
3978 errStrm.str());
3979 }
3981
3982 // mfh 17 Dec 2016: We don't need initial zero-fill of
3983 // lclIndsUnpacked_wdv, because we will fill it below anyway.
3984 // AllowPadding would only help for aligned access (e.g.,
3985 // for vectorization) if we also were to pad each row to the
3986 // same alignment, so we'll skip AllowPadding for now.
3987
3988 // using Kokkos::AllowPadding;
3989 using Kokkos::view_alloc;
3990 using Kokkos::WithoutInitializing;
3991
3992 // When giving the label as an argument to
3993 // Kokkos::view_alloc, the label must be a string and not a
3994 // char*, else the code won't compile. This is because
3995 // view_alloc also allows a raw pointer as its first
3996 // argument. See
3997 // https://github.com/kokkos/kokkos/issues/434. This is a
3998 // large allocation typically, so the overhead of creating
3999 // an std::string is minor.
4000 const std::string label("Tpetra::CrsGraph::lclInd");
4001 if (verbose) {
4002 std::ostringstream os;
4003 os << *prefix << "(Re)allocate lclInd_wdv: old="
4004 << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4005 std::cerr << os.str();
4006 }
4007
4008 local_inds_dualv_type lclInds_dualv =
4009 local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4010 numEnt);
4011 lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4012
4013 auto lclColMap = colMap.getLocalMap();
4014 // This is a "device mirror" of the host View h_numRowEnt.
4015 //
4016 // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4017 // Device instance is to use its default constructor. See the
4018 // following Kokkos issue:
4019 //
4020 // https://github.com/kokkos/kokkos/issues/442
4021 if (verbose) {
4022 std::ostringstream os;
4023 os << *prefix << "Allocate device mirror k_numRowEnt: "
4024 << h_numRowEnt.extent(0) << endl;
4025 std::cerr << os.str();
4026 }
4027 auto k_numRowEnt =
4028 Kokkos::create_mirror_view_and_copy(device_type(), h_numRowEnt);
4029
4030 using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal;
4031 lclNumErrs =
4033 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4034 gblInds_wdv.getDeviceView(Access::ReadOnly),
4035 this->getRowPtrsUnpackedDevice(),
4036 lclColMap,
4037 k_numRowEnt);
4038 if (lclNumErrs != 0) {
4039 const int myRank = [this]() {
4040 auto map = this->getMap();
4041 if (map.is_null()) {
4042 return 0;
4043 } else {
4044 auto comm = map->getComm();
4045 return comm.is_null() ? 0 : comm->getRank();
4046 }
4047 }();
4048 const bool pluralNumErrs = (lclNumErrs != static_cast<size_t>(1));
4049 errStrm << "(Process " << myRank << ") When converting column "
4050 "indices from global to local, we encountered "
4051 << lclNumErrs
4052 << " ind" << (pluralNumErrs ? "ices" : "ex")
4053 << " that do" << (pluralNumErrs ? "es" : "")
4054 << " not live in the column Map on this process." << endl;
4055 }
4056
4057 // We've converted column indices from global to local, so we
4058 // can deallocate the global column indices (which we know are
4059 // in 1-D storage, because the graph has static profile).
4060 if (verbose) {
4061 std::ostringstream os;
4062 os << *prefix << "Free gblInds_wdv: "
4063 << gblInds_wdv.extent(0) << endl;
4064 std::cerr << os.str();
4065 }
4066 gblInds_wdv = global_inds_wdv_type();
4067 } // globallyIndexed() && lclNumRows > 0
4068
4069 this->indicesAreLocal_ = true;
4070 this->indicesAreGlobal_ = false;
4071 this->checkInternalState();
4072
4073 return std::make_pair(lclNumErrs, errStrm.str());
4074}
4075
// makeColMap: (re)compute the graph's column Map by calling
// Details::makeColMap with the domain Map and this graph, then store the
// result in colMap_ and run checkInternalState().
//
// \param remotePIDs [in/out] Passed straight through to
//   Details::makeColMap. NOTE(review): presumably filled with the owning
//   process ranks of the remote column indices -- confirm against
//   Details::makeColMap's documentation.
//
// In debug mode (debug_ true) the per-process error code is turned into a
// success flag, and when the resulting gblSuccess is not 1 a
// std::runtime_error carrying all processes' gathered error messages is
// raised. Outside debug mode the error code is discarded.
//
// NOTE(review): this listing skips original lines 4077, 4079 and
// 4121-4122 -- presumably the class-qualified return-type line, a
// using-declaration for ProfilingRegion, and the reduceAll call that fills
// gblSuccess. Confirm against the real source.
4076 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4078 makeColMap(Teuchos::Array<int>& remotePIDs) {
4080 using std::endl;
4081 const char tfecfFuncName[] = "makeColMap";
4082
4083 ProfilingRegion regionSortAndMerge("Tpetra::CrsGraph::makeColMap");
4084 std::unique_ptr<std::string> prefix;
4085 if (verbose_) {
4086 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4087 std::ostringstream os;
4088 os << *prefix << "Start" << endl;
4089 std::cerr << os.str();
4090 }
4091
4092 // this->colMap_ should be null at this point, but we accept the
4093 // future possibility that it might not be (esp. if we decide
4094 // later to support graph structure changes after first
4095 // fillComplete, which CrsGraph does not currently (as of 12 Feb
4096 // 2017) support).
4097 Teuchos::RCP<const map_type> colMap = this->colMap_;
4098 const bool sortEachProcsGids =
4099 this->sortGhostsAssociatedWithEachProcessor_;
4100
4101 // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4102 // per-process error code. If an error does occur on a process,
4103 // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4104 // notice that error. This is the caller's responsibility. For
4105 // now, we only propagate (to all processes) and report the error
4106 // in debug mode. In the future, we need to add the local/global
4107 // error handling scheme used in BlockCrsMatrix to this class.
4108 if (debug_) {
4109 using Teuchos::outArg;
4110 using Teuchos::REDUCE_MIN;
4111 using Teuchos::reduceAll;
4112
4113 std::ostringstream errStrm;
4114 const int lclErrCode =
4115 Details::makeColMap(colMap, remotePIDs,
4116 getDomainMap(), *this, sortEachProcsGids, &errStrm);
4117 auto comm = this->getComm();
4118 if (!comm.is_null()) {
4119 const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4120 int gblSuccess = 0; // output argument
4123 if (gblSuccess != 1) {
4124 std::ostringstream os;
4125 Details::gathervPrint(os, errStrm.str(), *comm);
// The message starts with ": " because tfecfFuncName above has no
// trailing separator.
4126 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
4127 ": An error happened on at "
4128 "least one process in the CrsGraph's communicator. "
4129 "Here are all processes' error messages:"
4130 << std::endl
4131 << os.str());
4132 }
4133 }
4134 } else {
// Non-debug path: no error stream is supplied and the returned error code
// is explicitly ignored.
4135 (void)Details::makeColMap(colMap, remotePIDs,
4136 getDomainMap(), *this, sortEachProcsGids, nullptr);
4137 }
4138 // See above. We want to admit the possibility of makeColMap
4139 // actually revising an existing column Map, even though that
4140 // doesn't currently (as of 10 May 2017) happen.
4141 this->colMap_ = colMap;
4142
4143 checkInternalState();
4144 if (verbose_) {
4145 std::ostringstream os;
4146 os << *prefix << "Done" << endl;
4147 std::cerr << os.str();
4148 }
4149}
4150
4151template <class execution_space, class LO, class rowptr_type, class colinds_type, class numRowEntries_type>
4152void prepareSortMergeUnpackedGraph(rowptr_type rowptr, colinds_type colinds, numRowEntries_type numRowEntries) {
4153 using ATS = KokkosKernels::ArithTraits<LO>;
4154 const auto unused = ATS::max();
4155
4156 auto numRows = rowptr.extent(0) - 1;
4157
4158 // make sure that unused entries will get ordered last
4159 Kokkos::parallel_for(
4160 "flag_unused_entries", Kokkos::RangePolicy<execution_space, LO>(0, numRows), KOKKOS_LAMBDA(const LO rlid) {
4161 for (size_t jj = rowptr(rlid) + numRowEntries(rlid); jj < rowptr(rlid + 1); ++jj) {
4162 colinds(jj) = unused;
4163 }
4164 });
4165}
4166
4167template <class execution_space, class LO, class rowptr_type, class colinds_type, class numRowEntries_type>
4168void mergeUnpackedGraph(rowptr_type rowptr, colinds_type colinds, numRowEntries_type numRowEntries) {
4169 // For this to work correctly, we require that the unsused column entries have been filled
4170 // with indices that get ordered last.
4171
4172 auto numRows = rowptr.extent(0) - 1;
4173
4174 // merge
4175 // We cannot use KokkosSparse::sort_and_merge_matrix since we
4176 // do not actually want to change the allocations.
4177
4178 Kokkos::parallel_for(
4179 "merge_entries", Kokkos::RangePolicy<execution_space>(0, numRows), KOKKOS_LAMBDA(const LO rlid) {
4180 auto rowNNZ = numRowEntries(rlid);
4181 if (rowNNZ == 0) {
4182 return;
4183 }
4184 auto rowBegin = rowptr(rlid);
4185 auto pos = rowBegin;
4186 for (size_t offset = rowBegin + 1; offset < rowBegin + rowNNZ; ++offset) {
4187 if ((colinds(offset) != colinds(pos))) {
4188 colinds(++pos) = colinds(offset);
4189 }
4190 }
4191 numRowEntries(rlid) = pos + 1 - rowBegin;
4192 });
4193}
4194
4195template <class LocalOrdinal, class GlobalOrdinal, class Node>
4196void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4197 sortAndMergeAllIndices(const bool sorted, const bool merged) {
4198 using std::endl;
4199 const char tfecfFuncName[] = "sortAndMergeAllIndices";
4200 Details::ProfilingRegion regionSortAndMerge("Tpetra::CrsGraph::sortAndMergeAllIndices");
4201
4202 std::unique_ptr<std::string> prefix;
4203 if (verbose_) {
4204 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4205 std::ostringstream os;
4206 os << *prefix << "Start: "
4207 << "sorted=" << (sorted ? "true" : "false")
4208 << ", merged=" << (merged ? "true" : "false") << endl;
4209 std::cerr << os.str();
4210 }
4211 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed(), std::logic_error,
4212 "This method may only be called after makeIndicesLocal.");
4213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!merged && this->isStorageOptimized(), std::logic_error,
4214 "The graph is already storage optimized, so we shouldn't be "
4215 "merging any indices. "
4216 "Please report this bug to the Tpetra developers.");
4217
4218 if (!sorted || !merged) {
4219 if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
4220 // We are sorting & merging the unpacked views.
4221 // This means that not all entries are actually in use. We need to take k_numRowEntries_ into account.
4222 auto rowptr = rowPtrsUnpacked_dev_;
4223 auto colinds = lclIndsUnpacked_wdv.getDeviceView(Access::ReadWrite);
4224
4225 // Create a device copy of k_numRowEntries_.
4226 auto k_numRowEntries_d = Kokkos::create_mirror_view_and_copy(execution_space(), k_numRowEntries_);
4227
4228 // set set unused column entries so they get sorted last
4229 prepareSortMergeUnpackedGraph<execution_space, LocalOrdinal>(rowptr, colinds, k_numRowEntries_d);
4230
4231 if (!sorted) {
4232 KokkosSparse::sort_crs_graph(rowptr, colinds);
4233 this->indicesAreSorted_ = true; // we just sorted every row
4234 }
4235 if (!merged) {
4236 mergeUnpackedGraph<execution_space, LocalOrdinal>(rowptr, colinds, k_numRowEntries_d);
4237 Kokkos::deep_copy(k_numRowEntries_, k_numRowEntries_d);
4238 this->noRedundancies_ = true; // we just merged every row
4239 }
4240 } else {
4241 auto rowptr = rowPtrsPacked_dev_;
4242 auto colinds = lclIndsPacked_wdv.getDeviceView(Access::ReadWrite);
4243 if (!sorted && merged) {
4244 KokkosSparse::sort_crs_graph(rowptr, colinds);
4245 this->indicesAreSorted_ = true; // we just sorted every row
4246 } else {
4247 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
4248 "We should never get here."
4249 "Please report this bug to the Tpetra developers.");
4250 }
4251 }
4252 }
4253
4254 if (verbose_) {
4255 std::ostringstream os;
4256 os << *prefix << "Done" << endl;
4257 std::cerr << os.str();
4258 }
4259}
4260
// makeImportExport: create the graph's Import (domainMap_ -> colMap_) and
// Export (rowMap_ -> rangeMap_) objects, if they do not exist yet and if
// the respective pair of Maps differs.
//
// \param remotePIDs [in/out] Passed to the Import constructor when
//   useRemotePIDs is true. NOTE(review): presumably the owning ranks of
//   the column Map's remote indices -- confirm against the Import
//   constructor's documentation.
// \param useRemotePIDs [in] Whether to use the Import constructor overload
//   that takes remotePIDs.
//
// The graph must have a column Map (checked below with std::logic_error).
// If the graph's parameter list has an "Import" / "Export" sublist, the
// constructor overloads that take a sublist argument are used.
//
// NOTE(review): this listing skips original lines 4262, 4293, 4295 and
// 4314 -- presumably the class-qualified return-type line and the
// declarations of importSublist, newImp and exportSublist. Confirm against
// the real source.
4261 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4263 makeImportExport(Teuchos::Array<int>& remotePIDs,
4264 const bool useRemotePIDs) {
4265 using Teuchos::ParameterList;
4266 using Teuchos::RCP;
4267 using Teuchos::rcp;
4268 using ::Tpetra::Details::ProfilingRegion;
4269 const char tfecfFuncName[] = "makeImportExport: ";
4270 ProfilingRegion regionMIE("Tpetra::CrsGraph::makeImportExport");
4271
4272 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::logic_error,
4273 "This method may not be called unless the graph has a column Map.");
4274 RCP<ParameterList> params = this->getNonconstParameterList(); // could be null
4275
4276 // Don't do any checks to see if we need to create the Import, if
4277 // it exists already.
4278 //
4279 // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4280 // change CrsGraph in the future to allow changing the column
4281 // Map after fillComplete. For now, the column Map is fixed
4282 // after the first fillComplete call.
4283 if (importer_.is_null()) {
4284 // Create the Import instance if necessary.
// No Import is created when domainMap_ and colMap_ are the same RCP or
// isSameAs() reports them as the same. domainMap_ is dereferenced here
// without a null check.
4285 if (domainMap_ != colMap_ && (!domainMap_->isSameAs(*colMap_))) {
4286 if (params.is_null() || !params->isSublist("Import")) {
4287 if (useRemotePIDs) {
4288 importer_ = rcp(new import_type(domainMap_, colMap_, remotePIDs));
4289 } else {
4290 importer_ = rcp(new import_type(domainMap_, colMap_));
4291 }
4292 } else {
4294 if (useRemotePIDs) {
4296 rcp(new import_type(domainMap_, colMap_, remotePIDs,
4297 importSublist));
4298 importer_ = newImp;
4299 } else {
4300 importer_ = rcp(new import_type(domainMap_, colMap_, importSublist));
4301 }
4302 }
4303 }
4304 }
4305
4306 // Don't do any checks to see if we need to create the Export, if
4307 // it exists already.
4308 if (exporter_.is_null()) {
4309 // Create the Export instance if necessary.
// No Export is created when rangeMap_ and rowMap_ are the same RCP or
// isSameAs() reports them as the same. rangeMap_ is dereferenced here
// without a null check.
4310 if (rangeMap_ != rowMap_ && !rangeMap_->isSameAs(*rowMap_)) {
4311 if (params.is_null() || !params->isSublist("Export")) {
4312 exporter_ = rcp(new export_type(rowMap_, rangeMap_));
4313 } else {
4315 exporter_ = rcp(new export_type(rowMap_, rangeMap_, exportSublist));
4316 }
4317 }
4318 }
4319}
4320
// description: build a one-line summary of the graph.
//
// The string starts with dist_object_type::description(). If the graph is
// fill complete, the global row, column and entry counts are appended;
// otherwise only the global row count is appended.
//
// \return The summary string.
//
// NOTE(review): this listing skips original line 4323 -- presumably the
// class qualifier. Confirm against the real source.
4321 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4322 std::string
4324 description() const {
4325 std::ostringstream oss;
4326 oss << dist_object_type::description();
4327 if (isFillComplete()) {
4328 oss << "{status = fill complete"
4329 << ", global rows = " << getGlobalNumRows()
4330 << ", global cols = " << getGlobalNumCols()
4331 << ", global num entries = " << getGlobalNumEntries()
4332 << "}";
4333 } else {
// Only the global row count is printed in this branch.
4334 oss << "{status = fill not complete"
4335 << ", global rows = " << getGlobalNumRows()
4336 << "}";
4337 }
4338 return oss.str();
4339}
4340
// Print this graph to `out` at the requested verbosity level.
//
// VERB_DEFAULT is treated as VERB_LOW, and VERB_NONE prints nothing.  At every
// other level rank 0 prints description() and, when the graph is fill
// complete, the global maximum number of row entries.  At VERB_MEDIUM and
// above, the row Map and any non-null column/domain/range Map are described
// and every rank reports its local entry counts.  At VERB_HIGH and above every
// rank prints one line per local row; VERB_EXTREME appends that row's column
// indices, always shown as global indices.
//
// Per-rank output is produced one rank at a time, with barriers on the graph's
// communicator between turns, so every rank of that communicator has to call
// this at MEDIUM or higher.
//
// NOTE(review): the embedded listing numbers skip 4342, so the line holding
// the return type and class qualifier appears to be missing from this copy.
4341template <class LocalOrdinal, class GlobalOrdinal, class Node>
4343 describe(Teuchos::FancyOStream& out,
4344 const Teuchos::EVerbosityLevel verbLevel) const {
4345 using std::endl;
4346 using std::setw;
4347 using Teuchos::ArrayView;
4348 using Teuchos::Comm;
4349 using Teuchos::RCP;
4350 using Teuchos::VERB_DEFAULT;
4351 using Teuchos::VERB_EXTREME;
4352 using Teuchos::VERB_HIGH;
4353 using Teuchos::VERB_LOW;
4354 using Teuchos::VERB_MEDIUM;
4355 using Teuchos::VERB_NONE;
4356
4357 Teuchos::EVerbosityLevel vl = verbLevel;
4358 if (vl == VERB_DEFAULT) vl = VERB_LOW;
4359 RCP<const Comm<int>> comm = this->getComm();
4360 const int myImageID = comm->getRank(),
4361 numImages = comm->getSize();
     // Column width for the per-row table: start at 1, add one for every power
     // of ten below the global row count, then enforce a floor of 11 and add 2.
     // (11 is the length of the widest header, "Num Entries" -- presumably the
     // reason for that floor.)
4362 size_t width = 1;
4363 for (size_t dec = 10; dec < getGlobalNumRows(); dec *= 10) {
4364 ++width;
4365 }
4366 width = std::max<size_t>(width, static_cast<size_t>(11)) + 2;
4367 Teuchos::OSTab tab(out);
4368 // none: print nothing
4369 // low: print O(1) info from node 0
4370 // medium: print O(P) info, num entries per node
4371 // high: print O(N) info, num entries per row
4372 // extreme: print O(NNZ) info: print graph indices
4373 //
4374 // for medium and higher, print constituent objects at specified verbLevel
4375 if (vl != VERB_NONE) {
4376 if (myImageID == 0) out << this->description() << std::endl;
4377 // O(1) globals, minus what was already printed by description()
4378 if (isFillComplete() && myImageID == 0) {
4379 out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4380 }
4381 // constituent objects
4382 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4383 if (myImageID == 0) out << "\nRow map: " << std::endl;
     // The row Map is described without a null check, unlike the other Maps.
4384 rowMap_->describe(out, vl);
4385 if (colMap_ != Teuchos::null) {
4386 if (myImageID == 0) out << "\nColumn map: " << std::endl;
4387 colMap_->describe(out, vl);
4388 }
4389 if (domainMap_ != Teuchos::null) {
4390 if (myImageID == 0) out << "\nDomain map: " << std::endl;
4391 domainMap_->describe(out, vl);
4392 }
4393 if (rangeMap_ != Teuchos::null) {
4394 if (myImageID == 0) out << "\nRange map: " << std::endl;
4395 rangeMap_->describe(out, vl);
4396 }
4397 }
4398 // O(P) data
4399 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4400 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4401 if (myImageID == imageCtr) {
4402 out << "Node ID = " << imageCtr << std::endl
4403 << "Node number of entries = " << this->getLocalNumEntries() << std::endl
4404 << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4405 if (!indicesAreAllocated()) {
4406 out << "Indices are not allocated." << std::endl;
4407 }
4408 }
     // NOTE(review): three back-to-back barriers end each rank's turn;
     // presumably this gives the output time to appear in rank order --
     // confirm whether a single barrier would do.
4409 comm->barrier();
4410 comm->barrier();
4411 comm->barrier();
4412 }
4413 }
4414 // O(N) and O(NNZ) data
4415 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4416 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4417 if (myImageID == imageCtr) {
4418 out << std::setw(width) << "Node ID"
4419 << std::setw(width) << "Global Row"
4420 << std::setw(width) << "Num Entries";
4421 if (vl == VERB_EXTREME) {
4422 out << " Entries";
4423 }
4424 out << std::endl;
4425 const LocalOrdinal lclNumRows =
4426 static_cast<LocalOrdinal>(this->getLocalNumRows());
4427 for (LocalOrdinal r = 0; r < lclNumRows; ++r) {
4428 const RowInfo rowinfo = this->getRowInfo(r);
4429 GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4430 out << std::setw(width) << myImageID
4431 << std::setw(width) << gid
4432 << std::setw(width) << rowinfo.numEntries;
4433 if (vl == VERB_EXTREME) {
4434 out << " ";
     // Row entries start at rowinfo.offset1D in the flat index storage.
     // Nothing is printed for a graph that is neither globally nor
     // locally indexed.
4435 if (isGloballyIndexed()) {
4436 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4437 for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4438 GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4439 out << colgid << " ";
4440 }
4441 } else if (isLocallyIndexed()) {
4442 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4443 for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4444 LocalOrdinal collid = rowview[j + rowinfo.offset1D];
     // Local column indices are translated to global ones for display.
4445 out << colMap_->getGlobalElement(collid) << " ";
4446 }
4447 }
4448 }
4449 out << std::endl;
4450 }
4451 }
4452 comm->barrier();
4453 comm->barrier();
4454 comm->barrier();
4455 }
4456 }
4457 }
4458}
4459
// Size-compatibility check against a source object.  The argument is ignored
// and the result is unconditionally true.
//
// NOTE(review): a comment in copyAndPermute says checkSizes establishes that
// the source is a row_graph_type, but no such check is performed here.
// NOTE(review): the embedded listing numbers skip 4461, so the line holding
// the return type and class qualifier appears to be missing from this copy.
4460template <class LocalOrdinal, class GlobalOrdinal, class Node>
4462 checkSizes(const SrcDistObject& /* source */) {
4463 // It's not clear what kind of compatibility checks on sizes can
4464 // be performed here. Epetra_CrsGraph doesn't check any sizes for
4465 // compatibility.
4466 return true;
4467}
4468
// Copy rows of the source graph into this graph: first the leading
// `numSameIDs` rows, then the rows paired up by permuteFromLIDs (source local
// row) and permuteToLIDs (target local row).
//
// Rows are transferred by inserting the source row's global column indices
// into the target row with insertGlobalIndices().  Before any insertion, the
// extra space needed is computed with computeCrsPadding() and applied with
// applyCrsPadding().  Source rows are read by copy when the source is fill
// complete or is not a CrsGraph, and by view otherwise.  The combine mode
// argument is unnamed and unused.
//
// NOTE(review): this copy is missing several lines (the embedded listing
// numbers skip 4470-4471, 4474, 4476, 4481, 4485, 4501 and 4506), including
// the function name, part of the parameter list, the condition guarding the
// early copyAndPermuteNew() path, and the opening of the extent check.  The
// description above covers only what is visible here.
4469template <class LocalOrdinal, class GlobalOrdinal, class Node>
4472 const size_t numSameIDs,
4473 const Kokkos::DualView<const local_ordinal_type*,
4475 const Kokkos::DualView<const local_ordinal_type*,
4477 const CombineMode /*CM*/) {
4478 using std::endl;
4479 using LO = local_ordinal_type;
4480 using GO = global_ordinal_type;
4482 const char tfecfFuncName[] = "copyAndPermute: ";
4483 const bool verbose = verbose_;
4484
     // Alternate path: delegate everything to copyAndPermuteNew() with INSERT
     // and return.  The guarding condition is on a line missing from this copy.
4486 const row_graph_type& srcRowGraph = dynamic_cast<const row_graph_type&>(source);
4487 copyAndPermuteNew(srcRowGraph, *this, numSameIDs, permuteToLIDs, permuteFromLIDs, INSERT);
4488 return;
4489 }
4490
4491 Details::ProfilingRegion regionCAP("Tpetra::CrsGraph::copyAndPermute");
4492
4493 std::unique_ptr<std::string> prefix;
4494 if (verbose) {
4495 prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4496 std::ostringstream os;
4497 os << *prefix << endl;
4498 std::cerr << os.str();
4499 }
4500
     // Tail of a check that raises std::runtime_error; judging by the message,
     // it fires when the two permute lists differ in length (the macro opener
     // and condition are on a missing line).
4502 std::runtime_error, "permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) << " != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) << ".");
4503
4504 // We know from checkSizes that the source object is a
4505 // row_graph_type, so we don't need to check again.
4507 dynamic_cast<const row_graph_type&>(source);
4508
4509 if (verbose) {
4510 std::ostringstream os;
4511 os << *prefix << "Compute padding" << endl;
4512 std::cerr << os.str();
4513 }
4514 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4515 permuteToLIDs, permuteFromLIDs, verbose);
4516 applyCrsPadding(*padding, verbose);
4517
4518 // If the source object is actually a CrsGraph, we can use view
4519 // mode instead of copy mode to access the entries in each row,
4520 // if the graph is not fill complete.
4521 const this_CRS_type* srcCrsGraph =
4522 dynamic_cast<const this_CRS_type*>(&source);
4523
4524 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4525 const map_type& tgtRowMap = *(getRowMap());
4526 const bool src_filled = srcRowGraph.isFillComplete();
4527 nonconst_global_inds_host_view_type row_copy;
4528 LO myid = 0;
4529
4530 //
4531 // "Copy" part of "copy and permute."
4532 //
4533 if (src_filled || srcCrsGraph == nullptr) {
4534 if (verbose) {
4535 std::ostringstream os;
4536 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4537 std::cerr << os.str();
4538 }
4539 // If the source graph is fill complete, we can't use view mode,
4540 // because the data might be stored in a different format not
4541 // compatible with the expectations of view mode. Also, if the
4542 // source graph is not a CrsGraph, we can't use view mode,
4543 // because RowGraph only provides copy mode access to the data.
4544 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
     // The same local index is used on both sides; the global row index is
     // looked up in the source row Map and reused for the target insert.
4545 const GO gid = srcRowMap.getGlobalElement(myid);
4546 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow(gid);
4547 Kokkos::resize(row_copy, row_length);
     // NOTE(review): check_row_length receives the count reported by
     // getGlobalRowCopy but is never compared with row_length.
4548 size_t check_row_length = 0;
4549 srcRowGraph.getGlobalRowCopy(gid, row_copy, check_row_length);
4550 this->insertGlobalIndices(gid, row_length, row_copy.data());
4551 }
4552 } else {
4553 if (verbose) {
4554 std::ostringstream os;
4555 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4556 std::cerr << os.str();
4557 }
4558 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4559 const GO gid = srcRowMap.getGlobalElement(myid);
4560 global_inds_host_view_type row;
4561 srcCrsGraph->getGlobalRowView(gid, row);
4562 this->insertGlobalIndices(gid, row.extent(0), row.data());
4563 }
4564 }
4565
4566 //
4567 // "Permute" part of "copy and permute."
4568 //
4569 auto permuteToLIDs_h = permuteToLIDs.view_host();
4570 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4571
4572 if (src_filled || srcCrsGraph == nullptr) {
4573 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
     // Source and target rows differ here, so each side's global row index
     // comes from its own row Map.
4574 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
4575 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
4576 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow(srcgid);
4577 Kokkos::resize(row_copy, row_length);
4578 size_t check_row_length = 0;
4579 srcRowGraph.getGlobalRowCopy(srcgid, row_copy, check_row_length);
4580 this->insertGlobalIndices(mygid, row_length, row_copy.data());
4581 }
4582 } else {
4583 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
4584 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
4585 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
4586 global_inds_host_view_type row;
4587 srcCrsGraph->getGlobalRowView(srcgid, row);
4588 this->insertGlobalIndices(mygid, row.extent(0), row.data());
4589 }
4590 }
4591
4592 if (verbose) {
4593 std::ostringstream os;
4594 os << *prefix << "Done" << endl;
4595 std::cerr << os.str();
4596 }
4597}
4598
// Grow this graph's unpacked index storage according to `padding`.
//
// Steps visible here:
//   1. Allocate indices (as global indices) if none are allocated yet.
//   2. Deep-copy the current unpacked row pointers into row_ptrs_beg.
//   3. Build row_ptrs_end: begin + per-row entry count when k_numRowEntries_
//      is non-empty, otherwise the next row's begin pointer.
//   4. Hand both arrays plus the global or local unpacked index storage to
//      padCrsArrays().
//   5. If step 3 used k_numRowEntries_, recompute it as end - begin and copy
//      it back.
//   6. Install row_ptrs_beg with setRowPtrsUnpacked().
//
// `verbose` enables progress messages on std::cerr.
//
// NOTE(review): the embedded listing numbers skip 4600 and 4603-4604, so the
// return-type/class-qualifier line and two lines at the top of the body
// (presumably using-declarations for padCrsArrays and ProfilingRegion, which
// are used unqualified below) appear to be missing from this copy.
4599template <class LocalOrdinal, class GlobalOrdinal, class Node>
4601 applyCrsPadding(const padding_type& padding,
4602 const bool verbose) {
4605 using std::endl;
4606 using LO = local_ordinal_type;
4607 using row_ptrs_type =
4608 typename local_graph_device_type::row_map_type::non_const_type;
4609 using range_policy =
4610 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4611 const char tfecfFuncName[] = "applyCrsPadding";
4612 ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4613
4614 std::unique_ptr<std::string> prefix;
4615 if (verbose) {
4616 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4617 std::ostringstream os;
4618 os << *prefix << "padding: ";
4619 padding.print(os);
4620 os << endl;
4621 std::cerr << os.str();
4622 }
     // The rank is only looked up when verbose; it is -1 otherwise, and also
     // when the Map or its communicator is null.
4623 const int myRank = !verbose ? -1 : [&]() {
4624 auto map = this->getMap();
4625 if (map.is_null()) {
4626 return -1;
4627 }
4628 auto comm = map->getComm();
4629 if (comm.is_null()) {
4630 return -1;
4631 }
4632 return comm->getRank();
4633 }();
4634
4635 // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4636 // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4637 // size needs to increase. That should be the job of
4638 // padCrsArrays.
4639
4640 // Assume global indexing if we don't have any indices yet
4641 if (!indicesAreAllocated()) {
4642 if (verbose) {
4643 std::ostringstream os;
4644 os << *prefix << "Call allocateIndices" << endl;
4645 std::cerr << os.str();
4646 }
4647 allocateIndices(GlobalIndices, verbose);
4648 }
4649 TEUCHOS_ASSERT(indicesAreAllocated());
4650
4651 // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4652 // would use it directly.
4653
4654 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4655 if (verbose) {
4656 std::ostringstream os;
4657 os << *prefix << "Allocate row_ptrs_beg: "
4658 << rowPtrsUnpacked_dev.extent(0) << endl;
4659 std::cerr << os.str();
4660 }
4661 using Kokkos::view_alloc;
4662 using Kokkos::WithoutInitializing;
4663 row_ptrs_type row_ptrs_beg(
4664 view_alloc("row_ptrs_beg", WithoutInitializing),
4665 rowPtrsUnpacked_dev.extent(0));
4666 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4667 Kokkos::deep_copy(execution_space(), row_ptrs_beg, rowPtrsUnpacked_dev);
4668
     // N is the number of rows: one less than the row-pointer length, or zero
     // when the row-pointer array is empty.
4669 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) : size_t(row_ptrs_beg.extent(0) - 1);
4670 if (verbose) {
4671 std::ostringstream os;
4672 os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4673 std::cerr << os.str();
4674 }
4675 row_ptrs_type row_ptrs_end(
4676 view_alloc("row_ptrs_end", WithoutInitializing), N);
4677 row_ptrs_type num_row_entries;
4678
4679 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4680
4681 execution_space().fence(); // we need above deep_copy to be done
4682
     // NOTE(review): range_policy is declared with Kokkos::IndexType<LO>, while
     // N is a size_t and the lambdas below take `const size_t i` -- confirm the
     // implicit conversions are intended.
4683 if (refill_num_row_entries) { // Case 1: Unpacked storage
4684 // We can't assume correct *this capture until C++17, and it's
4685 // likely more efficient just to capture what we need anyway.
4686 num_row_entries =
4687 row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4688 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4689 Kokkos::parallel_for(
4690 "Fill end row pointers", range_policy(0, N),
4691 KOKKOS_LAMBDA(const size_t i) {
4692 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4693 });
4694 } else {
4695 // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
4696 // storage, we don't need row_ptr_end to be separate allocation;
4697 // could just have it alias row_ptr_beg+1.
4698 Kokkos::parallel_for(
4699 "Fill end row pointers", range_policy(0, N),
4700 KOKKOS_LAMBDA(const size_t i) {
4701 row_ptrs_end(i) = row_ptrs_beg(i + 1);
4702 });
4703 }
4704
     // Pad whichever index storage is active: global indices if globally
     // indexed, otherwise the unpacked local indices.
4705 if (isGloballyIndexed()) {
4706 padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
4707 padding, myRank, verbose);
4708 } else {
4709 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4710 padding, myRank, verbose);
4711 }
4712
4713 if (refill_num_row_entries) {
4714 Kokkos::parallel_for(
4715 "Fill num entries", range_policy(0, N),
4716 KOKKOS_LAMBDA(const size_t i) {
4717 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4718 });
4719 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
4720 }
4721 if (verbose) {
4722 std::ostringstream os;
4723 os << *prefix << "Reassign k_rowPtrs_; old size: "
4724 << rowPtrsUnpacked_dev.extent(0) << ", new size: "
4725 << row_ptrs_beg.extent(0) << endl;
4726 std::cerr << os.str();
     // NOTE(review): this assertion sits inside the `if (verbose)` block, so
     // it is only evaluated when verbose output is enabled -- confirm intent.
4727 TEUCHOS_ASSERT(rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0));
4728 }
4729
4730 setRowPtrsUnpacked(row_ptrs_beg);
4731}
4732
4733template <class LocalOrdinal, class GlobalOrdinal, class Node>
4734std::unique_ptr<
4735 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4736CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4737 computeCrsPadding(
4738 const RowGraph<LocalOrdinal, GlobalOrdinal, Node>& source,
4739 const size_t numSameIDs,
4740 const Kokkos::DualView<const local_ordinal_type*,
4741 buffer_device_type>& permuteToLIDs,
4742 const Kokkos::DualView<const local_ordinal_type*,
4743 buffer_device_type>& permuteFromLIDs,
4744 const bool verbose) const {
4745 using LO = local_ordinal_type;
4746 using std::endl;
4747
4748 std::unique_ptr<std::string> prefix;
4749 if (verbose) {
4750 prefix = this->createPrefix("CrsGraph",
4751 "computeCrsPadding(same & permute)");
4752 std::ostringstream os;
4753 os << *prefix << "{numSameIDs: " << numSameIDs
4754 << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
4755 << endl;
4756 std::cerr << os.str();
4757 }
4758
4759 const int myRank = [&]() {
4760 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4761 return comm.is_null() ? -1 : comm->getRank();
4762 }();
4763 std::unique_ptr<padding_type> padding(
4764 new padding_type(myRank, numSameIDs,
4765 permuteFromLIDs.extent(0)));
4766
4767 computeCrsPaddingForSameIDs(*padding, source,
4768 static_cast<LO>(numSameIDs));
4769 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
4770 permuteFromLIDs);
4771 return padding;
4772}
4773
4774template <class LocalOrdinal, class GlobalOrdinal, class Node>
4775void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4776 computeCrsPaddingForSameIDs(
4777 padding_type& padding,
4778 const RowGraph<local_ordinal_type, global_ordinal_type,
4779 node_type>& source,
4780 const local_ordinal_type numSameIDs) const {
4781 using LO = local_ordinal_type;
4782 using GO = global_ordinal_type;
4783 using Details::Impl::getRowGraphGlobalRow;
4784 using std::endl;
4785 const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
4786
4787 std::unique_ptr<std::string> prefix;
4788 const bool verbose = verbose_;
4789 if (verbose) {
4790 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4791 std::ostringstream os;
4792 os << *prefix << "numSameIDs: " << numSameIDs << endl;
4793 std::cerr << os.str();
4794 }
4795
4796 if (numSameIDs == 0) {
4797 return;
4798 }
4799
4800 const map_type& srcRowMap = *(source.getRowMap());
4801 const map_type& tgtRowMap = *rowMap_;
4802 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4803 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
4804 const bool src_is_unique =
4805 srcCrs == nullptr ? false : srcCrs->isMerged();
4806 const bool tgt_is_unique = this->isMerged();
4807
4808 std::vector<GO> srcGblColIndsScratch;
4809 std::vector<GO> tgtGblColIndsScratch;
4810
4811 execute_sync_host_uvm_access(); // protect host UVM access
4812 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
4813 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
4814 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
4815 auto srcGblColInds = getRowGraphGlobalRow(
4816 srcGblColIndsScratch, source, srcGblRowInd);
4817 auto tgtGblColInds = getRowGraphGlobalRow(
4818 tgtGblColIndsScratch, *this, tgtGblRowInd);
4819 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
4820 tgtGblColInds.size(), tgt_is_unique,
4821 srcGblColInds.getRawPtr(),
4822 srcGblColInds.size(), src_is_unique);
4823 }
4824 if (verbose) {
4825 std::ostringstream os;
4826 os << *prefix << "Done" << endl;
4827 std::cerr << os.str();
4828 }
4829}
4830
4831template <class LocalOrdinal, class GlobalOrdinal, class Node>
4832void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4833 computeCrsPaddingForPermutedIDs(
4834 padding_type& padding,
4835 const RowGraph<local_ordinal_type, global_ordinal_type,
4836 node_type>& source,
4837 const Kokkos::DualView<const local_ordinal_type*,
4838 buffer_device_type>& permuteToLIDs,
4839 const Kokkos::DualView<const local_ordinal_type*,
4840 buffer_device_type>& permuteFromLIDs) const {
4841 using LO = local_ordinal_type;
4842 using GO = global_ordinal_type;
4843 using Details::Impl::getRowGraphGlobalRow;
4844 using std::endl;
4845 const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
4846
4847 std::unique_ptr<std::string> prefix;
4848 const bool verbose = verbose_;
4849 if (verbose) {
4850 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4851 std::ostringstream os;
4852 os << *prefix << "permuteToLIDs.extent(0): "
4853 << permuteToLIDs.extent(0)
4854 << ", permuteFromLIDs.extent(0): "
4855 << permuteFromLIDs.extent(0) << endl;
4856 std::cerr << os.str();
4857 }
4858
4859 if (permuteToLIDs.extent(0) == 0) {
4860 return;
4861 }
4862
4863 const map_type& srcRowMap = *(source.getRowMap());
4864 const map_type& tgtRowMap = *rowMap_;
4865 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4866 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
4867 const bool src_is_unique =
4868 srcCrs == nullptr ? false : srcCrs->isMerged();
4869 const bool tgt_is_unique = this->isMerged();
4870
4871 TEUCHOS_ASSERT(!permuteToLIDs.need_sync_host());
4872 auto permuteToLIDs_h = permuteToLIDs.view_host();
4873 TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_host());
4874 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4875
4876 std::vector<GO> srcGblColIndsScratch;
4877 std::vector<GO> tgtGblColIndsScratch;
4878 const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
4879
4880 execute_sync_host_uvm_access(); // protect host UVM access
4881 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
4882 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
4883 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
4884 auto srcGblColInds = getRowGraphGlobalRow(
4885 srcGblColIndsScratch, source, srcGblRowInd);
4886 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
4887 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
4888 auto tgtGblColInds = getRowGraphGlobalRow(
4889 tgtGblColIndsScratch, *this, tgtGblRowInd);
4890 padding.update_permute(whichPermute, tgtLclRowInd,
4891 tgtGblColInds.getRawPtr(),
4892 tgtGblColInds.size(), tgt_is_unique,
4893 srcGblColInds.getRawPtr(),
4894 srcGblColInds.size(), src_is_unique);
4895 }
4896
4897 if (verbose) {
4898 std::ostringstream os;
4899 os << *prefix << "Done" << endl;
4900 std::cerr << os.str();
4901 }
4902}
4903
4904template <class LocalOrdinal, class GlobalOrdinal, class Node>
4905std::unique_ptr<
4906 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4907CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4908 computeCrsPaddingForImports(
4909 const Kokkos::DualView<const local_ordinal_type*,
4910 buffer_device_type>& importLIDs,
4911 Kokkos::DualView<packet_type*, buffer_device_type> imports,
4912 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
4913 const bool verbose) const {
4914 using Details::Impl::getRowGraphGlobalRow;
4915 using std::endl;
4916 using LO = local_ordinal_type;
4917 using GO = global_ordinal_type;
4918 const char tfecfFuncName[] = "computeCrsPaddingForImports";
4919
4920 std::unique_ptr<std::string> prefix;
4921 if (verbose) {
4922 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4923 std::ostringstream os;
4924 os << *prefix << "importLIDs.extent(0): "
4925 << importLIDs.extent(0)
4926 << ", imports.extent(0): "
4927 << imports.extent(0)
4928 << ", numPacketsPerLID.extent(0): "
4929 << numPacketsPerLID.extent(0) << endl;
4930 std::cerr << os.str();
4931 }
4932
4933 const LO numImports = static_cast<LO>(importLIDs.extent(0));
4934 const int myRank = [&]() {
4935 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4936 return comm.is_null() ? -1 : comm->getRank();
4937 }();
4938 std::unique_ptr<padding_type> padding(
4939 new padding_type(myRank, numImports));
4940
4941 if (imports.need_sync_host()) {
4942 imports.sync_host();
4943 }
4944 auto imports_h = imports.view_host();
4945 if (numPacketsPerLID.need_sync_host()) {
4946 numPacketsPerLID.sync_host();
4947 }
4948 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
4949
4950 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
4951 auto importLIDs_h = importLIDs.view_host();
4952
4953 const map_type& tgtRowMap = *rowMap_;
4954 // Always merge source column indices, since isMerged() is
4955 // per-process state, and we don't know its value on other
4956 // processes that sent us data.
4957 constexpr bool src_is_unique = false;
4958 const bool tgt_is_unique = isMerged();
4959
4960 std::vector<GO> tgtGblColIndsScratch;
4961 size_t offset = 0;
4962 execute_sync_host_uvm_access(); // protect host UVM access
4963 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
4964 // CrsGraph packs just global column indices, while CrsMatrix
4965 // packs bytes (first the number of entries in the row, then the
4966 // global column indices, then other stuff like the matrix
4967 // values in that row).
4968 const LO origSrcNumEnt =
4969 static_cast<LO>(numPacketsPerLID_h[whichImport]);
4970 GO* const srcGblColInds = imports_h.data() + offset;
4971
4972 const LO tgtLclRowInd = importLIDs_h[whichImport];
4973 const GO tgtGblRowInd =
4974 tgtRowMap.getGlobalElement(tgtLclRowInd);
4975 auto tgtGblColInds = getRowGraphGlobalRow(
4976 tgtGblColIndsScratch, *this, tgtGblRowInd);
4977 const size_t origTgtNumEnt(tgtGblColInds.size());
4978
4979 padding->update_import(whichImport, tgtLclRowInd,
4980 tgtGblColInds.getRawPtr(),
4981 origTgtNumEnt, tgt_is_unique,
4982 srcGblColInds,
4983 origSrcNumEnt, src_is_unique);
4984 offset += origSrcNumEnt;
4985 }
4986
4987 if (verbose) {
4988 std::ostringstream os;
4989 os << *prefix << "Done" << endl;
4990 std::cerr << os.str();
4991 }
4992 return padding;
4993}
4994
4995template <class LocalOrdinal, class GlobalOrdinal, class Node>
4996std::unique_ptr<
4997 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4998CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4999 computePaddingForCrsMatrixUnpack(
5000 const Kokkos::DualView<const local_ordinal_type*,
5001 buffer_device_type>& importLIDs,
5002 Kokkos::DualView<char*, buffer_device_type> imports,
5003 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5004 const bool verbose) const {
5005 using Details::PackTraits;
5006 using Details::Impl::getRowGraphGlobalRow;
5007 using std::endl;
5008 using LO = local_ordinal_type;
5009 using GO = global_ordinal_type;
5010 const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5011
5012 std::unique_ptr<std::string> prefix;
5013 if (verbose) {
5014 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5015 std::ostringstream os;
5016 os << *prefix << "importLIDs.extent(0): "
5017 << importLIDs.extent(0)
5018 << ", imports.extent(0): "
5019 << imports.extent(0)
5020 << ", numPacketsPerLID.extent(0): "
5021 << numPacketsPerLID.extent(0) << endl;
5022 std::cerr << os.str();
5023 }
5024 const bool extraVerbose =
5025 verbose && Details::Behavior::verbose("CrsPadding");
5026
5027 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5028 TEUCHOS_ASSERT(LO(numPacketsPerLID.extent(0)) >= numImports);
5029 const int myRank = [&]() {
5030 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
5031 return comm.is_null() ? -1 : comm->getRank();
5032 }();
5033 std::unique_ptr<padding_type> padding(
5034 new padding_type(myRank, numImports));
5035
5036 if (imports.need_sync_host()) {
5037 imports.sync_host();
5038 }
5039 auto imports_h = imports.view_host();
5040 if (numPacketsPerLID.need_sync_host()) {
5041 numPacketsPerLID.sync_host();
5042 }
5043 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5044
5045 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5046 auto importLIDs_h = importLIDs.view_host();
5047
5048 const map_type& tgtRowMap = *rowMap_;
5049 // Always merge source column indices, since isMerged() is
5050 // per-process state, and we don't know its value on other
5051 // processes that sent us data.
5052 constexpr bool src_is_unique = false;
5053 const bool tgt_is_unique = isMerged();
5054
5055 std::vector<GO> srcGblColIndsScratch;
5056 std::vector<GO> tgtGblColIndsScratch;
5057 size_t offset = 0;
5058 execute_sync_host_uvm_access(); // protect host UVM access
5059 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5060 // CrsGraph packs just global column indices, while CrsMatrix
5061 // packs bytes (first the number of entries in the row, then the
5062 // global column indices, then other stuff like the matrix
5063 // values in that row).
5064 const size_t numBytes = numPacketsPerLID_h[whichImport];
5065 if (extraVerbose) {
5066 std::ostringstream os;
5067 os << *prefix << "whichImport=" << whichImport
5068 << ", numImports=" << numImports
5069 << ", numBytes=" << numBytes << endl;
5070 std::cerr << os.str();
5071 }
5072 if (numBytes == 0) {
5073 continue; // special case: no entries to unpack for this row
5074 }
5075 LO origSrcNumEnt = 0;
5076 const size_t numEntBeg = offset;
5077 const size_t numEntLen =
5078 PackTraits<LO>::packValueCount(origSrcNumEnt);
5079 TEUCHOS_ASSERT(numBytes >= numEntLen);
5080 TEUCHOS_ASSERT(imports_h.extent(0) >= numEntBeg + numEntLen);
5081 PackTraits<LO>::unpackValue(origSrcNumEnt,
5082 imports_h.data() + numEntBeg);
5083 if (extraVerbose) {
5084 std::ostringstream os;
5085 os << *prefix << "whichImport=" << whichImport
5086 << ", numImports=" << numImports
5087 << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5088 std::cerr << os.str();
5089 }
5090 TEUCHOS_ASSERT(origSrcNumEnt >= LO(0));
5091 TEUCHOS_ASSERT(numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)));
5092 const size_t gidsBeg = numEntBeg + numEntLen;
5093 if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5094 srcGblColIndsScratch.resize(origSrcNumEnt);
5095 }
5096 GO* const srcGblColInds = srcGblColIndsScratch.data();
5097 PackTraits<GO>::unpackArray(srcGblColInds,
5098 imports_h.data() + gidsBeg,
5099 origSrcNumEnt);
5100 const LO tgtLclRowInd = importLIDs_h[whichImport];
5101 const GO tgtGblRowInd =
5102 tgtRowMap.getGlobalElement(tgtLclRowInd);
5103 auto tgtGblColInds = getRowGraphGlobalRow(
5104 tgtGblColIndsScratch, *this, tgtGblRowInd);
5105 const size_t origNumTgtEnt(tgtGblColInds.size());
5106
5107 if (extraVerbose) {
5108 std::ostringstream os;
5109 os << *prefix << "whichImport=" << whichImport
5110 << ", numImports=" << numImports
5111 << ": Call padding->update_import" << endl;
5112 std::cerr << os.str();
5113 }
5114 padding->update_import(whichImport, tgtLclRowInd,
5115 tgtGblColInds.getRawPtr(),
5116 origNumTgtEnt, tgt_is_unique,
5117 srcGblColInds,
5118 origSrcNumEnt, src_is_unique);
5119 offset += numBytes;
5120 }
5121
5122 if (verbose) {
5123 std::ostringstream os;
5124 os << *prefix << "Done" << endl;
5125 std::cerr << os.str();
5126 }
5127 return padding;
5128}
5129
// Pack the rows of `source` named by exportLIDs into `exports`, filling
// numPacketsPerLID and constantNumPackets on the way.
//
// Throws std::runtime_error if exportLIDs and numPacketsPerLID differ in
// length or if this (target) graph is fill complete, and
// std::invalid_argument if `source` is not a row_graph_type.
//
// Three packing paths are visible:
//   * source is a RowGraph but not a CrsGraph: RowGraph::pack() fills a
//     Teuchos::Array, which is then deep-copied into the host side of
//     `exports` (reallocating `exports` if its length differs);
//   * the branch condition below holds: packCrsGraphNew() is called on the
//     source CrsGraph;
//   * otherwise: the source CrsGraph's packFillActiveNew() is called.
//
// Messages go to std::cerr when verbose_ is set.
//
// NOTE(review): the embedded listing numbers skip 5141 and 5233; presumably
// those lines held using-declarations for ProfilingRegion and
// packCrsGraphNew, which are used unqualified below.
5130template <class LocalOrdinal, class GlobalOrdinal, class Node>
5131void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5132 packAndPrepare(const SrcDistObject& source,
5133 const Kokkos::DualView<const local_ordinal_type*,
5134 buffer_device_type>& exportLIDs,
5135 Kokkos::DualView<packet_type*,
5136 buffer_device_type>& exports,
5137 Kokkos::DualView<size_t*,
5138 buffer_device_type>
5139 numPacketsPerLID,
5140 size_t& constantNumPackets) {
5142 using GO = global_ordinal_type;
5143 using std::endl;
5144 using crs_graph_type =
5145 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5146 const char tfecfFuncName[] = "packAndPrepare: ";
5147 ProfilingRegion region_papn("Tpetra::CrsGraph::packAndPrepare");
5148
5149 const bool verbose = verbose_;
5150 std::unique_ptr<std::string> prefix;
5151 if (verbose) {
5152 prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5153 std::ostringstream os;
5154 os << *prefix << "Start" << endl;
5155 std::cerr << os.str();
5156 }
5157
5158 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(exportLIDs.extent(0) != numPacketsPerLID.extent(0),
5159 std::runtime_error,
5160 "exportLIDs.extent(0) = " << exportLIDs.extent(0)
5161 << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
5162 << ".");
5163 const row_graph_type* srcRowGraphPtr =
5164 dynamic_cast<const row_graph_type*>(&source);
5165 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(srcRowGraphPtr == nullptr, std::invalid_argument,
5166 "Source of an Export "
5167 "or Import operation to a CrsGraph must be a RowGraph with the same "
5168 "template parameters.");
5169 // We don't check whether src_graph has had fillComplete called,
5170 // because it doesn't matter whether the *source* graph has been
5171 // fillComplete'd. The target graph can not be fillComplete'd yet.
5172 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete(), std::runtime_error,
5173 "The target graph of an Import or Export must not be fill complete.");
5174
     // Null when the source is some other RowGraph implementation.
5175 const crs_graph_type* srcCrsGraphPtr =
5176 dynamic_cast<const crs_graph_type*>(&source);
5177
5178 if (srcCrsGraphPtr == nullptr) {
5179 using Teuchos::ArrayView;
5180 using LO = local_ordinal_type;
5181
5182 if (verbose) {
5183 std::ostringstream os;
5184 os << *prefix << "Source is a RowGraph but not a CrsGraph"
5185 << endl;
5186 std::cerr << os.str();
5187 }
5188 // RowGraph::pack serves the "old" DistObject interface. It
5189 // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5190 // entails deep-copying the exports buffer on output. RowGraph
5191 // is a convenience interface when not a CrsGraph, so we accept
5192 // the performance hit.
5193 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5194 auto exportLIDs_h = exportLIDs.view_host();
5195 ArrayView<const LO> exportLIDs_av(exportLIDs_h.data(),
5196 exportLIDs_h.extent(0));
5197 Teuchos::Array<GO> exports_a;
5198
     // The packet counts are about to be overwritten on host, so any pending
     // sync state is discarded and the host copy is marked modified.
5199 numPacketsPerLID.clear_sync_state();
5200 numPacketsPerLID.modify_host();
5201 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5202 ArrayView<size_t> numPacketsPerLID_av(numPacketsPerLID_h.data(),
5203 numPacketsPerLID_h.extent(0));
5204 srcRowGraphPtr->pack(exportLIDs_av, exports_a, numPacketsPerLID_av,
5205 constantNumPackets);
5206 const size_t newSize = static_cast<size_t>(exports_a.size());
     // Reallocate only when the length differs (larger or smaller).
5207 if (static_cast<size_t>(exports.extent(0)) != newSize) {
5208 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5209 exports = exports_dv_type("exports", newSize);
5210 }
     // Unmanaged host view wrapping the Teuchos::Array, used as the deep_copy
     // source.
5211 Kokkos::View<const packet_type*, Kokkos::HostSpace,
5212 Kokkos::MemoryUnmanaged>
5213 exports_a_h(exports_a.getRawPtr(), newSize);
5214 exports.clear_sync_state();
5215 exports.modify_host();
5216 // DEEP_COPY REVIEW - NOT TESTED
5217 Kokkos::deep_copy(exports.view_host(), exports_a_h);
5218 }
5219 // packCrsGraphNew requires k_rowPtrsPacked_ to be set
     // NOTE(review): this condition inspects *this* (the target: its column Map,
     // packed row pointers and row Map), while the graph being packed is
     // *srcCrsGraphPtr.  The const pack() method makes the same test on the
     // graph it packs.  Confirm whether the source graph was meant here.
5220 else if (!getColMap().is_null() &&
5221 (this->getRowPtrsPackedDevice().extent(0) != 0 ||
5222 getRowMap()->getLocalNumElements() == 0)) {
5223 if (verbose) {
5224 std::ostringstream os;
5225 os << *prefix << "packCrsGraphNew path" << endl;
5226 std::cerr << os.str();
5227 }
5228 using export_pids_type =
5229 Kokkos::DualView<const int*, buffer_device_type>;
5230 export_pids_type exportPIDs; // not filling it; needed for syntax
5231 using LO = local_ordinal_type;
5232 using NT = node_type;
5234 packCrsGraphNew<LO, GO, NT>(*srcCrsGraphPtr, exportLIDs, exportPIDs,
5235 exports, numPacketsPerLID,
5236 constantNumPackets, false);
5237 } else {
5238 srcCrsGraphPtr->packFillActiveNew(exportLIDs, exports, numPacketsPerLID,
5239 constantNumPackets);
5240 }
5241
5242 if (verbose) {
5243 std::ostringstream os;
5244 os << *prefix << "Done" << endl;
5245 std::cerr << os.str();
5246 }
5247}
5248
// Pack the rows named by exportLIDs into exports, using the
// Teuchos::ArrayView / Teuchos::Array interface.
//
// Two packing paths are selected from the graph's current state:
// the first branch is taken when the graph has a column Map and
// either the packed row offsets are nonempty or the row Map owns no
// local elements; otherwise the call is forwarded to
// packFillActive().
//
// NOTE(review): the statements inside the first branch are not
// visible in this listing; the existing comment suggests that it
// calls packCrsGraph -- confirm against the original source.
5249template <class LocalOrdinal, class GlobalOrdinal, class Node>
5251 pack(const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5252 Teuchos::Array<GlobalOrdinal>& exports,
5253 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5254 size_t& constantNumPackets) const {
5255 auto col_map = this->getColMap();
5256 // packCrsGraph requires k_rowPtrsPacked to be set
5257 if (!col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() == 0)) {
5261 } else {
// No column Map, or no packed row offsets yet: use the host-side
// packing routine that reads the graph's unpacked storage.
5262 this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5264 }
5265}
5266
// Pack the rows named by exportLIDs into exports, one global column
// index per packet, reading the graph's current (unpacked) storage.
//
// Parameters:
//   exportLIDs         [in]  Local row indices of the rows to pack.
//   exports            [out] Resized to the total number of entries
//                            in the requested rows, then filled row
//                            by row with global column indices.
//   numPacketsPerLID   [out] Entry i receives the number of entries
//                            in row exportLIDs[i]. Must have the same
//                            size as exportLIDs.
//   constantNumPackets [out] Always set to 0, since rows may have
//                            different numbers of entries.
//
// Throws std::runtime_error if exportLIDs and numPacketsPerLID differ
// in size. Throws std::logic_error if the graph is locally indexed
// without a column Map, if supportsRowViews() is false, or if either
// pass over the rows counted an error.
//
// The work is done in two host-parallel passes: a reduction that
// counts entries per row, and a scan that computes each row's offset
// into exports and writes the indices on the final pass.
5267template <class LocalOrdinal, class GlobalOrdinal, class Node>
5269 packFillActive(const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5270 Teuchos::Array<GlobalOrdinal>& exports,
5271 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5272 size_t& constantNumPackets) const {
5273 using std::endl;
5274 using LO = LocalOrdinal;
5275 using GO = GlobalOrdinal;
5276 using host_execution_space =
5277 typename Kokkos::View<size_t*, device_type>::
5278 host_mirror_type::execution_space;
5279 const char tfecfFuncName[] = "packFillActive: ";
5280 const bool verbose = verbose_;
5281
5282 const auto numExportLIDs = exportLIDs.size();
5283 std::unique_ptr<std::string> prefix;
5284 if (verbose) {
// Label verbose output with this method's own name, so that log
// lines can be attributed to the right routine.
5285 prefix = this->createPrefix("CrsGraph", "packFillActive");
5286 std::ostringstream os;
5287 os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5288 std::cerr << os.str();
5289 }
5290 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.size(), std::runtime_error,
5291 "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5292 " = "
5293 << numPacketsPerLID.size() << ".");
5294
5295 const map_type& rowMap = *(this->getRowMap());
5296 const map_type* const colMapPtr = this->colMap_.getRawPtr();
5297 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr == nullptr, std::logic_error,
5298 "This graph claims to be locally indexed, but its column Map is nullptr. "
5299 "This should never happen. Please report this bug to the Tpetra "
5300 "developers.");
5301
5302 // We may pack different amounts of data for different rows.
5303 constantNumPackets = 0;
5304
5305 // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5306 // it might be now, but we might as well be safe).
5307 size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr();
5308 const LO* const exportLIDs_raw = exportLIDs.getRawPtr();
5309
5310 // Count the total number of packets (column indices, in the case
5311 // of a CrsGraph) to pack. While doing so, set
5312 // numPacketsPerLID[i] to the number of entries owned by the
5313 // calling process in (local) row exportLIDs[i] of the graph, that
5314 // the caller wants us to send out.
5315 Kokkos::RangePolicy<host_execution_space, LO> inputRange(0, numExportLIDs);
5316 size_t totalNumPackets = 0;
5317 size_t errCount = 0;
5318 // lambdas turn what they capture const, so we can't
5319 // atomic_add(&errCount,1). Instead, we need a View to modify.
5320 using host_device_type =
5321 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
// errCountView wraps the address of errCount, so increments made
// through the View inside the lambdas are visible in errCount here.
5322 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5323 constexpr size_t ONE = 1;
5324
5325 execute_sync_host_uvm_access(); // protect host UVM access
5326 Kokkos::parallel_reduce(
5327 "Tpetra::CrsGraph::pack: totalNumPackets",
5328 inputRange,
5329 [=, *this](const LO& i, size_t& curTotalNumPackets) {
5330 const GO gblRow = rowMap.getGlobalElement(exportLIDs_raw[i]);
5331 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
// The requested local row is not in the row Map: count an error
// and report zero packets for it.
5332 Kokkos::atomic_add(&errCountView(), ONE);
5333 numPacketsPerLID_raw[i] = 0;
5334 } else {
5335 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5336 numPacketsPerLID_raw[i] = numEnt;
5337 curTotalNumPackets += numEnt;
5338 }
5339 },
5340 totalNumPackets);
5341
5342 if (verbose) {
5343 std::ostringstream os;
5344 os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5345 std::cerr << os.str();
5346 }
5347 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5348 "totalNumPackets count encountered "
5349 "one or more errors! errCount = "
5350 << errCount
5351 << ", totalNumPackets = " << totalNumPackets << ".");
5352 errCount = 0;
5353
5354 // Allocate space for all the column indices to pack.
5355 exports.resize(totalNumPackets);
5356
5357 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5358 "this->supportsRowViews() returns false; this should never happen. "
5359 "Please report this bug to the Tpetra developers.");
5360
5361 // Loop again over the rows to export, and pack rows of indices
5362 // into the output buffer.
5363
5364 if (verbose) {
5365 std::ostringstream os;
5366 os << *prefix << "Pack into exports" << endl;
5367 std::cerr << os.str();
5368 }
5369
5370 // Teuchos::ArrayView may not be thread safe, or may not be
5371 // efficiently thread safe. Better to use the raw pointer.
5372 GO* const exports_raw = exports.getRawPtr();
5373 errCount = 0;
// The scan value (exportsOffset) is the running number of packets
// that precede row i in exports. Indices are written only when
// `final` is true; the offset is advanced on every pass.
5374 Kokkos::parallel_scan("Tpetra::CrsGraph::pack: pack from views",
5375 inputRange, [=, &prefix, *this](const LO i, size_t& exportsOffset, const bool final) {
5376 const size_t curOffset = exportsOffset;
5377 const GO gblRow = rowMap.getGlobalElement(exportLIDs_raw[i]);
5378 const RowInfo rowInfo =
5379 this->getRowInfoFromGlobalRowIndex(gblRow);
5380
5381 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5382 if (rowInfo.localRow == TDO::invalid()) {
5383 if (verbose) {
5384 std::ostringstream os;
5385 os << *prefix << ": INVALID rowInfo: i=" << i
5386 << ", lclRow=" << exportLIDs_raw[i] << endl;
5387 std::cerr << os.str();
5388 }
5389 Kokkos::atomic_add(&errCountView(), ONE);
5390 } else if (curOffset + rowInfo.numEntries > totalNumPackets) {
// Writing this row would run past the end of exports, so count an
// error instead of packing it.
5391 if (verbose) {
5392 std::ostringstream os;
5393 os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5394 << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5395 "(= "
5396 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5397 << ") > totalNumPackets (= " << totalNumPackets << ")."
5398 << endl;
5399 std::cerr << os.str();
5400 }
5401 Kokkos::atomic_add(&errCountView(), ONE);
5402 } else {
5403 const LO numEnt = static_cast<LO>(rowInfo.numEntries);
5404 if (this->isLocallyIndexed()) {
// Locally indexed: convert each local column index to a global
// one through the column Map before packing.
5405 auto lclColInds = getLocalIndsViewHost(rowInfo);
5406 if (final) {
5407 for (LO k = 0; k < numEnt; ++k) {
5408 const LO lclColInd = lclColInds(k);
5409 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5410 // Pack it, even if it's wrong. Let the receiving
5411 // process deal with it. Otherwise, we'll miss out
5412 // on any correct data.
5413 exports_raw[curOffset + k] = gblColInd;
5414 } // for each entry in the row
5415 } // final pass?
5416 exportsOffset = curOffset + numEnt;
5417 } else if (this->isGloballyIndexed()) {
// Globally indexed: the stored indices are packed as they are.
5418 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5419 if (final) {
5420 for (LO k = 0; k < numEnt; ++k) {
5421 const GO gblColInd = gblColInds(k);
5422 // Pack it, even if it's wrong. Let the receiving
5423 // process deal with it. Otherwise, we'll miss out
5424 // on any correct data.
5425 exports_raw[curOffset + k] = gblColInd;
5426 } // for each entry in the row
5427 } // final pass?
5428 exportsOffset = curOffset + numEnt;
5429 }
5430 // If neither globally nor locally indexed, then the graph
5431 // has no entries in this row (or indeed, in any row on this
5432 // process) to pack.
5433 }
5434 });
5435
// Only whether errCount is nonzero matters here; the message
// reports the raw count accumulated by the scan.
5436 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5437 "Packing encountered "
5438 "one or more errors! errCount = "
5439 << errCount
5440 << ", totalNumPackets = " << totalNumPackets << ".");
5441
5442 if (verbose) {
5443 std::ostringstream os;
5444 os << *prefix << "Done" << endl;
5445 std::cerr << os.str();
5446 }
5447}
5448
// Pack the rows named by exportLIDs into the exports DualView, one
// global column index per packet, reading the graph's current
// (unpacked) storage. All packing happens on host.
//
// Parameters:
//   exportLIDs         [in]  Local row indices of the rows to pack.
//                            Must not need a host sync.
//   exports            [out] Reallocated only if it is smaller than
//                            the total number of entries to pack;
//                            marked as modified on host.
//   numPacketsPerLID   [out] Passed by value (a DualView handle);
//                            host entry i receives the number of
//                            entries in row exportLIDs[i]. Must have
//                            the same extent as exportLIDs.
//   constantNumPackets [out] Always set to 0, since rows may have
//                            different numbers of entries.
//
// Throws std::runtime_error if exportLIDs and numPacketsPerLID differ
// in extent. Throws std::logic_error if the graph is locally indexed
// without a column Map, if the counting loop hit a row that is not in
// the row Map, or if supportsRowViews() is false.
//
// Errors counted during the packing scan do NOT throw: the check
// after the scan is commented out, so that count is only printed in
// verbose mode.
5449template <class LocalOrdinal, class GlobalOrdinal, class Node>
5450void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5451 packFillActiveNew(const Kokkos::DualView<const local_ordinal_type*,
5452 buffer_device_type>& exportLIDs,
5453 Kokkos::DualView<packet_type*,
5454 buffer_device_type>& exports,
5455 Kokkos::DualView<size_t*,
5456 buffer_device_type>
5457 numPacketsPerLID,
5458 size_t& constantNumPackets) const {
5459 using std::endl;
5460 using LO = local_ordinal_type;
5461 using GO = global_ordinal_type;
5462 using host_execution_space = typename Kokkos::View<size_t*,
5463 device_type>::host_mirror_type::execution_space;
5464 using host_device_type =
5465 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5466 using exports_dv_type =
5467 Kokkos::DualView<packet_type*, buffer_device_type>;
5468 const char tfecfFuncName[] = "packFillActiveNew: ";
5469 const bool verbose = verbose_;
5470
5471 const auto numExportLIDs = exportLIDs.extent(0);
5472 std::unique_ptr<std::string> prefix;
5473 if (verbose) {
5474 prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5475 std::ostringstream os;
5476 os << *prefix << "numExportLIDs: " << numExportLIDs
5477 << ", numPacketsPerLID.extent(0): "
5478 << numPacketsPerLID.extent(0) << endl;
5479 std::cerr << os.str();
5480 }
5481 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.extent(0), std::runtime_error,
5482 "exportLIDs.extent(0) = " << numExportLIDs
5483 << " != numPacketsPerLID.extent(0) = "
5484 << numPacketsPerLID.extent(0) << ".");
// The host view of exportLIDs is read directly below, so it must
// already be up to date on host.
5485 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5486 auto exportLIDs_h = exportLIDs.view_host();
5487
5488 const map_type& rowMap = *(this->getRowMap());
5489 const map_type* const colMapPtr = this->colMap_.getRawPtr();
5490 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr == nullptr, std::logic_error,
5491 "This graph claims to be locally indexed, but its column Map is nullptr. "
5492 "This should never happen. Please report this bug to the Tpetra "
5493 "developers.");
5494
5495 // We may pack different amounts of data for different rows.
5496 constantNumPackets = 0;
5497
// numPacketsPerLID is overwritten on host below, so its previous
// sync state is discarded and it is marked as modified on host.
5498 numPacketsPerLID.clear_sync_state();
5499 numPacketsPerLID.modify_host();
5500 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5501
5502 // Count the total number of packets (column indices, in the case
5503 // of a CrsGraph) to pack. While doing so, set
5504 // numPacketsPerLID[i] to the number of entries owned by the
5505 // calling process in (local) row exportLIDs[i] of the graph, that
5506 // the caller wants us to send out.
5507 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5508 range_type inputRange(0, numExportLIDs);
5509 size_t totalNumPackets = 0;
5510 size_t errCount = 0;
5511 // lambdas turn what they capture const, so we can't
5512 // atomic_add(&errCount,1). Instead, we need a View to modify.
5513 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5514 constexpr size_t ONE = 1;
5515
5516 if (verbose) {
5517 std::ostringstream os;
5518 os << *prefix << "Compute totalNumPackets" << endl;
5519 std::cerr << os.str();
5520 }
5521
5522 execute_sync_host_uvm_access(); // protect host UVM access
5523 totalNumPackets = 0;
// Counting pass: a plain sequential loop over the rows to export.
5524 for (size_t i = 0; i < numExportLIDs; ++i) {
5525 const LO lclRow = exportLIDs_h[i];
5526 const GO gblRow = rowMap.getGlobalElement(lclRow);
5527 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5528 if (verbose) {
5529 std::ostringstream os;
5530 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5531 << " not in row Map on this process" << endl;
5532 std::cerr << os.str();
5533 }
5534 Kokkos::atomic_add(&errCountView(), ONE);
5535 numPacketsPerLID_h(i) = 0;
5536 } else {
5537 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5538 numPacketsPerLID_h(i) = numEnt;
5539 totalNumPackets += numEnt;
5540 }
5541 }
5542
5543 if (verbose) {
5544 std::ostringstream os;
5545 os << *prefix << "totalNumPackets: " << totalNumPackets
5546 << ", errCount: " << errCount << endl;
5547 std::cerr << os.str();
5548 }
5549 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5550 "totalNumPackets count encountered "
5551 "one or more errors! totalNumPackets: "
5552 << totalNumPackets
5553 << ", errCount: " << errCount << ".");
5554
5555 // Allocate space for all the column indices to pack.
// An existing buffer that is already large enough is reused, so
// exports.extent(0) may exceed totalNumPackets on return.
5556 if (size_t(exports.extent(0)) < totalNumPackets) {
5557 // FIXME (mfh 09 Apr 2019) Create without initializing.
5558 exports = exports_dv_type("exports", totalNumPackets);
5559 }
5560
5561 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5562 "this->supportsRowViews() returns false; this should never happen. "
5563 "Please report this bug to the Tpetra developers.");
5564
5565 // Loop again over the rows to export, and pack rows of indices
5566 // into the output buffer.
5567
5568 if (verbose) {
5569 std::ostringstream os;
5570 os << *prefix << "Pack into exports buffer" << endl;
5571 std::cerr << os.str();
5572 }
5573
5574 exports.clear_sync_state();
5575 exports.modify_host();
5576 auto exports_h = exports.view_host();
5577
5578 errCount = 0;
5579
5580 // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5581 // (if locally indexed) or gblInds_wdv (if globally indexed).
5582 if (isLocallyIndexed())
5583 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5584 else if (isGloballyIndexed())
5585 gblInds_wdv.getHostView(Access::ReadOnly);
5586
// NOTE(review): the statements immediately before and after this
// parallel_scan are not visible in this listing -- confirm against
// the original source before editing around it.
//
// The scan value (exportsOffset) is the running number of packets
// that precede row i in exports. Indices are written only when
// `final` is true; the offset is advanced on every pass. Rows that
// fail a check return early without advancing the offset.
5588 Kokkos::parallel_scan("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5589 inputRange, [=, &prefix, *this](const LO i, size_t& exportsOffset, const bool final) {
5590 const size_t curOffset = exportsOffset;
5591 const LO lclRow = exportLIDs_h(i);
5592 const GO gblRow = rowMap.getGlobalElement(lclRow);
5593 if (gblRow == Details::OrdinalTraits<GO>::invalid()) {
5594 if (verbose) {
5595 std::ostringstream os;
5596 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5597 << " not in row Map on this process" << endl;
5598 std::cerr << os.str();
5599 }
5600 Kokkos::atomic_add(&errCountView(), ONE);
5601 return;
5602 }
5603
5604 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(gblRow);
5605 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid()) {
5606 if (verbose) {
5607 std::ostringstream os;
5608 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5609 << ", gblRow=" << gblRow << ": invalid rowInfo"
5610 << endl;
5611 std::cerr << os.str();
5612 }
5613 Kokkos::atomic_add(&errCountView(), ONE);
5614 return;
5615 }
5616
// Writing this row would run past the counted total, so count an
// error instead of packing it.
5617 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5618 if (verbose) {
5619 std::ostringstream os;
5620 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5621 << ", gblRow=" << gblRow << ", curOffset (= "
5622 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5623 << ") > totalNumPackets (= " << totalNumPackets
5624 << ")." << endl;
5625 std::cerr << os.str();
5626 }
5627 Kokkos::atomic_add(&errCountView(), ONE);
5628 return;
5629 }
5630
5631 const LO numEnt = static_cast<LO>(rowInfo.numEntries);
5632 if (this->isLocallyIndexed()) {
// Locally indexed: convert each local column index to a global
// one through the column Map before packing.
5633 auto lclColInds = getLocalIndsViewHost(rowInfo);
5634 if (final) {
5635 for (LO k = 0; k < numEnt; ++k) {
5636 const LO lclColInd = lclColInds(k);
5637 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5638 // Pack it, even if it's wrong. Let the receiving
5639 // process deal with it. Otherwise, we'll miss out
5640 // on any correct data.
5641 exports_h(curOffset + k) = gblColInd;
5642 } // for each entry in the row
5643 } // final pass?
5644 exportsOffset = curOffset + numEnt;
5645 } else if (this->isGloballyIndexed()) {
// Globally indexed: the stored indices are packed as they are.
5646 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5647 if (final) {
5648 for (LO k = 0; k < numEnt; ++k) {
5649 const GO gblColInd = gblColInds(k);
5650 // Pack it, even if it's wrong. Let the receiving
5651 // process deal with it. Otherwise, we'll miss out
5652 // on any correct data.
5653 exports_h(curOffset + k) = gblColInd;
5654 } // for each entry in the row
5655 } // final pass?
5656 exportsOffset = curOffset + numEnt;
5657 }
5658 // If neither globally nor locally indexed, then the graph
5659 // has no entries in this row (or indeed, in any row on this
5660 // process) to pack.
5661 });
5663
// The check below is disabled, so errors counted by the scan are
// reported only through the verbose message that follows.
5664 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5665 // (errCount != 0, std::logic_error, "Packing encountered "
5666 // "one or more errors! errCount = " << errCount
5667 // << ", totalNumPackets = " << totalNumPackets << ".");
5668
5669 if (verbose) {
5670 std::ostringstream os;
5671 os << *prefix << "errCount=" << errCount << "; Done" << endl;
5672 std::cerr << os.str();
5673 }
5674}
5675
// Unpack received column indices and insert them into the rows of
// this (target) graph named by importLIDs.
//
// The incoming data is read on host: imports holds the global column
// indices for all rows back to back, and numPacketsPerLID gives the
// number of indices for each row. The constantNumPackets and
// combineMode arguments are unnamed and unused; every incoming index
// is inserted.
//
// Steps: (1) compute and apply row padding for the incoming entries,
// (2) check that the graph is not fill complete, (3) for each row,
// insert the indices -- as global indices if the graph is not locally
// indexed, otherwise after converting them to local indices through
// the column Map.
//
// Throws std::runtime_error if the graph is fill complete, or if the
// insert loop throws a std::exception (rethrown with the original
// message appended). A row index that is not in the row Map raises
// std::logic_error inside the loop, which that handler converts too.
//
// NOTE(review): several lines of this definition (parts of the
// signature, the first argument of one size check, the body of the
// scratch-size loop, and the end of the insert loop) are not visible
// in this listing -- confirm against the original source.
5676template <class LocalOrdinal, class GlobalOrdinal, class Node>
5678 unpackAndCombine(const Kokkos::DualView<const local_ordinal_type*,
5680 Kokkos::DualView<packet_type*,
5682 imports,
5683 Kokkos::DualView<size_t*,
5686 const size_t /* constantNumPackets */,
5687 const CombineMode /* combineMode */) {
5689 using std::endl;
5690 using LO = local_ordinal_type;
5691 using GO = global_ordinal_type;
5692 const char tfecfFuncName[] = "unpackAndCombine";
5693
5694 ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
5695 const bool verbose = verbose_;
5696
5697 std::unique_ptr<std::string> prefix;
5698 if (verbose) {
5699 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5700 std::ostringstream os;
5701 os << *prefix << "Start" << endl;
5702 std::cerr << os.str();
5703 }
// Compute padding from the incoming data and apply it to this
// graph before any indices are inserted.
5704 {
5705 auto padding = computeCrsPaddingForImports(
5706 importLIDs, imports, numPacketsPerLID, verbose);
5707 applyCrsPadding(*padding, verbose);
5708 if (verbose) {
5709 std::ostringstream os;
5710 os << *prefix << "Done computing & applying padding" << endl;
5711 std::cerr << os.str();
5712 }
5713 }
5714
5715 // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
5716 // reasonable meaning, whether or not the matrix is fill complete.
5717 // It's just more work to implement.
5718
5719 // We are not checking the value of the CombineMode input
5720 // argument. For CrsGraph, we only support import/export
5721 // operations if fillComplete has not yet been called. Any
5722 // incoming column-indices are inserted into the target graph. In
5723 // this context, CombineMode values of ADD vs INSERT are
5724 // equivalent. What is the meaning of REPLACE for CrsGraph? If a
5725 // duplicate column-index is inserted, it will be compressed out
5726 // when fillComplete is called.
5727 //
5728 // Note: I think REPLACE means that an existing row is replaced by
5729 // the imported row, i.e., the existing indices are cleared. CGB,
5730 // 6/17/2010
5731
5733 std::runtime_error, ": importLIDs.extent(0) = " << importLIDs.extent(0) << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0) << ".");
5734 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillComplete(), std::runtime_error,
5735 ": Import or Export operations are not allowed on a target "
5736 "CrsGraph that is fillComplete.");
5737
// Bring the incoming buffers up to date on host; all unpacking
// below reads the host views.
5738 const size_t numImportLIDs(importLIDs.extent(0));
5739 if (numPacketsPerLID.need_sync_host()) {
5740 numPacketsPerLID.sync_host();
5741 }
5742 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5743 if (imports.need_sync_host()) {
5744 imports.sync_host();
5745 }
5746 auto imports_h = imports.view_host();
5747 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5748 auto importLIDs_h = importLIDs.view_host();
5749
5750 // If we're inserting in local indices, let's pre-allocate
5751 Teuchos::Array<LO> lclColInds;
5752 if (isLocallyIndexed()) {
5753 if (verbose) {
5754 std::ostringstream os;
5755 os << *prefix << "Preallocate local indices scratch" << endl;
5756 std::cerr << os.str();
5757 }
5758 size_t maxNumInserts = 0;
// NOTE(review): the body of this loop is not visible in this
// listing; presumably it raises maxNumInserts to the largest
// numPacketsPerLID_h(i) -- confirm against the original source.
5759 for (size_t i = 0; i < numImportLIDs; ++i) {
5761 }
5762 if (verbose) {
5763 std::ostringstream os;
5764 os << *prefix << "Local indices scratch size: "
5765 << maxNumInserts << endl;
5766 std::cerr << os.str();
5767 }
5768 lclColInds.resize(maxNumInserts);
5769 } else {
5770 if (verbose) {
5771 std::ostringstream os;
5772 os << *prefix;
5773 if (isGloballyIndexed()) {
5774 os << "Graph is globally indexed";
5775 } else {
5776 os << "Graph is neither locally nor globally indexed";
5777 }
5778 os << endl;
5779 std::cerr << os.str();
5780 }
5781 }
5782
5783 TEUCHOS_ASSERT(!rowMap_.is_null());
5784 const map_type& rowMap = *rowMap_;
5785
5786 try {
// importsOffset is the position in imports_h at which the current
// row's indices start.
5787 size_t importsOffset = 0;
5788 for (size_t i = 0; i < numImportLIDs; ++i) {
5789 if (verbose) {
5790 std::ostringstream os;
5791 os << *prefix << "i=" << i << ", numImportLIDs="
5792 << numImportLIDs << endl;
5793 std::cerr << os.str();
5794 }
5795 // We can only unpack into owned rows, since we only have
5796 // local row indices.
5797 const LO lclRow = importLIDs_h[i];
5798 const GO gblRow = rowMap.getGlobalElement(lclRow);
5799 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
5800 std::logic_error, "importLIDs[i=" << i << "]=" << lclRow << " is not in the row Map on the calling "
5801 "process.");
5802 const LO numEnt = numPacketsPerLID_h[i];
// A row with no incoming entries gets a null pointer instead of
// a pointer into imports_h.
5803 const GO* const gblColInds = (numEnt == 0) ? nullptr : imports_h.data() + importsOffset;
5804 if (!isLocallyIndexed()) {
5805 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
5806 } else {
5807 // FIXME (mfh 09 Feb 2020) Now would be a good time to do
5808 // column Map filtering.
5809 for (LO j = 0; j < numEnt; j++) {
5810 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
5811 }
5812 insertLocalIndices(lclRow, numEnt, lclColInds.data());
5813 }
// NOTE(review): the statement that closes this iteration is not
// visible in this listing; presumably it advances importsOffset
// by numEnt -- confirm against the original source.
5815 }
5816 } catch (std::exception& e) {
// Re-throw as std::runtime_error so that the message names this
// method and still carries the original text.
5817 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
5818 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
5819 "exception: "
5820 << endl
5821 << e.what());
5822 }
5823
5824 if (verbose) {
5825 std::ostringstream os;
5826 os << *prefix << "Done" << endl;
5827 std::cerr << os.str();
5828 }
5829}
5830
// Replace this graph's row Map with newMap and rebuild its other
// Maps and its Export / Import objects to match.
//
// newMap may be null; in that case the new communicator is null and
// no Export or Import is rebuilt. The domain and range Maps are set
// to newMap when they were the same object as the old row Map, and
// are otherwise rebuilt with replaceCommWithSubset(newComm). The
// column Map, if any, is rebuilt the same way.
//
// All new state is built in local variables first and assigned to
// the members only at the end, so a failure part-way through leaves
// the graph unchanged.
//
// NOTE(review): the declarations of the local variables (rowMap,
// domainMap, rangeMap, colMap, exporter, importer, newComm) and the
// statements that construct the new Export / Import are not visible
// in this listing -- confirm against the original source.
5831template <class LocalOrdinal, class GlobalOrdinal, class Node>
5833 removeEmptyProcessesInPlace(const Teuchos::RCP<const map_type>& newMap) {
5834 using Teuchos::Comm;
5835 using Teuchos::null;
5836 using Teuchos::ParameterList;
5837 using Teuchos::RCP;
5838
5839 // We'll set all the state "transactionally," so that this method
5840 // satisfies the strong exception guarantee. This object's state
5841 // won't be modified until the end of this method.
5845
5846 rowMap = newMap;
5848 (newMap.is_null()) ? null : newMap->getComm();
5849
5850 if (!domainMap_.is_null()) {
5851 if (domainMap_.getRawPtr() == rowMap_.getRawPtr()) {
5852 // Common case: original domain and row Maps are identical.
5853 // In that case, we need only replace the original domain Map
5854 // with the new Map. This ensures that the new domain and row
5855 // Maps _stay_ identical.
5856 domainMap = newMap;
5857 } else {
5858 domainMap = domainMap_->replaceCommWithSubset(newComm);
5859 }
5860 }
5861 if (!rangeMap_.is_null()) {
5862 if (rangeMap_.getRawPtr() == rowMap_.getRawPtr()) {
5863 // Common case: original range and row Maps are identical. In
5864 // that case, we need only replace the original range Map with
5865 // the new Map. This ensures that the new range and row Maps
5866 // _stay_ identical.
5867 rangeMap = newMap;
5868 } else {
5869 rangeMap = rangeMap_->replaceCommWithSubset(newComm);
5870 }
5871 }
5872 if (!colMap_.is_null()) {
5873 colMap = colMap_->replaceCommWithSubset(newComm);
5874 }
5875
5876 // (Re)create the Export and / or Import if necessary.
5877 if (!newComm.is_null()) {
5878 RCP<ParameterList> params = this->getNonconstParameterList(); // could be null
5879 //
5880 // The operations below are collective on the new communicator.
5881 //
5882 // (Re)create the Export object if necessary. If I haven't
5883 // called fillComplete yet, I don't have a rangeMap, so I must
5884 // first check if the _original_ rangeMap is not null. Ditto
5885 // for the Import object and the domain Map.
// The pointer comparison is tried first; isSameAs() is only
// evaluated when the two Maps are different objects.
5886 if (!rangeMap_.is_null() &&
5887 rangeMap != rowMap &&
5888 !rangeMap->isSameAs(*rowMap)) {
// Branch on whether the parameter list has an "Export" sublist.
5889 if (params.is_null() || !params->isSublist("Export")) {
5891 } else {
5894 }
5895 }
5896 // (Re)create the Import object if necessary.
5897 if (!domainMap_.is_null() &&
5898 domainMap != colMap &&
5899 !domainMap->isSameAs(*colMap)) {
// Branch on whether the parameter list has an "Import" sublist.
5900 if (params.is_null() || !params->isSublist("Import")) {
5902 } else {
5905 }
5906 }
5907 } // if newComm is not null
5908
5909 // Defer side effects until the end. If no destructors throw
5910 // exceptions (they shouldn't anyway), then this method satisfies
5911 // the strong exception guarantee.
5912 exporter_ = exporter;
5913 importer_ = importer;
5914 rowMap_ = rowMap;
5915 // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
5916 // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
5917 // the same object. We might want to get rid of this redundant
5918 // pointer sometime, but for now, we'll leave it alone and just
5919 // set map_ to the same object.
5920 this->map_ = rowMap;
5921 domainMap_ = domainMap;
5922 rangeMap_ = rangeMap;
5923 colMap_ = colMap;
5924}
5925
// Compute, for each local row, the offset of that row's diagonal
// entry and store it in offsets. Offsets are relative to the start
// of each row, not absolute positions in the local graph.
//
// Parameters:
//   offsets [out] Must have at least getLocalNumRows() entries.
//
// Throws std::runtime_error if the graph has no column Map, and
// std::invalid_argument if offsets is too short. In debug mode
// (debug_), the results of some extra checks are combined across all
// processes and std::runtime_error is thrown if any process reported
// a problem.
//
// A fill-complete graph delegates to
// ::Tpetra::Details::getGraphDiagOffsets on the local graph. A graph
// that is not fill complete is processed row by row on host, using a
// mirror view that is copied back to offsets at the end. Rows whose
// diagonal is not in the column Map, and rows that are empty or
// whose RowInfo is inconsistent, get
// Tpetra::Details::OrdinalTraits<size_t>::invalid().
//
// NOTE(review): several statements (the declaration of wrongOffsets,
// the remaining arguments of findRelOffset and what is done with its
// result, part of the debug bookkeeping, and the arguments of
// reduceAll) are not visible in this listing -- confirm against the
// original source.
5926template <class LocalOrdinal, class GlobalOrdinal, class Node>
5928 getLocalDiagOffsets(const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const {
5929 using std::endl;
5930 using LO = LocalOrdinal;
5931 using GO = GlobalOrdinal;
5932 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
5933 const bool verbose = verbose_;
5934
5935 std::unique_ptr<std::string> prefix;
5936 if (verbose) {
5937 prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
5938 std::ostringstream os;
5939 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
5940 << endl;
5941 std::cerr << os.str();
5942 }
5943
5944 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error, "The graph must have a column Map.");
5945 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
5946 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<LO>(offsets.extent(0)) < lclNumRows,
5947 std::invalid_argument, "offsets.extent(0) = " << offsets.extent(0) << " < getLocalNumRows() = " << lclNumRows << ".");
5948
5949 const map_type& rowMap = *(this->getRowMap());
5950 const map_type& colMap = *(this->getColMap());
5951
5952 // We only use these in debug mode, but since debug mode is a
5953 // run-time option, they need to exist here. That's why we create
5954 // the vector with explicit size zero, to avoid overhead if debug
5955 // mode is off.
5956 bool allRowMapDiagEntriesInColMap = true;
5957 bool allDiagEntriesFound = true;
5958 bool allOffsetsCorrect = true;
5959 bool noOtherWeirdness = true;
5960 using wrong_offsets_type = std::vector<std::pair<LO, size_t>>;
5962
5963 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
5964 // the subset of Map functionality that we need below.
5965 auto lclRowMap = rowMap.getLocalMap();
5966 auto lclColMap = colMap.getLocalMap();
5967
5968 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
5969 // setup, at least on the host. For CUDA, we have to use LocalMap
5970 // (that comes from each of the two Maps).
5971
5972 const bool sorted = this->isSorted();
5973 if (isFillComplete()) {
// Fill complete: the local graph exists, so the offsets are
// computed by the shared helper from its row offsets and entries.
5974 auto lclGraph = this->getLocalGraphDevice();
5975 ::Tpetra::Details::getGraphDiagOffsets(offsets, lclRowMap, lclColMap,
5976 lclGraph.row_map,
5977 lclGraph.entries, sorted);
5978 } else {
5979 // NOTE (mfh 22 Feb 2017): We have to run this code on host,
5980 // since the graph is not fill complete. The previous version
5981 // of this code assumed UVM; this version does not.
5982 auto offsets_h = Kokkos::create_mirror_view(offsets);
5983
5984 for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
5985 // Find the diagonal entry. Since the row Map and column Map
5986 // may differ, we have to compare global row and column
5987 // indices, not local.
5988 const GO gblRowInd = lclRowMap.getGlobalElement(lclRowInd);
5989 const GO gblColInd = gblRowInd;
5990 const LO lclColInd = lclColMap.getLocalElement(gblColInd);
5991
5992 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
// The diagonal's global index is not in the column Map, so this
// row cannot have a diagonal entry on this process.
5994 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid();
5995 } else {
5996 const RowInfo rowInfo = this->getRowInfo(lclRowInd);
5997 if (static_cast<LO>(rowInfo.localRow) == lclRowInd &&
5998 rowInfo.numEntries > 0) {
5999 auto colInds = this->getLocalIndsViewHost(rowInfo);
6000 const size_t hint = 0; // not needed for this algorithm
6001 const size_t offset =
6002 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
6005
6006 if (debug_) {
6007 // Now that we have what we think is an offset, make sure
6008 // that it really does point to the diagonal entry. Offsets
6009 // are _relative_ to each row, not absolute (for the whole
6010 // (local) graph).
6011 typename local_inds_dualv_type::t_host::const_type lclColInds;
6012 try {
6013 lclColInds = this->getLocalIndsViewHost(rowInfo);
6014 } catch (...) {
6015 noOtherWeirdness = false;
6016 }
6017 // Don't continue with error checking if the above failed.
6018 if (noOtherWeirdness) {
6019 const size_t numEnt = lclColInds.extent(0);
6020 if (offset >= numEnt) {
6021 // Offsets are relative to each row, so this means that
6022 // the offset is out of bounds.
6023 allOffsetsCorrect = false;
6024 wrongOffsets.push_back(std::make_pair(lclRowInd, offset));
6025 } else {
// The offset is in bounds: check that the entry stored there maps
// back to this row's global index.
6026 const LO actualLclColInd = lclColInds(offset);
6027 const GO actualGblColInd = lclColMap.getGlobalElement(actualLclColInd);
6028 if (actualGblColInd != gblColInd) {
6029 allOffsetsCorrect = false;
6030 wrongOffsets.push_back(std::make_pair(lclRowInd, offset));
6031 }
6032 }
6033 }
6034 } // debug_
6035 } else { // either row is empty, or something went wrong w/ getRowInfo()
6036 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid();
6037 allDiagEntriesFound = false;
6038 }
6039 } // whether lclColInd is a valid local column index
6040 } // for each local row
6041 // DEEP_COPY REVIEW - NOT TESTED
6042 Kokkos::deep_copy(offsets, offsets_h);
6043 } // whether the graph is fill complete
6044
// wrongOffsets is only filled inside the debug_ checks above, so
// this report is empty unless debug mode found a bad offset.
6045 if (verbose && wrongOffsets.size() != 0) {
6046 std::ostringstream os;
6047 os << *prefix << "Wrong offsets: [";
6048 for (size_t k = 0; k < wrongOffsets.size(); ++k) {
6049 os << "(" << wrongOffsets[k].first << ","
6050 << wrongOffsets[k].second << ")";
6051 if (k + 1 < wrongOffsets.size()) {
6052 os << ", ";
6053 }
6054 }
6055 os << "]" << endl;
6056 std::cerr << os.str();
6057 }
6058
6059 if (debug_) {
6060 using std::endl;
6061 using Teuchos::reduceAll;
6062 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
6063 const bool localSuccess =
6065 const int numResults = 5;
// Each flag is encoded as 1 (ok) or 0 (problem), so a minimum over
// all processes is 0 if any process saw that problem.
6066 int lclResults[5];
6068 lclResults[1] = allDiagEntriesFound ? 1 : 0;
6069 lclResults[2] = allOffsetsCorrect ? 1 : 0;
6070 lclResults[3] = noOtherWeirdness ? 1 : 0;
6071 // min-all-reduce will compute least rank of all the processes
6072 // that didn't succeed.
6073 lclResults[4] = !localSuccess ? comm->getRank() : comm->getSize();
6074
6075 int gblResults[5];
6076 gblResults[0] = 0;
6077 gblResults[1] = 0;
6078 gblResults[2] = 0;
6079 gblResults[3] = 0;
6080 gblResults[4] = 0;
6081 reduceAll<int, int>(*comm, Teuchos::REDUCE_MIN,
6083
6084 if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1 || gblResults[3] != 1) {
6085 std::ostringstream os; // build error message
6086 os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6087 "possibly among others): "
6088 << endl;
6089 if (gblResults[0] == 0) {
6090 os << " - The column Map does not contain at least one diagonal entry "
6091 "of the graph."
6092 << endl;
6093 }
6094 if (gblResults[1] == 0) {
6095 os << " - On one or more processes, some row does not contain a "
6096 "diagonal entry."
6097 << endl;
6098 }
6099 if (gblResults[2] == 0) {
6100 os << " - On one or more processes, some offsets are incorrect."
6101 << endl;
6102 }
6103 if (gblResults[3] == 0) {
6104 os << " - One or more processes had some other error."
6105 << endl;
6106 }
6107 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6108 }
6109 } // debug_
6110}
6111
// Hand back a device View of this graph's per-row "off-rank" offsets,
// computing and caching the View on first use.
//
// offsets [out]: on return, refers to the cached k_offRankOffsets_ View
//   (a View assignment, not a deep copy); that View has
//   getLocalNumRows() + 1 entries.
//
// Raises std::runtime_error (through the Teuchos test macro) if the graph
// has no column Map, and std::logic_error if the graph is not fill
// complete. TEUCHOS_ASSERT additionally checks that the graph is sorted.
//
// Although the method is const, it assigns haveLocalOffRankOffsets_ and
// k_offRankOffsets_ (presumably declared mutable -- confirm in the class
// declaration).
//
// NOTE(review): the embedded listing numbers skip 6113 and 6156, so this
// excerpt omits two source lines -- presumably the return type / class
// qualifier of this definition and the middle argument(s) of the
// getGraphOffRankOffsets call. Confirm against the real file.
6112template <class LocalOrdinal, class GlobalOrdinal, class Node>
6114 getLocalOffRankOffsets(offset_device_view_type& offsets) const {
6115 using std::endl;
6116 const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6117 const bool verbose = verbose_;
6118
     // Verbose mode only: report the extent of the incoming View on stderr.
     // The message is assembled in a local stream and written in one call.
6119 std::unique_ptr<std::string> prefix;
6120 if (verbose) {
6121 prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6122 std::ostringstream os;
6123 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6124 << endl;
6125 std::cerr << os.str();
6126 }
6127
6128 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error, "The graph must have a column Map.");
6129 // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6130
6131 const size_t lclNumRows = this->getLocalNumRows();
6132
     // Fast path: reuse the cached offsets when they have been computed and
     // their length still matches the current local row count.
6133 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows + 1) {
6134 offsets = k_offRankOffsets_;
6135 return;
6136 }
     // The cache is missing or has the wrong length; mark it invalid until
     // it is rebuilt below.
6137 haveLocalOffRankOffsets_ = false;
6138
6139 const map_type& colMap = *(this->getColMap());
6140 const map_type& domMap = *(this->getDomainMap());
6141
6142 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6143 // the subset of Map functionality that we need below.
     // NOTE(review): lclColMap and lclDomMap are not referenced in the
     // visible lines; presumably they are the arguments on the omitted
     // listing line 6156 -- confirm.
6144 auto lclColMap = colMap.getLocalMap();
6145 auto lclDomMap = domMap.getLocalMap();
6146
6147 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6148 // setup, at least on the host. For CUDA, we have to use LocalMap
6149 // (that comes from each of the two Maps).
6150
6151 TEUCHOS_ASSERT(this->isSorted());
6152 if (isFillComplete()) {
     // The cache View is allocated without initialization; the helper
     // called next is expected to fill every entry -- TODO confirm.
6153 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows + 1);
6154 auto lclGraph = this->getLocalGraphDevice();
6155 ::Tpetra::Details::getGraphOffRankOffsets(k_offRankOffsets_,
6157 lclGraph);
     // Publish the result to the caller and mark the cache valid.
6158 offsets = k_offRankOffsets_;
6159 haveLocalOffRankOffsets_ = true;
6160 } else {
     // Off-rank offsets are only provided for fill-complete graphs.
6161 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, "Can't get off-rank offsets for non-fill-complete graph");
6162 }
6163}
6164
6165namespace { // (anonymous)
6166
6167// mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6168// below). The point is to avoid the deep copy between the input
6169// Teuchos::ArrayRCP and the internally used Kokkos::View. We
6170// can't use UVM to avoid the deep copy with CUDA, because the
6171// ArrayRCP is a host pointer, while the input to the graph's
6172// getLocalDiagOffsets method is a device pointer. Assigning a
6173// host pointer to a device pointer is incorrect unless the host
6174// pointer points to host pinned memory. The goal is to get rid
6175// of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6176// copy for backwards compatibility.
6177//
6178// We have to use template magic because
6179// "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6180// if device_type::memory_space is not Kokkos::HostSpace (as is
6181// the case with CUDA).
6182
6183template <class DeviceType,
6184 const bool memSpaceIsHostSpace =
6185 std::is_same<typename DeviceType::memory_space,
6186 Kokkos::HostSpace>::value>
6187struct HelpGetLocalDiagOffsets {};
6188
6189template <class DeviceType>
6190struct HelpGetLocalDiagOffsets<DeviceType, true> {
6191 typedef DeviceType device_type;
6192 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6193 Kokkos::MemoryUnmanaged>
6194 device_offsets_type;
6195 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6196 Kokkos::MemoryUnmanaged>
6197 host_offsets_type;
6198
6199 static device_offsets_type
6200 getDeviceOffsets(const host_offsets_type& hostOffsets) {
6201 // Host and device are the same; no need to allocate a
6202 // temporary device View.
6203 return hostOffsets;
6204 }
6205
6206 static void
6207 copyBackIfNeeded(const host_offsets_type& /* hostOffsets */,
6208 const device_offsets_type& /* deviceOffsets */) { /* copy back not needed; host and device are the same */
6209 }
6210};
6211
6212template <class DeviceType>
6213struct HelpGetLocalDiagOffsets<DeviceType, false> {
6214 typedef DeviceType device_type;
6215 // We have to do a deep copy, since host memory space != device
6216 // memory space. Thus, the device View is managed (we need to
6217 // allocate a temporary device View).
6218 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6219 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6220 Kokkos::MemoryUnmanaged>
6221 host_offsets_type;
6222
6223 static device_offsets_type
6224 getDeviceOffsets(const host_offsets_type& hostOffsets) {
6225 // Host memory space != device memory space, so we must
6226 // allocate a temporary device View for the graph.
6227 return device_offsets_type("offsets", hostOffsets.extent(0));
6228 }
6229
6230 static void
6231 copyBackIfNeeded(const host_offsets_type& hostOffsets,
6232 const device_offsets_type& deviceOffsets) {
6233 // DEEP_COPY REVIEW - NOT TESTED
6234 Kokkos::deep_copy(hostOffsets, deviceOffsets);
6235 }
6236};
6237} // namespace
6238
// Host-array (Teuchos::ArrayRCP) overload of getLocalDiagOffsets.
//
// offsets [in/out]: resized to the local number of rows when its size
//   differs, then filled by forwarding to the overload that takes a
//   device View (with a copy back to the host array if the helper
//   requires one).
//
// Raises std::runtime_error (through the Teuchos test macro) if the graph
// does not have a column Map.
//
// NOTE(review): the embedded listing numbers skip 6240 and 6263, so this
// excerpt omits two source lines -- presumably the return type / class
// qualifier of this definition and the declaration of helper_type
// (likely HelpGetLocalDiagOffsets<device_type>). Confirm against the
// real file.
6239template <class LocalOrdinal, class GlobalOrdinal, class Node>
6241 getLocalDiagOffsets(Teuchos::ArrayRCP<size_t>& offsets) const {
6242 typedef LocalOrdinal LO;
6243 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6244 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::runtime_error,
6245 "The graph does not yet have a column Map.");
     // Both the row count and the array size are narrowed to LO before
     // being compared.
6246 const LO myNumRows = static_cast<LO>(this->getLocalNumRows());
6247 if (static_cast<LO>(offsets.size()) != myNumRows) {
6248 // NOTE (mfh 21 Jan 2016) This means that the method does not
6249 // satisfy the strong exception guarantee (no side effects
6250 // unless successful).
6251 offsets.resize(myNumRows);
6252 }
6253
6254 // mfh 21 Jan 2016: This method unfortunately takes a
6255 // Teuchos::ArrayRCP, which is host memory. The graph wants a
6256 // device pointer. We can't access host memory from the device;
6257 // that's the wrong direction for UVM. (It's the right direction
6258 // for inefficient host pinned memory, but we don't want to use
6259 // that here.) Thus, if device memory space != host memory space,
6260 // we allocate and use a temporary device View to get the offsets.
6261 // If the two spaces are equal, the template magic makes the deep
6262 // copy go away.
6264 typedef typename helper_type::host_offsets_type host_offsets_type;
6265 // Unmanaged host View that views the output array.
6266 host_offsets_type hostOffsets(offsets.getRawPtr(), myNumRows);
6267 // Allocate temp device View if host != device, else reuse host array.
6268 auto deviceOffsets = helper_type::getDeviceOffsets(hostOffsets);
6269 // NOT recursion; this calls the overload that takes a device View.
6270 this->getLocalDiagOffsets(deviceOffsets);
     // Bring the results back into the caller's array when the helper used
     // a separate device View.
6271 helper_type::copyBackIfNeeded(hostOffsets, deviceOffsets);
6272}
6273
// Report whether this graph supports row views; this implementation
// unconditionally returns true.
//
// NOTE(review): the embedded listing numbers skip 6275, so the line that
// presumably holds the return type and class qualifier is not shown here.
6274template <class LocalOrdinal, class GlobalOrdinal, class Node>
6276 supportsRowViews() const {
6277 return true;
6278}
6279
6280template <class LocalOrdinal, class GlobalOrdinal, class Node>
6283 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6284 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>>& domainTransfer,
6285 const Teuchos::RCP<const map_type>& domainMap,
6286 const Teuchos::RCP<const map_type>& rangeMap,
6287 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6288 using std::string;
6289 using Teuchos::ArrayRCP;
6290 using Teuchos::ArrayView;
6291 using Teuchos::Comm;
6292 using Teuchos::ParameterList;
6293 using Teuchos::rcp;
6294 using Teuchos::RCP;
6299
6300 using LO = LocalOrdinal;
6301 using GO = GlobalOrdinal;
6302 using NT = node_type;
6305
6306 const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6307
6308 auto MM = rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC Pack-1"));
6309
6310 // Make sure that the input argument rowTransfer is either an
6311 // Import or an Export. Import and Export are the only two
6312 // subclasses of Transfer that we defined, but users might
6313 // (unwisely, for now at least) decide to implement their own
6314 // subclasses. Exclude this possibility.
6315 const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6316 const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6318 xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6319 prefix << "The 'rowTransfer' input argument must be either an Import or "
6320 "an Export, and its template parameters must match the corresponding "
6321 "template parameters of the CrsGraph.");
6322
6323 // Make sure that the input argument domainTransfer is either an
6324 // Import or an Export. Import and Export are the only two
6325 // subclasses of Transfer that we defined, but users might
6326 // (unwisely, for now at least) decide to implement their own
6327 // subclasses. Exclude this possibility.
6328 Teuchos::RCP<const import_type> xferDomainAsImport =
6329 Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6330 Teuchos::RCP<const export_type> xferDomainAsExport =
6331 Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6332
6333 if (!domainTransfer.is_null()) {
6335 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6336 prefix << "The 'domainTransfer' input argument must be either an "
6337 "Import or an Export, and its template parameters must match the "
6338 "corresponding template parameters of the CrsGraph.");
6339
6341 (xferAsImport != nullptr || !xferDomainAsImport.is_null()) &&
6342 ((xferAsImport != nullptr && xferDomainAsImport.is_null()) ||
6343 (xferAsImport == nullptr && !xferDomainAsImport.is_null())),
6344 std::invalid_argument,
6345 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6346 "must be of the same type (either Import or Export).");
6347
6349 (xferAsExport != nullptr || !xferDomainAsExport.is_null()) &&
6350 ((xferAsExport != nullptr && xferDomainAsExport.is_null()) ||
6351 (xferAsExport == nullptr && !xferDomainAsExport.is_null())),
6352 std::invalid_argument,
6353 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6354 "must be of the same type (either Import or Export).");
6355
6356 } // domainTransfer != null
6357
6358 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6359 // if the source Map is not distributed but the target Map is?
6360 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6361
6362 //
6363 // Get the caller's parameters
6364 //
6365
6366 bool reverseMode = false; // Are we in reverse mode?
6367 bool restrictComm = false; // Do we need to restrict the communicator?
6368 RCP<ParameterList> graphparams; // parameters for the destination graph
6369 if (!params.is_null()) {
6370 reverseMode = params->get("Reverse Mode", reverseMode);
6371 restrictComm = params->get("Restrict Communicator", restrictComm);
6372 graphparams = sublist(params, "CrsGraph");
6373 }
6374
6375 // Get the new domain and range Maps. We need some of them for error
6376 // checking, now that we have the reverseMode parameter.
6377 RCP<const map_type> MyRowMap = reverseMode ? rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6378 RCP<const map_type> MyColMap; // create this below
6379 RCP<const map_type> MyDomainMap = !domainMap.is_null() ? domainMap : getDomainMap();
6380 RCP<const map_type> MyRangeMap = !rangeMap.is_null() ? rangeMap : getRangeMap();
6381 RCP<const map_type> BaseRowMap = MyRowMap;
6382 RCP<const map_type> BaseDomainMap = MyDomainMap;
6383
6384 // If the user gave us a nonnull destGraph, then check whether it's
6385 // "pristine." That means that it has no entries.
6386 //
6387 // FIXME (mfh 15 May 2014) If this is not true on all processes,
6388 // then this exception test may hang. It would be better to
6389 // forward an error flag to the next communication phase.
6390 if (!destGraph.is_null()) {
6391 // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6392 // whether a graph or matrix has no entries on the calling
6393 // process, is that it is neither locally nor globally indexed.
6394 // This may change eventually with the Kokkos refactor version
6395 // of Tpetra, so it would be better just to check the quantity
6396 // of interest directly. Note that with the Kokkos refactor
6397 // version of Tpetra, asking for the total number of entries in
6398 // a graph or matrix that is not fill complete might require
6399 // computation (kernel launch), since it is not thread scalable
6400 // to update a count every time an entry is inserted.
6401 const bool NewFlag =
6402 !destGraph->isLocallyIndexed() && !destGraph->isGloballyIndexed();
6403 TEUCHOS_TEST_FOR_EXCEPTION(!NewFlag, std::invalid_argument,
6404 prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6405 "if its graph is empty (neither locally nor globally indexed).");
6406
6407 // FIXME (mfh 15 May 2014) At some point, we want to change
6408 // graphs and matrices so that their DistObject Map
6409 // (this->getMap()) may differ from their row Map. This will
6410 // make redistribution for 2-D distributions more efficient. I
6411 // hesitate to change this check, because I'm not sure how much
6412 // the code here depends on getMap() and getRowMap() being the
6413 // same.
6414 TEUCHOS_TEST_FOR_EXCEPTION(
6415 !destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6416 prefix << "The (row) Map of the input argument 'destGraph' is not the "
6417 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6418
6419 TEUCHOS_TEST_FOR_EXCEPTION(
6420 !destGraph->checkSizes(*this), std::invalid_argument,
6421 prefix << "You provided a nonnull destination graph, but checkSizes() "
6422 "indicates that it is not a legal legal target for redistribution from "
6423 "the source graph (*this). This may mean that they do not have the "
6424 "same dimensions.");
6425 }
6426
6427 // If forward mode (the default), then *this's (row) Map must be
6428 // the same as the source Map of the Transfer. If reverse mode,
6429 // then *this's (row) Map must be the same as the target Map of
6430 // the Transfer.
6431 //
6432 // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6433 // and matrices so that their DistObject Map (this->getMap()) may
6434 // differ from their row Map. This will make redistribution for
6435 // 2-D distributions more efficient. I hesitate to change this
6436 // check, because I'm not sure how much the code here depends on
6437 // getMap() and getRowMap() being the same.
6438 TEUCHOS_TEST_FOR_EXCEPTION(
6439 !(reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6440 std::invalid_argument, prefix << "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6441
6442 TEUCHOS_TEST_FOR_EXCEPTION(
6443 !(!reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6444 std::invalid_argument, prefix << "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6445
6446 // checks for domainTransfer
6447 TEUCHOS_TEST_FOR_EXCEPTION(
6448 !xferDomainAsImport.is_null() && !xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6449 std::invalid_argument,
6450 prefix << "The target map of the 'domainTransfer' input argument must be "
6451 "the same as the rebalanced domain map 'domainMap'");
6452
6453 TEUCHOS_TEST_FOR_EXCEPTION(
6454 !xferDomainAsExport.is_null() && !xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6455 std::invalid_argument,
6456 prefix << "The source map of the 'domainTransfer' input argument must be "
6457 "the same as the rebalanced domain map 'domainMap'");
6458
6459 // The basic algorithm here is:
6460 //
6461 // 1. Call the moral equivalent of "Distor.do" to handle the import.
6462 // 2. Copy all the Imported and Copy/Permuted data into the raw
6463 // CrsGraph pointers, still using GIDs.
6464 // 3. Call an optimized version of MakeColMap that avoids the
6465 // Directory lookups (since the importer knows who owns all the
6466 // GIDs) AND reindexes to LIDs.
6467 // 4. Call expertStaticFillComplete()
6468
6469 // Get information from the Importer
6470 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6471 ArrayView<const LO> ExportLIDs = reverseMode ? rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6472 ArrayView<const LO> RemoteLIDs = reverseMode ? rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6473 ArrayView<const LO> PermuteToLIDs = reverseMode ? rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6474 ArrayView<const LO> PermuteFromLIDs = reverseMode ? rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6475 Distributor& Distor = rowTransfer.getDistributor();
6476
6477 // Owning PIDs
6478 Teuchos::Array<int> SourcePids;
6479 Teuchos::Array<int> TargetPids;
6480 int MyPID = getComm()->getRank();
6481
6482 // Temp variables for sub-communicators
6483 RCP<const map_type> ReducedRowMap, ReducedColMap,
6484 ReducedDomainMap, ReducedRangeMap;
6485 RCP<const Comm<int>> ReducedComm;
6486
6487 // If the user gave us a null destGraph, then construct the new
6488 // destination graph. We will replace its column Map later.
6489 if (destGraph.is_null()) {
6490 destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6491 }
6492
6493 /***************************************************/
6494 /***** 1) First communicator restriction phase ****/
6495 /***************************************************/
6496 if (restrictComm) {
6497 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6498 ReducedComm = ReducedRowMap.is_null() ? Teuchos::null : ReducedRowMap->getComm();
6499 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6500
6501 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ? ReducedRowMap : MyDomainMap->replaceCommWithSubset(ReducedComm);
6502 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ? ReducedRowMap : MyRangeMap->replaceCommWithSubset(ReducedComm);
6503
6504 // Reset the "my" maps
6505 MyRowMap = ReducedRowMap;
6506 MyDomainMap = ReducedDomainMap;
6507 MyRangeMap = ReducedRangeMap;
6508
6509 // Update my PID, if we've restricted the communicator
6510 if (!ReducedComm.is_null()) {
6511 MyPID = ReducedComm->getRank();
6512 } else {
6513 MyPID = -2; // For debugging
6514 }
6515 } else {
6516 ReducedComm = MyRowMap->getComm();
6517 }
6518
6519 /***************************************************/
6520 /***** 2) From Tpetra::DistObject::doTransfer() ****/
6521 /***************************************************/
6522 MM = Teuchos::null;
6523 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC ImportSetup"));
6524 // Get the owning PIDs
6525 RCP<const import_type> MyImporter = getImporter();
6526
6527 // check whether domain maps of source graph and base domain map is the same
6528 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6529
6530 if (!restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6531 // Same domain map as source graph
6532 //
6533 // NOTE: This won't work for restrictComm (because the Import
6534 // doesn't know the restricted PIDs), though writing an
6535 // optimized version for that case would be easy (Import an
6536 // IntVector of the new PIDs). Might want to add this later.
6537 Import_Util::getPids(*MyImporter, SourcePids, false);
6538 } else if (restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6539 // Same domain map as source graph (restricted communicator)
6540 // We need one import from the domain to the column map
6541 ivector_type SourceDomain_pids(getDomainMap(), true);
6542 ivector_type SourceCol_pids(getColMap());
6543 // SourceDomain_pids contains the restricted pids
6544 SourceDomain_pids.putScalar(MyPID);
6545
6546 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6547 SourcePids.resize(getColMap()->getLocalNumElements());
6548 SourceCol_pids.get1dCopy(SourcePids());
6549 } else if (MyImporter.is_null() && bSameDomainMap) {
6550 // Graph has no off-process entries
6551 SourcePids.resize(getColMap()->getLocalNumElements());
6552 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6553 } else if (!MyImporter.is_null() &&
6554 !domainTransfer.is_null()) {
6555 // general implementation for rectangular matrices with
6556 // domain map different than SourceGraph domain map.
6557 // User has to provide a DomainTransfer object. We need
6558 // two communications (import/export)
6559
6560 // TargetDomain_pids lives on the rebalanced new domain map
6561 ivector_type TargetDomain_pids(domainMap);
6562 TargetDomain_pids.putScalar(MyPID);
6563
6564 // SourceDomain_pids lives on the non-rebalanced old domain map
6565 ivector_type SourceDomain_pids(getDomainMap());
6566
6567 // SourceCol_pids lives on the non-rebalanced old column map
6568 ivector_type SourceCol_pids(getColMap());
6569
6570 if (!reverseMode && !xferDomainAsImport.is_null()) {
6571 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6572 } else if (reverseMode && !xferDomainAsExport.is_null()) {
6573 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6574 } else if (!reverseMode && !xferDomainAsExport.is_null()) {
6575 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6576 } else if (reverseMode && !xferDomainAsImport.is_null()) {
6577 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6578 } else {
6579 TEUCHOS_TEST_FOR_EXCEPTION(
6580 true, std::logic_error,
6581 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6582 }
6583 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6584 SourcePids.resize(getColMap()->getLocalNumElements());
6585 SourceCol_pids.get1dCopy(SourcePids());
6586 } else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6587 getDomainMap()->isSameAs(*getRowMap())) {
6588 // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6589 ivector_type TargetRow_pids(domainMap);
6590 ivector_type SourceRow_pids(getRowMap());
6591 ivector_type SourceCol_pids(getColMap());
6592
6593 TargetRow_pids.putScalar(MyPID);
6594 if (!reverseMode && xferAsImport != nullptr) {
6595 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
6596 } else if (reverseMode && xferAsExport != nullptr) {
6597 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
6598 } else if (!reverseMode && xferAsExport != nullptr) {
6599 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
6600 } else if (reverseMode && xferAsImport != nullptr) {
6601 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
6602 } else {
6603 TEUCHOS_TEST_FOR_EXCEPTION(
6604 true, std::logic_error,
6605 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6606 }
6607 SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
6608 SourcePids.resize(getColMap()->getLocalNumElements());
6609 SourceCol_pids.get1dCopy(SourcePids());
6610 } else {
6611 TEUCHOS_TEST_FOR_EXCEPTION(
6612 true, std::invalid_argument,
6613 prefix << "This method only allows either domainMap == getDomainMap(), "
6614 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
6615 }
6616
6617 // Tpetra-specific stuff
6618 size_t constantNumPackets = destGraph->constantNumberOfPackets();
6619 if (constantNumPackets == 0) {
6620 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6621 RemoteLIDs.size());
6622 } else {
6623 // There are a constant number of packets per element. We
6624 // already know (from the number of "remote" (incoming)
6625 // elements) how many incoming elements we expect, so we can
6626 // resize the buffer accordingly.
6627 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6628 destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
6629 }
6630
6631 {
6632 // packAndPrepare* methods modify numExportPacketsPerLID_.
6633 destGraph->numExportPacketsPerLID_.modify_host();
6634 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6635 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6636
6637 // Pack & Prepare w/ owning PIDs
6638 packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
6639 numExportPacketsPerLID, ExportLIDs,
6640 SourcePids, constantNumPackets);
6641 }
6642
6643 // Do the exchange of remote data.
6644 MM = Teuchos::null;
6645 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC Transfer"));
6646
6647 if (communication_needed) {
6648 if (reverseMode) {
6649 if (constantNumPackets == 0) { // variable number of packets per LID
6650 // Make sure that host has the latest version, since we're
6651 // using the version on host. If host has the latest
6652 // version, syncing to host does nothing.
6653 destGraph->numExportPacketsPerLID_.sync_host();
6654 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6655 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6656 destGraph->numImportPacketsPerLID_.sync_host();
6657 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
6658 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6659
6660 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6661 destGraph->numImportPacketsPerLID_.view_host());
6662 size_t totalImportPackets = 0;
6663 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6664 totalImportPackets += numImportPacketsPerLID[i];
6665 }
6666
6667 // Reallocation MUST go before setting the modified flag,
6668 // because it may clear out the flags.
6669 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
6670 destGraph->imports_.modify_host();
6671 auto hostImports = destGraph->imports_.view_host();
6672 // This is a legacy host pack/unpack path, so use the host
6673 // version of exports_.
6674 destGraph->exports_.sync_host();
6675 auto hostExports = destGraph->exports_.view_host();
6676 Distor.doReversePostsAndWaits(hostExports,
6677 numExportPacketsPerLID,
6678 hostImports,
6679 numImportPacketsPerLID);
6680 } else { // constant number of packets per LI
6681 destGraph->imports_.modify_host();
6682 auto hostImports = destGraph->imports_.view_host();
6683 // This is a legacy host pack/unpack path, so use the host
6684 // version of exports_.
6685 destGraph->exports_.sync_host();
6686 auto hostExports = destGraph->exports_.view_host();
6687 Distor.doReversePostsAndWaits(hostExports,
6688 constantNumPackets,
6689 hostImports);
6690 }
6691 } else { // forward mode (the default)
6692 if (constantNumPackets == 0) { // variable number of packets per LID
6693 // Make sure that host has the latest version, since we're
6694 // using the version on host. If host has the latest
6695 // version, syncing to host does nothing.
6696 destGraph->numExportPacketsPerLID_.sync_host();
6697 destGraph->numImportPacketsPerLID_.sync_host();
6698 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6699 destGraph->numImportPacketsPerLID_.view_host());
6700
6701 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6702 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6703 size_t totalImportPackets = 0;
6704 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6705 totalImportPackets += numImportPacketsPerLID[i];
6706 }
6707
6708 // Reallocation MUST go before setting the modified flag,
6709 // because it may clear out the flags.
6710 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
6711 destGraph->imports_.modify_host();
6712 auto hostImports = destGraph->imports_.view_host();
6713 // This is a legacy host pack/unpack path, so use the host
6714 // version of exports_.
6715 destGraph->exports_.sync_host();
6716 auto hostExports = destGraph->exports_.view_host();
6717 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6718 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6719 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
6720 } else { // constant number of packets per LID
6721 destGraph->imports_.modify_host();
6722 auto hostImports = destGraph->imports_.view_host();
6723 // This is a legacy host pack/unpack path, so use the host
6724 // version of exports_.
6725 destGraph->exports_.sync_host();
6726 auto hostExports = destGraph->exports_.view_host();
6727 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
6728 }
6729 }
6730 }
6731
6732 /*********************************************************************/
6733 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
6734 /*********************************************************************/
6735
6736 MM = Teuchos::null;
6737 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC Unpack-1"));
6738
6739 // Backwards compatibility measure. We'll use this again below.
6740 destGraph->numImportPacketsPerLID_.sync_host();
6741 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6742 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6743 destGraph->imports_.sync_host();
6744 Teuchos::ArrayView<const packet_type> hostImports =
6745 getArrayViewFromDualView(destGraph->imports_);
6746 size_t mynnz =
6747 unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
6748 numImportPacketsPerLID,
6749 constantNumPackets, INSERT,
6750 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
6751 size_t N = BaseRowMap->getLocalNumElements();
6752
6753 // Allocations
6754 ArrayRCP<size_t> CSR_rowptr(N + 1);
6755 ArrayRCP<GO> CSR_colind_GID;
6756 ArrayRCP<LO> CSR_colind_LID;
6757 CSR_colind_GID.resize(mynnz);
6758
6759 // If LO and GO are the same, we can reuse memory when
6760 // converting the column indices from global to local indices.
6761 if (typeid(LO) == typeid(GO)) {
6762 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
6763 } else {
6764 CSR_colind_LID.resize(mynnz);
6765 }
6766
6767 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
6768 // unpackAndCombine method on a "CrsArrays" object? This passing
6769 // in a huge list of arrays is icky. Can't we have a bit of an
6770 // abstraction? Implementing a concrete DistObject subclass only
6771 // takes five methods.
6772 unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
6773 numImportPacketsPerLID, constantNumPackets,
6774 INSERT, NumSameIDs, PermuteToLIDs,
6775 PermuteFromLIDs, N, mynnz, MyPID,
6776 CSR_rowptr(), CSR_colind_GID(),
6777 SourcePids(), TargetPids);
6778
6779 /**************************************************************/
6780 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
6781 /**************************************************************/
6782 MM = Teuchos::null;
6783 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC Unpack-2"));
6784 // Call an optimized version of makeColMap that avoids the
6785 // Directory lookups (since the Import object knows who owns all
6786 // the GIDs).
6787 Teuchos::Array<int> RemotePids;
6788 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
6789 CSR_colind_LID(),
6790 CSR_colind_GID(),
6791 BaseDomainMap,
6792 TargetPids, RemotePids,
6793 MyColMap);
6794
6795 /*******************************************************/
6796 /**** 4) Second communicator restriction phase ****/
6797 /*******************************************************/
6798 if (restrictComm) {
6799 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
6800 MyColMap = ReducedColMap; // Reset the "my" maps
6801 }
6802
6803 // Replace the col map
6804 destGraph->replaceColMap(MyColMap);
6805
6806 // Short circuit if the processor is no longer in the communicator
6807 //
6808 // NOTE: Epetra modifies all "removed" processes so they
6809 // have a dummy (serial) Map that doesn't touch the original
6810 // communicator. Duplicating that here might be a good idea.
6811 if (ReducedComm.is_null()) {
6812 return;
6813 }
6814
6815 /***************************************************/
6816 /**** 5) Sort ****/
6817 /***************************************************/
6818 if ((!reverseMode && xferAsImport != nullptr) ||
6819 (reverseMode && xferAsExport != nullptr)) {
6820 Import_Util::sortCrsEntries(CSR_rowptr(),
6821 CSR_colind_LID());
6822 } else if ((!reverseMode && xferAsExport != nullptr) ||
6823 (reverseMode && xferAsImport != nullptr)) {
6824 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
6825 CSR_colind_LID());
6826 if (CSR_rowptr[N] != mynnz) {
6827 CSR_colind_LID.resize(CSR_rowptr[N]);
6828 }
6829 } else {
6830 TEUCHOS_TEST_FOR_EXCEPTION(
6831 true, std::logic_error,
6832 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6833 }
6834 /***************************************************/
6835 /**** 6) Reset the colmap and the arrays ****/
6836 /***************************************************/
6837
6838 // Call constructor for the new graph (restricted as needed)
6839 //
6840 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
6841
6842 /***************************************************/
6843 /**** 7) Build Importer & Call ESFC ****/
6844 /***************************************************/
6845 // Pre-build the importer using the existing PIDs
6846 Teuchos::ParameterList esfc_params;
6847 MM = Teuchos::null;
6848 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC CreateImporter"));
6849 RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
6850 MM = Teuchos::null;
6851 MM = Teuchos::rcp(new Tpetra::Details::ProfilingRegion("Tpetra CrsGraph TAFC ESFC"));
6852
6853 if (!params.is_null())
6854 esfc_params.set("compute global constants", params->get("compute global constants", true));
6855
6856 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
6857 MyImport, Teuchos::null, rcp(&esfc_params, false));
6858}
6859
// Import-based convenience overload without a separate domain transfer.
// Forwards destGraph, importer, domainMap, rangeMap and params unchanged to
// transferAndFillComplete, passing Teuchos::null in the domain-transfer slot.
// NOTE(review): the line(s) carrying the function name and the destGraph
// parameter are absent here (embedded numbering jumps 6860 -> 6863);
// presumably this is importAndFillComplete -- confirm against the header.
6860template <class LocalOrdinal, class GlobalOrdinal, class Node>
6863 const import_type& importer,
6864 const Teuchos::RCP<const map_type>& domainMap,
6865 const Teuchos::RCP<const map_type>& rangeMap,
6866 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6867 transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
6868}
6869
// Import-based convenience overload taking both a row importer and a domain
// importer. The domain importer is wrapped with Teuchos::rcpFromRef and handed
// to transferAndFillComplete together with the remaining arguments.
// NOTE(review): the lines declaring the function name, destGraph and
// domainImporter are absent here (embedded numbers 6871, 6872, 6874);
// presumably domainImporter is a const import_type& -- confirm.
6870template <class LocalOrdinal, class GlobalOrdinal, class Node>
6873 const import_type& rowImporter,
6875 const Teuchos::RCP<const map_type>& domainMap,
6876 const Teuchos::RCP<const map_type>& rangeMap,
6877 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6878 transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
6879}
6880
// Export-based convenience overload without a separate domain transfer.
// Forwards destGraph, exporter, domainMap, rangeMap and params unchanged to
// transferAndFillComplete, passing Teuchos::null in the domain-transfer slot.
// NOTE(review): the line(s) carrying the function name and the destGraph
// parameter are absent here (embedded numbering jumps 6881 -> 6884);
// presumably this is exportAndFillComplete -- confirm against the header.
6881template <class LocalOrdinal, class GlobalOrdinal, class Node>
6884 const export_type& exporter,
6885 const Teuchos::RCP<const map_type>& domainMap,
6886 const Teuchos::RCP<const map_type>& rangeMap,
6887 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6888 transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
6889}
6890
// Export-based convenience overload taking both a row exporter and a domain
// exporter. The domain exporter is wrapped with Teuchos::rcpFromRef and handed
// to transferAndFillComplete together with the remaining arguments.
// NOTE(review): the lines declaring the function name, destGraph and
// domainExporter are absent here (embedded numbers 6892, 6893, 6895);
// presumably domainExporter is a const export_type& -- confirm.
6891template <class LocalOrdinal, class GlobalOrdinal, class Node>
6894 const export_type& rowExporter,
6896 const Teuchos::RCP<const map_type>& domainMap,
6897 const Teuchos::RCP<const map_type>& rangeMap,
6898 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6899 transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
6900}
6901
// Exchanges the state of `graph` and `*this` one data member at a time via
// std::swap: the UVM sync flag, the four maps, importer/exporter, cached
// entry counts, packed/unpacked row pointers (device and host), index
// storage, status flags, per-row allocation/entry counts, and nonlocals_.
// NOTE(review): the signature lines are absent here (embedded numbering jumps
// 6902 -> 6905); `graph` is presumably a non-const CrsGraph reference.
// NOTE(review): any data member added to the class must also be added here,
// otherwise it is silently left unswapped.
6902template <class LocalOrdinal, class GlobalOrdinal, class Node>
6905 std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
6906
// Maps
6907 std::swap(graph.rowMap_, this->rowMap_);
6908 std::swap(graph.colMap_, this->colMap_);
6909 std::swap(graph.rangeMap_, this->rangeMap_);
6910 std::swap(graph.domainMap_, this->domainMap_);
6911
// Communication plans
6912 std::swap(graph.importer_, this->importer_);
6913 std::swap(graph.exporter_, this->exporter_);
6914
// Cached local/global entry counts
6915 std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
6916
6917 std::swap(graph.globalNumEntries_, this->globalNumEntries_);
6918 std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
6919
6920 std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
6921
// Row pointers: packed and unpacked, each on device and host
6922 std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
6923 std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
6924
6925 std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
6926 std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
6927 std::swap(graph.packedUnpackedRowPtrsMatch_, this->packedUnpackedRowPtrsMatch_);
6928
6929 std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
6930
// Column-index storage (local unpacked, global, local packed)
6931 std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
6932 std::swap(graph.gblInds_wdv, this->gblInds_wdv);
6933 std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
6934
6935 std::swap(graph.storageStatus_, this->storageStatus_);
6936
// Boolean state flags
6937 std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
6938 std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
6939 std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
6940 std::swap(graph.fillComplete_, this->fillComplete_);
6941 std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
6942 std::swap(graph.noRedundancies_, this->noRedundancies_);
6943 std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
6944 std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
6945 std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
6946
6947 std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
6948
6949 std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
6950 std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
6951 std::swap(graph.nonlocals_, this->nonlocals_); // std::map
6952}
6953
// Member-by-member comparison of `*this` against `graph`.
//
// `output` starts as true and is forced to false by the first mismatch; it is
// never set back to true. Most scalar checks use the pattern
// `output = cond ? output : false`, so `cond` is still evaluated after an
// earlier mismatch; only the element-wise loops stop early on `!output`.
// Returns the final value of `output`.
//
// NOTE(review): the signature lines are absent here (embedded numbering jumps
// 6954 -> 6957); `graph` is presumably a const CrsGraph reference.
6954template <class LocalOrdinal, class GlobalOrdinal, class Node>
// Helper: true iff both maps have the same size, the same keys, and, per key,
// the same values once each value container has been sorted (so the order of
// entries within a row does not matter).
// NOTE(review): `key` is stored as size_t although the map key is described
// below as GlobalOrdinal; the value round-trips through a conversion on each
// find() -- confirm this is intended for signed GlobalOrdinal.
6957 auto compare_nonlocals = [&](const nonlocals_type& m1, const nonlocals_type& m2) {
6958 bool output = true;
6959 output = m1.size() == m2.size() ? output : false;
6960 for (auto& it_m : m1) {
6961 size_t key = it_m.first;
6962 output = m2.find(key) != m2.end() ? output : false;
6963 if (output) {
// v1/v2 are copies, so sorting them leaves the maps themselves untouched.
6964 auto v1 = m1.find(key)->second;
6965 auto v2 = m2.find(key)->second;
6966 std::sort(v1.begin(), v1.end());
6967 std::sort(v2.begin(), v2.end());
6968
6969 output = v1.size() == v2.size() ? output : false;
6970 for (size_t i = 0; output && i < v1.size(); i++) {
6971 output = v1[i] == v2[i] ? output : false;
6972 }
6973 }
6974 }
6975 return output;
6976 };
6977
6978 bool output = true;
6979
// Maps are compared with Map::isSameAs.
// NOTE(review): all four map pointers are dereferenced on both graphs without
// a null check -- assumes they are non-null; TODO confirm.
6980 output = this->rowMap_->isSameAs(*(graph.rowMap_)) ? output : false;
6981 output = this->colMap_->isSameAs(*(graph.colMap_)) ? output : false;
6982 output = this->rangeMap_->isSameAs(*(graph.rangeMap_)) ? output : false;
6983 output = this->domainMap_->isSameAs(*(graph.domainMap_)) ? output : false;
6984
// Cached counts and status flags are compared with operator==.
6985 output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
6986
6987 output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
6988 output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
6989
6990 output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
6991
6992 output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
6993
6994 output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
6995 output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
6996 output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
6997 output = this->fillComplete_ == graph.fillComplete_ ? output : false;
6998 output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
6999 output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7000 output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7001 output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7002 output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7003 output = this->sortGhostsAssociatedWithEachProcessor_ == graph.sortGhostsAssociatedWithEachProcessor_ ? output : false;
7004
7005 // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7006 // nonlocals_ isa std::map<GO, std::vector<GO> >
7007 output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7008
7009 // Compare k_numAllocPerRow_ isa Kokkos::View::host_mirror_type
7010 // - since this is a host_mirror_type type, it should be in host memory already
7011 output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7012 if (output && this->k_numAllocPerRow_.extent(0) > 0) {
7013 for (size_t i = 0; output && i < this->k_numAllocPerRow_.extent(0); i++)
7014 output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7015 }
7016
7017 // Compare k_numRowEntries_ isa Kokkos::View::host_mirror_type
7018 // - since this is a host_mirror_type type, it should be in host memory already
7019 output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7020 if (output && this->k_numRowEntries_.extent(0) > 0) {
7021 for (size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7022 output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7023 }
7024
7025 // Compare the unpacked row pointers, read through getRowPtrsUnpackedHost()
7026 {
7027 auto rowPtrsThis = this->getRowPtrsUnpackedHost();
7028 auto rowPtrsGraph = graph.getRowPtrsUnpackedHost();
7029 output = rowPtrsThis.extent(0) == rowPtrsGraph.extent(0) ? output : false;
7030 for (size_t i = 0; output && i < rowPtrsThis.extent(0); i++)
7031 output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7032 }
7033
7034 // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7035 output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7036 if (output && this->lclIndsUnpacked_wdv.extent(0) > 0) {
7037 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7038 auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7039 for (size_t i = 0; output && i < indThis.extent(0); i++)
7040 output = indThis(i) == indGraph(i) ? output : false;
7041 }
7042
7043 // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7044 output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7045 if (output && this->gblInds_wdv.extent(0) > 0) {
7046 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7047 auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7048 for (size_t i = 0; output && i < indtThis.extent(0); i++)
7049 output = indtThis(i) == indtGraph(i) ? output : false;
7050 }
7051
7052 // Check lclGraph_ isa
7053 // KokkosSparse::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7054 // KokkosSparse::StaticCrsGraph has 3 data members in it:
7055 // Kokkos::View<size_type*, ...> row_map
7056 // (local_graph_device_type::row_map_type)
7057 // Kokkos::View<data_type*, ...> entries
7058 // (local_graph_device_type::entries_type)
7059 // Kokkos::View<size_type*, ...> row_block_offsets
7060 // (local_graph_device_type::row_block_type)
7061 // There is currently no KokkosSparse::StaticCrsGraph comparison function
7062 // that's built-in, so we will just compare
7063 // the three data items here. This can be replaced if Kokkos ever
7064 // puts in its own comparison routine.
7065 local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7066 local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7067
7068 output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7069 ? output
7070 : false;
// NOTE(review): the declarations of lclGraph_rowmap_host_this/_graph and the
// comparison expression are absent here (embedded numbers 7072, 7073, 7075).
7071 if (output && thisLclGraph.row_map.extent(0) > 0) {
7074 for (size_t i = 0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7076 ? output
7077 : false;
7078 }
7079
7080 output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7081 ? output
7082 : false;
// NOTE(review): the declarations of lclGraph_entries_host_this/_graph and the
// comparison expression are absent here (embedded numbers 7084, 7085, 7087).
7083 if (output && thisLclGraph.entries.extent(0) > 0) {
7086 for (size_t i = 0; output && i < lclGraph_entries_host_this.extent(0); i++)
7088 ? output
7089 : false;
7090 }
7091
7092 output =
7093 thisLclGraph.row_block_offsets.extent(0) ==
7094 graphLclGraph.row_block_offsets.extent(0)
7095 ? output
7096 : false;
7097 if (output && thisLclGraph.row_block_offsets.extent(0) > 0) {
7098 auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7099 auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
// NOTE(review): the comparison expression line is absent here (embedded number 7101).
7100 for (size_t i = 0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7102 ? output
7103 : false;
7104 }
7105
7106 // For Importer and Exporter, we don't need to explicitly check them since
7107 // they will be consistent with the maps.
7108 // Note: importer_ isa Teuchos::RCP<const import_type>
7109 // exporter_ isa Teuchos::RCP<const export_type>
7110
7111 return output;
7112}
7113
// Device kernel that merges rows of srcCrsGraph into tgtCrsGraph's
// globally-indexed storage.
//
// For each iteration index in [0, loopEnd) a source row and a target row are
// selected, the source row's local column indices are translated to global
// indices through the source column map, and every global index not already
// in the target row is appended to it. The per-row entry counts are updated
// on a device copy and copied back into tgtCrsGraph.k_numRowEntries_.
//
// NOTE(review): the signature lines are absent here (embedded numbers
// 7115-7117, 7120). The body reads srcCrsGraph, tgtCrsGraph, permuteToLIDs,
// permuteFromLIDs and loopEnd; the name insertGlobalIndicesDevice is taken
// from the call sites in copyAndPermuteNew.
7114template <class LocalOrdinal, class GlobalOrdinal, class Node>
7118 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
7119 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
7121 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
7122 using LO = LocalOrdinal;
7123 using GO = GlobalOrdinal;
7124 typedef typename crs_graph_type::global_inds_device_view_type::non_const_value_type global_inds_device_value_t;
7125 typedef typename Node::execution_space exec_space;
7126 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
7127
// Sentinel values used below to detect failed local<->global lookups.
7128 const LocalOrdinal LINV = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
7129 const GlobalOrdinal GINV = Teuchos::OrdinalTraits<GlobalOrdinal>::invalid();
7130
7131 using local_map_type = typename crs_graph_type::map_type::local_map_type;
7132 local_map_type srcRowMapLocal = srcCrsGraph.getRowMap()->getLocalMap();
7133 local_map_type srcColMapLocal = srcCrsGraph.getColMap()->getLocalMap();
7134 local_map_type tgtRowMapLocal = tgtCrsGraph.getRowMap()->getLocalMap();
7135
// NOTE(review): source row pointers come from getLocalRowPtrsDevice() while
// source column indices come from lclIndsUnpacked_wdv; confirm that these two
// describe the same layout for every source graph reaching this function.
7136 auto tgtLocalRowPtrsDevice = tgtCrsGraph.getRowPtrsUnpackedDevice();
7137 auto tgtGlobalColInds = tgtCrsGraph.gblInds_wdv.getDeviceView(Access::ReadWrite);
7138 auto srcLocalRowPtrsDevice = srcCrsGraph.getLocalRowPtrsDevice();
7139 auto srcLocalColIndsDevice = srcCrsGraph.lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly);
7140
7141 typename crs_graph_type::num_row_entries_type::non_const_type h_numRowEnt = tgtCrsGraph.k_numRowEntries_;
7142
// Device-side working copy of the target's per-row entry counts; copied back
// into tgtCrsGraph.k_numRowEntries_ after the kernel.
7143 auto k_numRowEnt = Kokkos::create_mirror_view_and_copy(device_type(), h_numRowEnt);
7144
// Passed to findRelOffset below: target rows are not assumed to be sorted.
7145 const bool sorted = false;
7146
// With empty permutation arrays the kernel uses the loop index for both the
// source and the target row.
7147 bool hasMap = permuteFromLIDs.extent(0) > 0;
7148 auto permuteToLIDs_d = permuteToLIDs.view_device();
7149 auto permuteFromLIDs_d = permuteFromLIDs.view_device();
7150
// CRSGRAPH_INNER_ABORT(lin): print the failing source line, then Kokkos::abort.
// The SYCL build uses the oneAPI device printf instead of plain printf.
// NOTE(review): no #undef follows the kernel in this function, so the macro
// stays defined for the rest of the translation unit; the message also has no
// trailing newline.
7151#ifdef CRSGRAPH_INNER_ABORT
7152#undef CRSGRAPH_INNER_ABORT
7153#endif
7154
7155#ifdef KOKKOS_ENABLE_SYCL
7156#define CRSGRAPH_INNER_ABORT(lin) \
7157 do { \
7158 sycl::ext::oneapi::experimental::printf("ERROR: Tpetra_CrsGraph_def.hpp:%d", lin); \
7159 Kokkos::abort("error"); \
7160 } while (0)
7161#else
7162#define CRSGRAPH_INNER_ABORT(lin) \
7163 do { \
7164 printf("ERROR: Tpetra_CrsGraph_def.hpp:%d", lin); \
7165 Kokkos::abort("error"); \
7166 } while (0)
7167#endif
7168
// NOTE(review): every iteration writes k_numRowEnt(tgtLocalRow) and the target
// row's index slice without atomics -- assumes no two iterations map to the
// same target row; TODO confirm.
7169 Kokkos::parallel_for(
7170 "Tpetra_CrsGraph::copyAndPermuteNew",
7171 range_type(0, loopEnd),
7172 KOKKOS_LAMBDA(const LO sourceLID) {
7173 auto srcLid = sourceLID;
7174 auto tgtLid = sourceLID;
// NOTE(review): the body of this branch is absent here (embedded numbers
// 7176-7177); presumably it overrides srcLid/tgtLid from permuteFromLIDs_d /
// permuteToLIDs_d -- confirm against the original file.
7175 if (hasMap) {
7178 }
7179 auto srcGid = srcRowMapLocal.getGlobalElement(srcLid);
7180 if (srcGid == GINV) CRSGRAPH_INNER_ABORT(__LINE__);
// Round-trip tgtLid -> global -> local; abort if the target row map does not
// map it back to the same local row.
7181 auto tgtGid = tgtRowMapLocal.getGlobalElement(tgtLid);
7182 auto tgtLocalRow = tgtRowMapLocal.getLocalElement(tgtGid);
7183 if (tgtLocalRow == LINV) CRSGRAPH_INNER_ABORT(__LINE__);
7184 if (tgtLocalRow != tgtLid) CRSGRAPH_INNER_ABORT(__LINE__);
7185 auto tgtNumEntries = k_numRowEnt(tgtLocalRow);
7186
7187 // FIXME no auto use
7188 auto start = srcLocalRowPtrsDevice(srcLid);
7189 auto end = srcLocalRowPtrsDevice(srcLid + 1);
7190 auto rowLength = (end - start);
7191
// [tstart, tend) is the occupied part of the target row; tend1 is the start
// of the next row, i.e. the end of this row's allocation.
7192 auto tstart = tgtLocalRowPtrsDevice(tgtLocalRow);
7193 auto tend = tstart + tgtNumEntries;
7194 auto tend1 = tgtLocalRowPtrsDevice(tgtLocalRow + 1);
7195
// Free slots left in the target row, clamped at zero.
7196 const size_t num_avail = (tend1 < tend) ? size_t(0) : tend1 - tend;
7197 size_t num_inserted = 0;
7198
7199 global_inds_device_value_t* tgtGlobalColIndsPtr = tgtGlobalColInds.data();
7200
7201 size_t hint = 0;
7202 for (size_t j = 0; j < rowLength; j++) {
7203 auto ci = srcLocalColIndsDevice(start + j);
7204 GO gi = srcColMapLocal.getGlobalElement(ci);
7205 if (gi == GINV) CRSGRAPH_INNER_ABORT(__LINE__);
7206 auto numInTgtRow = (tend - tstart);
7207
7208 const size_t offset = KokkosSparse::findRelOffset(
7209 tgtGlobalColIndsPtr + tstart, numInTgtRow, gi, hint, sorted);
7210
// offset == numInTgtRow is treated as "gi not found in the target row":
// append it at the end and grow the occupied range by one.
7211 if (offset == numInTgtRow) {
7212 if (num_inserted >= num_avail) { // not enough room
7213 Kokkos::abort("num_avail");
7214 }
7215 tgtGlobalColIndsPtr[tstart + offset] = gi;
7216 ++tend;
7217 hint = offset + 1;
7218 ++num_inserted;
7219 }
7220 }
7221 k_numRowEnt(tgtLocalRow) += num_inserted;
// NOTE(review): the functor returns a value; parallel_for presumably ignores
// it -- confirm whether this return statement is needed.
7222 return size_t(0);
7223 });
7224 Kokkos::deep_copy(tgtCrsGraph.k_numRowEntries_, k_numRowEnt);
7225 tgtCrsGraph.setLocallyModified();
7226}
7227
7228template <class LocalOrdinal, class GlobalOrdinal, class Node>
7229void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::copyAndPermuteNew(
7230 const row_graph_type& srcRowGraph,
7231 row_graph_type& tgtRowGraph,
7232 const size_t numSameIDs,
7233 const Kokkos::DualView<const local_ordinal_type*,
7234 buffer_device_type>& permuteToLIDs,
7235 const Kokkos::DualView<const local_ordinal_type*,
7236 buffer_device_type>& permuteFromLIDs,
7237 const CombineMode CM) {
7238 using std::endl;
7239 using LO = local_ordinal_type;
7240 using GO = global_ordinal_type;
7241 const char tfecfFuncName[] = "copyAndPermuteNew: ";
7242 const bool verbose = verbose_;
7243
7244 Details::ProfilingRegion regionCAP("Tpetra::CrsGraph::copyAndPermuteNew");
7245 std::unique_ptr<std::string> prefix;
7246 if (verbose) {
7247 prefix = this->createPrefix("CrsGraph", "copyAndPermuteNew");
7248 std::ostringstream os;
7249 os << *prefix << endl;
7250 std::cerr << os.str();
7251 }
7252
7253 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7254 permuteToLIDs.extent(0) != permuteFromLIDs.extent(0),
7255 std::runtime_error,
7256 "permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) << " != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) << ".");
7257
7258 if (verbose) {
7259 std::ostringstream os;
7260 os << *prefix << "Compute padding" << endl;
7261 std::cerr << os.str();
7262 }
7263
7264 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
7265 const crs_graph_type* srcCrsGraphPtr = dynamic_cast<const crs_graph_type*>(&srcRowGraph);
7266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7267 !srcCrsGraphPtr, std::runtime_error, "error srcGraph type= " << typeid(srcRowGraph).name());
7268 const crs_graph_type& srcCrsGraph = *srcCrsGraphPtr;
7269
7270 crs_graph_type* tgtCrsGraphPtr = dynamic_cast<crs_graph_type*>(&tgtRowGraph);
7271 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7272 !tgtCrsGraphPtr, std::runtime_error, "error tgtGraph type= " << typeid(tgtRowGraph).name());
7273
7274 crs_graph_type& tgtCrsGraph = *tgtCrsGraphPtr;
7275 auto padding = tgtCrsGraph.computeCrsPadding(
7276 srcRowGraph, numSameIDs, permuteToLIDs, permuteFromLIDs, verbose);
7277 tgtCrsGraph.applyCrsPadding(*padding, verbose);
7278
7279 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
7280 const map_type& tgtRowMap = *(tgtRowGraph.getRowMap());
7281 const bool src_filled = srcRowGraph.isFillComplete();
7282 nonconst_global_inds_host_view_type row_copy;
7283 LO myid = 0;
7284
7285 //
7286 // "Copy" part of "copy and permute."
7287 //
7288 LO numSameIDs_as_LID = static_cast<LO>(numSameIDs);
7289
7290 if (src_filled || srcCrsGraphPtr == nullptr) {
7291 if (verbose) {
7292 std::ostringstream os;
7293 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
7294 std::cerr << os.str();
7295 }
7296 // If the source graph is fill complete, we can't use view mode,
7297 // because the data might be stored in a different format not
7298 // compatible with the expectations of view mode. Also, if the
7299 // source graph is not a CrsGraph, we can't use view mode,
7300 // because RowGraph only provides copy mode access to the data.
7301 Kokkos::DualView<const local_ordinal_type*, buffer_device_type> noPermute;
7302 insertGlobalIndicesDevice(srcCrsGraph, tgtCrsGraph,
7303 noPermute, noPermute,
7304 numSameIDs_as_LID);
7305 } else {
7306 if (verbose) {
7307 std::ostringstream os;
7308 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
7309 std::cerr << os.str();
7310 }
7311 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
7312 const GO gid = srcRowMap.getGlobalElement(myid);
7313 global_inds_host_view_type row;
7314 srcCrsGraph.getGlobalRowView(gid, row);
7315 tgtCrsGraph.insertGlobalIndices(gid, row.extent(0), row.data());
7316 }
7317 }
7318
7319 //
7320 // "Permute" part of "copy and permute."
7321 //
7322 auto permuteToLIDs_h = permuteToLIDs.view_host();
7323 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
7324 auto permuteToLIDs_d = permuteToLIDs.view_device();
7325 auto permuteFromLIDs_d = permuteFromLIDs.view_device();
7326
7327 if (src_filled || srcCrsGraphPtr == nullptr) {
7328 insertGlobalIndicesDevice(
7329 srcCrsGraph,
7330 tgtCrsGraph,
7331 permuteToLIDs,
7332 permuteFromLIDs, // note reversed arg order, tgt, then src
7333 static_cast<LO>(permuteToLIDs_h.extent(0)));
7334 } else {
7335 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
7336 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
7337 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
7338 global_inds_host_view_type row;
7339 srcCrsGraph.getGlobalRowView(srcgid, row);
7340 tgtCrsGraph.insertGlobalIndices(mygid, row.extent(0), row.data());
7341 }
7342 }
7343
7344 if (verbose) {
7345 std::ostringstream os;
7346 os << *prefix << "Done" << endl;
7347 std::cerr << os.str();
7348 }
7349}
7350
7351} // namespace Tpetra
7352
7353//
7354// Explicit instantiation macros
7355//
7356// Must be expanded from within the Tpetra namespace!
7357//
7358
// Declares importAndFillCompleteCrsGraph for CrsGraph<LO, GO, NODE>, single
// Import overload (sourceGraph, importer, domainMap, rangeMap, params).
// NOTE(review): `template <>` introduces an explicit specialization
// declaration rather than an explicit instantiation -- confirm intended.
7359#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7360 template <> \
7361 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7362 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7363 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7364 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7365 CrsGraph<LO, GO, NODE>::node_type>& importer, \
7366 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7367 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7368 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7369 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7370 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7371 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7372 const Teuchos::RCP<Teuchos::ParameterList>& params);
7373
// Declares importAndFillCompleteCrsGraph for CrsGraph<LO, GO, NODE>, overload
// taking two Import objects (rowImporter and domainImporter) in addition to
// sourceGraph, domainMap, rangeMap and params.
// NOTE(review): `template <>` introduces an explicit specialization
// declaration rather than an explicit instantiation -- confirm intended.
7374#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7375 template <> \
7376 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7377 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7378 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7379 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7380 CrsGraph<LO, GO, NODE>::node_type>& rowImporter, \
7381 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7382 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7383 CrsGraph<LO, GO, NODE>::node_type>& domainImporter, \
7384 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7385 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7386 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7387 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7388 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7389 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7390 const Teuchos::RCP<Teuchos::ParameterList>& params);
7391
// Declares exportAndFillCompleteCrsGraph for CrsGraph<LO, GO, NODE>, single
// Export overload (sourceGraph, exporter, domainMap, rangeMap, params).
// NOTE(review): `template <>` introduces an explicit specialization
// declaration rather than an explicit instantiation -- confirm intended.
7392#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7393 template <> \
7394 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7395 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7396 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7397 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7398 CrsGraph<LO, GO, NODE>::node_type>& exporter, \
7399 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7400 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7401 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7402 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7403 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7404 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7405 const Teuchos::RCP<Teuchos::ParameterList>& params);
7406
// Declares exportAndFillCompleteCrsGraph for CrsGraph<LO, GO, NODE>, overload
// taking two Export objects (rowExporter and domainExporter) in addition to
// sourceGraph, domainMap, rangeMap and params.
// NOTE(review): `template <>` introduces an explicit specialization
// declaration rather than an explicit instantiation -- confirm intended.
7407#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7408 template <> \
7409 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7410 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7411 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7412 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7413 CrsGraph<LO, GO, NODE>::node_type>& rowExporter, \
7414 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7415 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7416 CrsGraph<LO, GO, NODE>::node_type>& domainExporter, \
7417 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7418 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7419 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7420 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7421 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7422 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7423 const Teuchos::RCP<Teuchos::ParameterList>& params);
7424
// Umbrella macro: explicitly instantiates the class CrsGraph<LO, GO, NODE> and
// expands the four import/export-and-fill-complete macros for the same type
// triple (single-transfer variants first, then the two-transfer variants).
7425#define TPETRA_CRSGRAPH_INSTANT(LO, GO, NODE) \
7426 template class CrsGraph<LO, GO, NODE>; \
7427 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7428 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7429 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7430 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE)
7431
7432#endif // TPETRA_CRSGRAPH_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indices.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph is identical to the given graph, based on a member-by-member comparison.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. If OptimizedStorage...
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
bool hasColMap() const override
Whether the graph has a column Map.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
std::string description() const override
Return a one-line human-readable description of this object.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
typename local_graph_device_type::host_mirror_type local_graph_host_type
The type of the part of the sparse graph on each MPI process.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
typename Node::device_type device_type
This class' Kokkos device type.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indices.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. Built during fillCo...
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
Kokkos::View< const size_t *, device_type >::host_mirror_type k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Node node_type
This class' Kokkos Node type.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void setLocallyModified()
Report that we made a local modification to its structure.
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current Range Map with the given objects.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
Teuchos::RCP< const map_type > domainMap
Domain map for original matrix.
Teuchos::RCP< const map_type > rowMap
Desired row map for "imported" version of the matrix.
Teuchos::RCP< const map_type > origRowMap
Original row map of matrix.
static bool useNewCopyAndPermute()
Use new implementation of copyAndPermute.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Sets up and executes a communication plan for a Tpetra DistObject.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
An abstract interface for graphs accessed by rows.
Abstract base class for objects that can be the source of an Import or Export operation.
Implementation details of Tpetra.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID lives on only one process.
CombineMode
Rule for combining data in an Import or Export.
@ INSERT
Insert new values that don't currently exist.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.