Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_CRSGRAPH_DEF_HPP
11#define TPETRA_CRSGRAPH_DEF_HPP
12
15
20#include "Tpetra_Details_getGraphDiagOffsets.hpp"
21#include "Tpetra_Details_getGraphOffRankOffsets.hpp"
22#include "Tpetra_Details_makeColMap.hpp"
26#include "Tpetra_Distributor.hpp"
27#include "Teuchos_SerialDenseMatrix.hpp"
28#include "Tpetra_Vector.hpp"
31#include "Tpetra_Details_packCrsGraph.hpp"
32#include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
33#include "Tpetra_Details_CrsPadding.hpp"
34#include "Tpetra_Util.hpp"
35#include <algorithm>
36#include <limits>
37#include <map>
38#include <sstream>
39#include <string>
40#include <type_traits>
41#include <utility>
42#include <vector>
43
44namespace Tpetra {
45namespace Details {
46namespace Impl {
47
48template <class MapIter>
49void verbosePrintMap(std::ostream& out,
50 MapIter beg,
51 MapIter end,
52 const size_t numEnt,
53 const char mapName[]) {
54 using ::Tpetra::Details::Behavior;
55 using ::Tpetra::Details::verbosePrintArray;
56
57 out << mapName << ": {";
58 const size_t maxNumToPrint =
60 if (maxNumToPrint == 0) {
61 if (numEnt != 0) {
62 out << "...";
63 }
64 } else {
65 const size_t numToPrint = numEnt > maxNumToPrint ? maxNumToPrint : numEnt;
66 size_t count = 0;
67 for (MapIter it = beg; it != end; ++it) {
68 out << "(" << (*it).first << ", ";
69 verbosePrintArray(out, (*it).second, "gblColInds",
70 maxNumToPrint);
71 out << ")";
72 if (count + size_t(1) < numToPrint) {
73 out << ", ";
74 }
75 ++count;
76 }
77 if (count < numEnt) {
78 out << ", ...";
79 }
80 }
81 out << "}";
82}
83
84template <class LO, class GO, class Node>
85Teuchos::ArrayView<GO>
86getRowGraphGlobalRow(
87 std::vector<GO>& gblColIndsStorage,
88 const RowGraph<LO, GO, Node>& graph,
89 const GO gblRowInd) {
90 size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
91 if (gblColIndsStorage.size() < origNumEnt) {
92 gblColIndsStorage.resize(origNumEnt);
93 }
94 typename CrsGraph<LO, GO, Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
95 origNumEnt);
96 graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
97 Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(), origNumEnt);
98 return retval;
99}
100
101template <class LO, class GO, class DT, class OffsetType, class NumEntType>
102class ConvertColumnIndicesFromGlobalToLocal {
103 public:
104 ConvertColumnIndicesFromGlobalToLocal(const ::Kokkos::View<LO*, DT>& lclColInds,
105 const ::Kokkos::View<const GO*, DT>& gblColInds,
106 const ::Kokkos::View<const OffsetType*, DT>& ptr,
107 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
108 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
109 : lclColInds_(lclColInds)
110 , gblColInds_(gblColInds)
111 , ptr_(ptr)
112 , lclColMap_(lclColMap)
113 , numRowEnt_(numRowEnt) {}
114
115 KOKKOS_FUNCTION void
116 operator()(const LO& lclRow, OffsetType& curNumBad) const {
117 const OffsetType offset = ptr_(lclRow);
118 // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
119 // of entries in a row to LO, as long as the row doesn't have
120 // too many duplicate entries.
121 const LO numEnt = static_cast<LO>(numRowEnt_(lclRow));
122 for (LO j = 0; j < numEnt; ++j) {
123 const GO gid = gblColInds_(offset + j);
124 const LO lid = lclColMap_.getLocalElement(gid);
125 lclColInds_(offset + j) = lid;
126 if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid()) {
127 ++curNumBad;
128 }
129 }
130 }
131
132 static OffsetType
133 run(const ::Kokkos::View<LO*, DT>& lclColInds,
134 const ::Kokkos::View<const GO*, DT>& gblColInds,
135 const ::Kokkos::View<const OffsetType*, DT>& ptr,
136 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
137 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) {
138 typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
139 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
140
141 const LO lclNumRows = ptr.extent(0) == 0 ? static_cast<LO>(0) : static_cast<LO>(ptr.extent(0) - 1);
142 OffsetType numBad = 0;
143 // Count of "bad" column indices is a reduction over rows.
144 ::Kokkos::parallel_reduce(range_type(0, lclNumRows),
145 functor_type(lclColInds, gblColInds, ptr,
146 lclColMap, numRowEnt),
147 numBad);
148 return numBad;
149 }
150
151 private:
152 ::Kokkos::View<LO*, DT> lclColInds_;
153 ::Kokkos::View<const GO*, DT> gblColInds_;
154 ::Kokkos::View<const OffsetType*, DT> ptr_;
156 ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
157};
158
159} // namespace Impl
160
175template <class LO, class GO, class DT, class OffsetType, class NumEntType>
176OffsetType
178 const Kokkos::View<const GO*, DT>& gblColInds,
179 const Kokkos::View<const OffsetType*, DT>& ptr,
181 const Kokkos::View<const NumEntType*, DT>& numRowEnt) {
182 using Impl::ConvertColumnIndicesFromGlobalToLocal;
184 return impl_type::run(lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
185}
186
187template <class ViewType, class LO>
188class MaxDifference {
189 public:
190 MaxDifference(const ViewType& ptr)
191 : ptr_(ptr) {}
192
193 KOKKOS_INLINE_FUNCTION void init(LO& dst) const {
194 dst = 0;
195 }
196
197 KOKKOS_INLINE_FUNCTION void
198 join(LO& dst, const LO& src) const {
199 dst = (src > dst) ? src : dst;
200 }
201
202 KOKKOS_INLINE_FUNCTION void
203 operator()(const LO lclRow, LO& maxNumEnt) const {
204 const LO numEnt = static_cast<LO>(ptr_(lclRow + 1) - ptr_(lclRow));
205 maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
206 }
207
208 private:
209 typename ViewType::const_type ptr_;
210};
211
212template <class ViewType, class LO>
213typename ViewType::non_const_value_type
214maxDifference(const char kernelLabel[],
215 const ViewType& ptr,
216 const LO lclNumRows) {
217 if (lclNumRows == 0) {
218 // mfh 07 May 2018: Weirdly, I need this special case,
219 // otherwise I get the wrong answer.
220 return static_cast<LO>(0);
221 } else {
222 using execution_space = typename ViewType::execution_space;
223 using range_type = Kokkos::RangePolicy<execution_space, LO>;
224 LO theMaxNumEnt{0};
225 Kokkos::parallel_reduce(kernelLabel,
226 range_type(0, lclNumRows),
227 MaxDifference<ViewType, LO>(ptr),
228 theMaxNumEnt);
229 return theMaxNumEnt;
230 }
231}
232
233} // namespace Details
234
235template <class LocalOrdinal, class GlobalOrdinal, class Node>
236bool CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
237 getDebug() {
238 return Details::Behavior::debug("CrsGraph");
239}
240
241template <class LocalOrdinal, class GlobalOrdinal, class Node>
242bool CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
243 getVerbose() {
244 return Details::Behavior::verbose("CrsGraph");
245}
246
247template <class LocalOrdinal, class GlobalOrdinal, class Node>
249 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
250 const size_t maxNumEntriesPerRow,
251 const Teuchos::RCP<Teuchos::ParameterList>& params)
252 : dist_object_type(rowMap)
253 , rowMap_(rowMap)
254 , numAllocForAllRows_(maxNumEntriesPerRow) {
255 const char tfecfFuncName[] =
256 "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
257 staticAssertions();
258 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid(),
259 std::invalid_argument,
260 "The allocation hint maxNumEntriesPerRow must be "
261 "a valid size_t value, which in this case means it must not be "
262 "Teuchos::OrdinalTraits<size_t>::invalid().");
265}
266
267template <class LocalOrdinal, class GlobalOrdinal, class Node>
269 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
270 const Teuchos::RCP<const map_type>& colMap,
271 const size_t maxNumEntriesPerRow,
272 const Teuchos::RCP<Teuchos::ParameterList>& params)
273 : dist_object_type(rowMap)
274 , rowMap_(rowMap)
275 , colMap_(colMap)
276 , numAllocForAllRows_(maxNumEntriesPerRow) {
277 const char tfecfFuncName[] =
278 "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
279 staticAssertions();
281 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid(),
282 std::invalid_argument,
283 "The allocation hint maxNumEntriesPerRow must be "
284 "a valid size_t value, which in this case means it must not be "
285 "Teuchos::OrdinalTraits<size_t>::invalid().");
288}
289
290template <class LocalOrdinal, class GlobalOrdinal, class Node>
292 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
293 const Teuchos::ArrayView<const size_t>& numEntPerRow,
294 const Teuchos::RCP<Teuchos::ParameterList>& params)
296 , rowMap_(rowMap)
297 , numAllocForAllRows_(0) {
298 const char tfecfFuncName[] =
299 "CrsGraph(rowMap,numEntPerRow,params): ";
300 staticAssertions();
301
302 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
304 static_cast<size_t>(numEntPerRow.size()) != lclNumRows,
305 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size() << " != the local number of rows " << lclNumRows << " as specified by "
306 "the input row Map.");
307
308 if (debug_) {
309 for (size_t r = 0; r < lclNumRows; ++r) {
310 const size_t curRowCount = numEntPerRow[r];
311 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
312 std::invalid_argument, "numEntPerRow(" << r << ") "
313 "specifies an invalid number of entries "
314 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
315 }
316 }
317
318 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
319 // The latter is a const View, so we have to copy into a nonconst
320 // View first, then assign.
321 typedef decltype(k_numAllocPerRow_) out_view_type;
322 typedef typename out_view_type::non_const_type nc_view_type;
323 typedef Kokkos::View<const size_t*,
324 typename nc_view_type::array_layout,
325 Kokkos::HostSpace,
326 Kokkos::MemoryUnmanaged>
327 in_view_type;
329 nc_view_type numAllocPerRowOut("Tpetra::CrsGraph::numAllocPerRow",
330 lclNumRows);
331 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
332 using exec_space = typename nc_view_type::execution_space;
333 Kokkos::deep_copy(exec_space(), numAllocPerRowOut, numAllocPerRowIn);
334 k_numAllocPerRow_ = numAllocPerRowOut;
335
336 resumeFill(params);
337 checkInternalState();
338}
339
340template <class LocalOrdinal, class GlobalOrdinal, class Node>
342 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
343 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
344 const Teuchos::RCP<Teuchos::ParameterList>& params)
345 : dist_object_type(rowMap)
346 , rowMap_(rowMap)
347 , k_numAllocPerRow_(numEntPerRow.view_host())
348 , numAllocForAllRows_(0) {
349 const char tfecfFuncName[] =
350 "CrsGraph(rowMap,numEntPerRow,params): ";
351 staticAssertions();
352
353 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
355 static_cast<size_t>(numEntPerRow.extent(0)) != lclNumRows,
356 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.extent(0) << " != the local number of rows " << lclNumRows << " as specified by "
357 "the input row Map.");
358
359 if (debug_) {
360 for (size_t r = 0; r < lclNumRows; ++r) {
361 const size_t curRowCount = numEntPerRow.view_host()(r);
362 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
363 std::invalid_argument, "numEntPerRow(" << r << ") "
364 "specifies an invalid number of entries "
365 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
366 }
368
369 resumeFill(params);
370 checkInternalState();
371}
372
373template <class LocalOrdinal, class GlobalOrdinal, class Node>
375 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
376 const Teuchos::RCP<const map_type>& colMap,
377 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
378 const Teuchos::RCP<Teuchos::ParameterList>& params)
379 : dist_object_type(rowMap)
380 , rowMap_(rowMap)
381 , colMap_(colMap)
382 , k_numAllocPerRow_(numEntPerRow.view_host())
383 , numAllocForAllRows_(0) {
384 const char tfecfFuncName[] =
385 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
386 staticAssertions();
388 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
390 static_cast<size_t>(numEntPerRow.extent(0)) != lclNumRows,
391 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.extent(0) << " != the local number of rows " << lclNumRows << " as specified by "
392 "the input row Map.");
393
394 if (debug_) {
395 for (size_t r = 0; r < lclNumRows; ++r) {
396 const size_t curRowCount = numEntPerRow.view_host()(r);
397 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
398 std::invalid_argument, "numEntPerRow(" << r << ") "
399 "specifies an invalid number of entries "
400 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
401 }
402 }
406}
407
408template <class LocalOrdinal, class GlobalOrdinal, class Node>
410 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
411 const Teuchos::RCP<const map_type>& colMap,
412 const Teuchos::ArrayView<const size_t>& numEntPerRow,
413 const Teuchos::RCP<Teuchos::ParameterList>& params)
414 : dist_object_type(rowMap)
415 , rowMap_(rowMap)
416 , colMap_(colMap)
417 , numAllocForAllRows_(0) {
418 const char tfecfFuncName[] =
419 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
420 staticAssertions();
421
422 const size_t lclNumRows = rowMap.is_null() ? static_cast<size_t>(0) : rowMap->getLocalNumElements();
424 static_cast<size_t>(numEntPerRow.size()) != lclNumRows,
425 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size() << " != the local number of rows " << lclNumRows << " as specified by "
426 "the input row Map.");
427
428 if (debug_) {
429 for (size_t r = 0; r < lclNumRows; ++r) {
430 const size_t curRowCount = numEntPerRow[r];
431 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(curRowCount == Teuchos::OrdinalTraits<size_t>::invalid(),
432 std::invalid_argument, "numEntPerRow(" << r << ") "
433 "specifies an invalid number of entries "
434 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
435 }
436 }
437
438 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
439 // The latter is a const View, so we have to copy into a nonconst
440 // View first, then assign.
441 typedef decltype(k_numAllocPerRow_) out_view_type;
442 typedef typename out_view_type::non_const_type nc_view_type;
443 typedef Kokkos::View<const size_t*,
444 typename nc_view_type::array_layout,
445 Kokkos::HostSpace,
446 Kokkos::MemoryUnmanaged>
447 in_view_type;
448 in_view_type numAllocPerRowIn(numEntPerRow.getRawPtr(), lclNumRows);
449 nc_view_type numAllocPerRowOut("Tpetra::CrsGraph::numAllocPerRow",
450 lclNumRows);
451 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
452 using exec_space = typename nc_view_type::execution_space;
453 Kokkos::deep_copy(exec_space(), numAllocPerRowOut, numAllocPerRowIn);
454 k_numAllocPerRow_ = numAllocPerRowOut;
455
456 resumeFill(params);
458}
459
460template <class LocalOrdinal, class GlobalOrdinal, class Node>
463 const Teuchos::RCP<const map_type>& rowMap,
464 const Teuchos::RCP<Teuchos::ParameterList>& params)
465 : dist_object_type(rowMap)
466 , rowMap_(rowMap)
467 , colMap_(originalGraph.colMap_)
468 , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
469 , storageStatus_(originalGraph.storageStatus_)
470 , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
471 , indicesAreLocal_(originalGraph.indicesAreLocal_)
472 , indicesAreSorted_(originalGraph.indicesAreSorted_) {
473 staticAssertions();
474
475 int numRows = rowMap->getLocalNumElements();
476 size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
477 auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows + 1);
478
479 this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
480 this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
481
482 if (indicesAreLocal_) {
485 } else {
486 gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
487 }
488
490}
491
492template <class LocalOrdinal, class GlobalOrdinal, class Node>
494 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
495 const Teuchos::RCP<const map_type>& colMap,
496 const typename local_graph_device_type::row_map_type& rowPointers,
497 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
498 const Teuchos::RCP<Teuchos::ParameterList>& params)
499 : dist_object_type(rowMap)
500 , rowMap_(rowMap)
501 , colMap_(colMap)
502 , numAllocForAllRows_(0)
503 , storageStatus_(Details::STORAGE_1D_PACKED)
504 , indicesAreAllocated_(true)
505 , indicesAreLocal_(true) {
506 staticAssertions();
507 if (!params.is_null() && params->isParameter("sorted") &&
508 !params->get<bool>("sorted")) {
509 indicesAreSorted_ = false;
510 } else {
511 indicesAreSorted_ = true;
512 }
516
517template <class LocalOrdinal, class GlobalOrdinal, class Node>
519 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
520 const Teuchos::RCP<const map_type>& colMap,
521 const Teuchos::ArrayRCP<size_t>& rowPointers,
522 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices,
523 const Teuchos::RCP<Teuchos::ParameterList>& params)
524 : dist_object_type(rowMap)
525 , rowMap_(rowMap)
526 , colMap_(colMap)
527 , numAllocForAllRows_(0)
528 , storageStatus_(Details::STORAGE_1D_PACKED)
529 , indicesAreAllocated_(true)
530 , indicesAreLocal_(true) {
531 staticAssertions();
532 if (!params.is_null() && params->isParameter("sorted") &&
533 !params->get<bool>("sorted")) {
534 indicesAreSorted_ = false;
535 } else {
536 indicesAreSorted_ = true;
537 }
538 setAllIndices(rowPointers, columnIndices);
539 checkInternalState();
540}
541
542template <class LocalOrdinal, class GlobalOrdinal, class Node>
544 CrsGraph(const Teuchos::RCP<const map_type>& rowMap,
545 const Teuchos::RCP<const map_type>& colMap,
547 const Teuchos::RCP<Teuchos::ParameterList>& params)
549 rowMap,
550 colMap,
551 Teuchos::null,
552 Teuchos::null,
553 params) {}
554
555template <class LocalOrdinal, class GlobalOrdinal, class Node>
558 const Teuchos::RCP<const map_type>& rowMap,
559 const Teuchos::RCP<const map_type>& colMap,
560 const Teuchos::RCP<const map_type>& domainMap,
561 const Teuchos::RCP<const map_type>& rangeMap,
562 const Teuchos::RCP<Teuchos::ParameterList>& params)
564 , rowMap_(rowMap)
565 , colMap_(colMap)
566 , numAllocForAllRows_(0)
567 , storageStatus_(Details::STORAGE_1D_PACKED)
568 , indicesAreAllocated_(true)
569 , indicesAreLocal_(true) {
570 staticAssertions();
571 const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
572
574 colMap.is_null(), std::runtime_error,
575 ": The input column Map must be nonnull.");
577 k_local_graph_.numRows() != rowMap->getLocalNumElements(),
578 std::runtime_error,
579 ": The input row Map and the input local graph need to have the same "
580 "number of rows. The row Map claims "
581 << rowMap->getLocalNumElements()
582 << " row(s), but the local graph claims " << k_local_graph_.numRows()
583 << " row(s).");
584
585 // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
586 // rowMap_->getLocalNumElements(), but it doesn't have to.
587 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
588 // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
589 // ": The input row Map and the input local graph need to have the same "
590 // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
591 // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
593 lclIndsUnpacked_wdv.extent(0) != 0 || gblInds_wdv.extent(0) != 0, std::logic_error,
594 ": cannot have 1D data structures allocated.");
595
596 if (!params.is_null() && params->isParameter("sorted") &&
597 !params->get<bool>("sorted")) {
598 indicesAreSorted_ = false;
599 } else {
600 indicesAreSorted_ = true;
601 }
602
603 setDomainRangeMaps(domainMap.is_null() ? rowMap_ : domainMap,
604 rangeMap.is_null() ? rowMap_ : rangeMap);
605 Teuchos::Array<int> remotePIDs(0); // unused output argument
606 this->makeImportExport(remotePIDs, false);
607
608 lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
610 this->setRowPtrs(k_local_graph_.row_map);
611
612 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
613
614 const bool callComputeGlobalConstants = params.get() == nullptr ||
615 params->get("compute global constants", true);
616
617 if (callComputeGlobalConstants) {
619 }
620 this->fillComplete_ = true;
621 this->checkInternalState();
622}
623
624template <class LocalOrdinal, class GlobalOrdinal, class Node>
627 const Teuchos::RCP<const map_type>& rowMap,
628 const Teuchos::RCP<const map_type>& colMap,
629 const Teuchos::RCP<const map_type>& domainMap,
630 const Teuchos::RCP<const map_type>& rangeMap,
631 const Teuchos::RCP<const import_type>& importer,
632 const Teuchos::RCP<const export_type>& exporter,
633 const Teuchos::RCP<Teuchos::ParameterList>& params)
635 , rowMap_(rowMap)
636 , colMap_(colMap)
637 , rangeMap_(rangeMap.is_null() ? rowMap : rangeMap)
638 , domainMap_(domainMap.is_null() ? rowMap : domainMap)
639 , importer_(importer)
640 , exporter_(exporter)
641 , numAllocForAllRows_(0)
642 , storageStatus_(Details::STORAGE_1D_PACKED)
643 , indicesAreAllocated_(true)
644 , indicesAreLocal_(true) {
645 staticAssertions();
646 const char tfecfFuncName[] =
647 "Tpetra::CrsGraph(local_graph_device_type,"
648 "Map,Map,Map,Map,Import,Export,params): ";
649
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(colMap.is_null(), std::runtime_error,
651 "The input column Map must be nonnull.");
652
655 setRowPtrs(lclGraph.row_map);
656
657 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
658
659 if (!params.is_null() && params->isParameter("sorted") &&
660 !params->get<bool>("sorted")) {
661 indicesAreSorted_ = false;
662 } else {
664 }
665
666 const bool callComputeGlobalConstants =
667 params.get() == nullptr ||
668 params->get("compute global constants", true);
671 }
672 fillComplete_ = true;
674}
675
676template <class LocalOrdinal, class GlobalOrdinal, class Node>
678 CrsGraph(const row_ptrs_device_view_type& rowPointers,
680 const Teuchos::RCP<const map_type>& rowMap,
681 const Teuchos::RCP<const map_type>& colMap,
682 const Teuchos::RCP<const map_type>& domainMap,
683 const Teuchos::RCP<const map_type>& rangeMap,
684 const Teuchos::RCP<const import_type>& importer,
685 const Teuchos::RCP<const export_type>& exporter,
686 const Teuchos::RCP<Teuchos::ParameterList>& params)
688 , rowMap_(rowMap)
689 , colMap_(colMap)
690 , rangeMap_(rangeMap.is_null() ? rowMap : rangeMap)
691 , domainMap_(domainMap.is_null() ? rowMap : domainMap)
692 , importer_(importer)
693 , exporter_(exporter)
694 , numAllocForAllRows_(0)
695 , storageStatus_(Details::STORAGE_1D_PACKED)
696 , indicesAreAllocated_(true)
697 , indicesAreLocal_(true) {
698 staticAssertions();
699 const char tfecfFuncName[] =
700 "Tpetra::CrsGraph(row_ptrs_device_view_type,local_inds_wdv_type"
701 "Map,Map,Map,Map,Import,Export,params): ";
702
703 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(colMap.is_null(), std::runtime_error,
704 "The input column Map must be nonnull.");
706 lclIndsPacked_wdv = columnIndices;
707 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
708 setRowPtrs(rowPointers);
709
710 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
711
712 if (!params.is_null() && params->isParameter("sorted") &&
713 !params->get<bool>("sorted")) {
714 indicesAreSorted_ = false;
715 } else {
716 indicesAreSorted_ = true;
717 }
718
719 const bool callComputeGlobalConstants =
720 params.get() == nullptr ||
721 params->get("compute global constants", true);
722 if (callComputeGlobalConstants) {
723 this->computeGlobalConstants();
724 }
725 fillComplete_ = true;
726 checkInternalState();
727}
728
729template <class LocalOrdinal, class GlobalOrdinal, class Node>
730Teuchos::RCP<const Teuchos::ParameterList>
732 getValidParameters() const {
733 using Teuchos::ParameterList;
734 using Teuchos::parameterList;
735 using Teuchos::RCP;
736
737 RCP<ParameterList> params = parameterList("Tpetra::CrsGraph");
738
739 // Make a sublist for the Import.
741
742 // FIXME (mfh 02 Apr 2012) We should really have the Import and
743 // Export objects fill in these lists. However, we don't want to
744 // create an Import or Export unless we need them. For now, we
745 // know that the Import and Export just pass the list directly to
746 // their Distributor, so we can create a Distributor here
747 // (Distributor's constructor is a lightweight operation) and have
748 // it fill in the list.
750 // Fill in Distributor default parameters by creating a
751 // Distributor and asking it to do the work.
752 Distributor distributor(rowMap_->getComm(), importSublist);
753 params->set("Import", *importSublist, "How the Import performs communication.");
754
755 // Make a sublist for the Export. For now, it's a clone of the
756 // Import sublist. It's not a shallow copy, though, since we
757 // might like the Import to do communication differently than the
758 // Export.
759 params->set("Export", *importSublist, "How the Export performs communication.");
760
761 return params;
763
764template <class LocalOrdinal, class GlobalOrdinal, class Node>
766 setParameterList(const Teuchos::RCP<Teuchos::ParameterList>& params) {
767 Teuchos::RCP<const Teuchos::ParameterList> validParams =
768 getValidParameters();
769 params->validateParametersAndSetDefaults(*validParams);
770 this->setMyParamList(params);
771}
772
773template <class LocalOrdinal, class GlobalOrdinal, class Node>
776 getGlobalNumRows() const {
777 return rowMap_->getGlobalNumElements();
778}
779
780template <class LocalOrdinal, class GlobalOrdinal, class Node>
784 const char tfecfFuncName[] = "getGlobalNumCols: ";
786 !isFillComplete() || getDomainMap().is_null(), std::runtime_error,
787 "The graph does not have a domain Map. You may not call this method in "
788 "that case.");
789 return getDomainMap()->getGlobalNumElements();
790}
791
792template <class LocalOrdinal, class GlobalOrdinal, class Node>
793size_t
795 getLocalNumRows() const {
796 return this->rowMap_.is_null() ? static_cast<size_t>(0) : this->rowMap_->getLocalNumElements();
797}
798
799template <class LocalOrdinal, class GlobalOrdinal, class Node>
800size_t
802 getLocalNumCols() const {
803 const char tfecfFuncName[] = "getLocalNumCols: ";
805 !hasColMap(), std::runtime_error,
806 "The graph does not have a column Map. You may not call this method "
807 "unless the graph has a column Map. This requires either that a custom "
808 "column Map was given to the constructor, or that fillComplete() has "
809 "been called.");
810 return colMap_.is_null() ? static_cast<size_t>(0) : colMap_->getLocalNumElements();
811}
812
813template <class LocalOrdinal, class GlobalOrdinal, class Node>
814Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
816 getRowMap() const {
817 return rowMap_;
818}
819
820template <class LocalOrdinal, class GlobalOrdinal, class Node>
821Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
823 getColMap() const {
824 return colMap_;
825}
826
827template <class LocalOrdinal, class GlobalOrdinal, class Node>
828Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
830 getDomainMap() const {
831 return domainMap_;
832}
833
834template <class LocalOrdinal, class GlobalOrdinal, class Node>
835Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
837 getRangeMap() const {
838 return rangeMap_;
839}
840
841template <class LocalOrdinal, class GlobalOrdinal, class Node>
842Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
844 getImporter() const {
845 return importer_;
846}
847
848template <class LocalOrdinal, class GlobalOrdinal, class Node>
849Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
851 getExporter() const {
852 return exporter_;
853}
854
855template <class LocalOrdinal, class GlobalOrdinal, class Node>
857 hasColMap() const {
858 return !colMap_.is_null();
859}
860
861template <class LocalOrdinal, class GlobalOrdinal, class Node>
863 isStorageOptimized() const {
864 // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
865 // getLocalNumRows() is zero?
866
867 const bool isOpt = indicesAreAllocated_ &&
868 k_numRowEntries_.extent(0) == 0 &&
869 getLocalNumRows() > 0;
870
871 return isOpt;
872}
873
874template <class LocalOrdinal, class GlobalOrdinal, class Node>
877 getGlobalNumEntries() const {
878 const char tfecfFuncName[] = "getGlobalNumEntries: ";
879 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_, std::logic_error,
880 "The graph does not have global constants computed, "
881 "but the user has requested them.");
882
883 return globalNumEntries_;
884}
885
886template <class LocalOrdinal, class GlobalOrdinal, class Node>
887size_t
889 getLocalNumEntries() const {
890 const char tfecfFuncName[] = "getLocalNumEntries: ";
891 typedef LocalOrdinal LO;
892
893 if (this->indicesAreAllocated_) {
894 const LO lclNumRows = this->getLocalNumRows();
895 if (lclNumRows == 0) {
896 return static_cast<size_t>(0);
897 } else {
898 // Avoid the "*this capture" issue by creating a local Kokkos::View.
899 auto numEntPerRow = this->k_numRowEntries_;
900 const LO numNumEntPerRow = numEntPerRow.extent(0);
901 if (numNumEntPerRow == 0) {
902 if (static_cast<LO>(this->getRowPtrsPackedDevice().extent(0)) <
903 static_cast<LO>(lclNumRows + 1)) {
904 return static_cast<size_t>(0);
905 } else {
906 // indices are allocated and k_numRowEntries_ is not allocated,
907 // so we have packed storage and the length of lclIndsPacked_wdv
908 // must be the number of local entries.
909 if (debug_) {
910 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getRowPtrsPackedHost()(lclNumRows) != lclIndsPacked_wdv.extent(0), std::logic_error,
911 "Final entry of packed host rowptrs doesn't match the length of lclIndsPacked");
913 return lclIndsPacked_wdv.extent(0);
914 }
915 } else { // k_numRowEntries_ is populated
916 // k_numRowEntries_ is actually be a host View, so we run
917 // the sum in its native execution space. This also means
918 // that we can use explicit capture (which could perhaps
919 // improve build time) instead of KOKKOS_LAMBDA, and avoid
920 // any CUDA build issues with trying to run a __device__ -
921 // only function on host.
922 typedef typename num_row_entries_type::execution_space
923 host_exec_space;
924 typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
925
927 size_t nodeNumEnt = 0;
928 Kokkos::parallel_reduce(
929 "Tpetra::CrsGraph::getNumNodeEntries",
930 range_type(0, upperLoopBound),
931 [=](const LO& k, size_t& lclSum) {
933 },
934 nodeNumEnt);
935 return nodeNumEnt;
936 }
938 } else { // nothing allocated on this process, so no entries
939 return static_cast<size_t>(0);
941}
942
943template <class LocalOrdinal, class GlobalOrdinal, class Node>
947 const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
948 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_, std::logic_error,
949 "The graph does not have global constants computed, "
950 "but the user has requested them.");
951
952 return globalMaxNumRowEntries_;
953}
954
955template <class LocalOrdinal, class GlobalOrdinal, class Node>
956size_t
959 return nodeMaxNumRowEntries_;
960}
962template <class LocalOrdinal, class GlobalOrdinal, class Node>
964 isFillComplete() const {
965 return fillComplete_;
966}
967
// Returns the negation of fillComplete_.
// NOTE(review): the signature lines are missing from this listing;
// presumably this is isFillActive(), which checkInternalState compares
// against isFillComplete() -- confirm.
968template <class LocalOrdinal, class GlobalOrdinal, class Node>
971 return !fillComplete_;
972}
973
// Returns the indicesAreLocal_ flag.
974template <class LocalOrdinal, class GlobalOrdinal, class Node>
976 isLocallyIndexed() const {
977 return indicesAreLocal_;
978}
979
// Returns the indicesAreGlobal_ flag.
980template <class LocalOrdinal, class GlobalOrdinal, class Node>
982 isGloballyIndexed() const {
983 return indicesAreGlobal_;
984}
985
// Reports how many column-index slots are currently allocated on this
// process.
//
// Return value, by case:
//  - indices not allocated: OrdinalTraits<size_t>::invalid()
//  - no local rows: 0
//  - STORAGE_1D_PACKED: 0 if the packed row-pointer array has fewer than
//    lclNumRows + 1 entries, otherwise the extent of lclIndsPacked_wdv
//    (locally indexed) or gblInds_wdv (not locally indexed)
//  - STORAGE_1D_UNPACKED: 0 if the unpacked row-pointer array is empty,
//    otherwise the extent of lclIndsUnpacked_wdv (locally indexed) or
//    gblInds_wdv (not locally indexed)
//  - any other storage status: 0
//
// NOTE(review): the signature lines are missing from this listing; the name
// getLocalAllocationSize is assumed from the call in checkInternalState.
986template <class LocalOrdinal, class GlobalOrdinal, class Node>
987size_t
990 typedef LocalOrdinal LO;
992 if (this->indicesAreAllocated_) {
993 const LO lclNumRows = this->getLocalNumRows();
994 if (lclNumRows == 0) {
995 return static_cast<size_t>(0);
996 } else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
// A row-pointer array shorter than lclNumRows + 1 is treated as "nothing
// allocated" rather than as an error.
997 if (static_cast<LO>(this->getRowPtrsPackedDevice().extent(0)) <
998 static_cast<LO>(lclNumRows + 1)) {
999 return static_cast<size_t>(0);
1000 } else {
1001 if (this->isLocallyIndexed())
1002 return lclIndsPacked_wdv.extent(0);
1003 else
1004 return gblInds_wdv.extent(0);
1005 }
1006 } else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1007 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1008 if (rowPtrsUnpacked_host.extent(0) == 0) {
1009 return static_cast<size_t>(0);
1010 } else {
1011 if (this->isLocallyIndexed())
1012 return lclIndsUnpacked_wdv.extent(0);
1013 else
1014 return gblInds_wdv.extent(0);
1015 }
1016 } else {
1017 return static_cast<size_t>(0);
1018 }
1019 } else {
// Not allocated yet: signal this with the invalid sentinel, not with 0.
1020 return Tpetra::Details::OrdinalTraits<size_t>::invalid();
1021 }
1022}
1023
// Returns the communicator of the row Map, or Teuchos::null when rowMap_ is
// null.
1024template <class LocalOrdinal, class GlobalOrdinal, class Node>
1025Teuchos::RCP<const Teuchos::Comm<int>>
1027 getComm() const {
1028 return this->rowMap_.is_null() ? Teuchos::null : this->rowMap_->getComm();
1029}
// Returns the index base reported by the row Map.
// NOTE(review): unlike getComm(), this dereferences rowMap_ without a null
// check -- confirm that callers guarantee a non-null row Map.
1031template <class LocalOrdinal, class GlobalOrdinal, class Node>
1034 getIndexBase() const {
1035 return rowMap_->getIndexBase();
1036}
1037
// Returns the indicesAreAllocated_ flag.
1038template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040 indicesAreAllocated() const {
1041 return indicesAreAllocated_;
1042}
1043
// Returns the indicesAreSorted_ flag.
1044template <class LocalOrdinal, class GlobalOrdinal, class Node>
1046 isSorted() const {
1047 return indicesAreSorted_;
1048}
1049
// Returns the noRedundancies_ flag.
1050template <class LocalOrdinal, class GlobalOrdinal, class Node>
1052 isMerged() const {
1053 return noRedundancies_;
1054}
1055
// Marks the graph as modified on this process: clears indicesAreSorted_,
// noRedundancies_ and haveLocalConstants_.
// NOTE(review): the signature lines are missing from this listing; the name
// setLocallyModified is assumed from the insert routines that call it after
// changing a row.
1056template <class LocalOrdinal, class GlobalOrdinal, class Node>
1059 // FIXME (mfh 07 May 2013) How do we know that the change
1060 // introduced a redundancy, or even that it invalidated the sorted
1061 // order of indices? CrsGraph has always made this conservative
1062 // guess. It could be a bit costly to check at insertion time,
1063 // though.
1064 indicesAreSorted_ = false;
1065 noRedundancies_ = false;
1066
1067 // We've modified the graph, so we'll have to recompute local
1068 // constants like the number of diagonal entries on this process.
1069 haveLocalConstants_ = false;
1070}
// Allocates the unpacked 1-D storage for this graph's column indices.
//
// Parameters:
//   lg      - GlobalIndices or LocalIndices; selects whether gblInds_wdv or
//             lclIndsUnpacked_wdv is allocated.
//   verbose - when true, progress messages are written to std::cerr.
//
// Steps, as visible below:
//   1. Reject inconsistent calls with std::logic_error (lg disagrees with
//      the current indexing mode, or indices are already allocated).
//   2. Build row offsets of length numRows + 1, from k_numAllocPerRow_ when
//      it is non-empty, otherwise from the constant numAllocForAllRows_,
//      and commit them with setRowPtrsUnpacked.
//   3. Allocate the index array with numInds entries.
//   4. Set storageStatus_ to STORAGE_1D_UNPACKED and update the
//      indicesAreLocal_ / indicesAreGlobal_ flags.
//   5. If there are rows, allocate a zero-filled k_numRowEntries_.
//   6. Release the allocation hints (numAllocForAllRows_,
//      k_numAllocPerRow_) and set indicesAreAllocated_.
//   7. Run checkInternalState and rethrow any failure with added context.
//
// Throws std::invalid_argument when the allocation hints are unusable
// (length mismatch or invalid sentinel), before any state is changed.
//
// NOTE(review): this listing omits several source lines (for example the
// name that ends the typedef at 1081 and the brace that closes the verbose
// block opened at 1175); consult the real file before editing code here.
1072template <class LocalOrdinal, class GlobalOrdinal, class Node>
1074 allocateIndices(const ELocalGlobal lg, const bool verbose) {
1076 using std::endl;
1077 using Teuchos::arcp;
1078 using Teuchos::Array;
1079 using Teuchos::ArrayRCP;
1080 typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1081 typedef typename local_graph_device_type::row_map_type::non_const_type
1083 const char tfecfFuncName[] = "allocateIndices: ";
1084 const char suffix[] =
1085 " Please report this bug to the Tpetra developers.";
1086 ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1087
// prefix is only created when verbose is true; every later dereference of
// it is guarded by the same flag.
1088 std::unique_ptr<std::string> prefix;
1089 if (verbose) {
1090 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1091 std::ostringstream os;
1092 os << *prefix << "Start: lg="
1093 << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1094 << ", numRows: " << this->getLocalNumRows() << endl;
1095 std::cerr << os.str();
1096 }
1098 // This is a protected function, only callable by us. If it was
1099 // called incorrectly, it is our fault. That's why the tests
1100 // below throw std::logic_error instead of std::invalid_argument.
// NOTE(review): tfecfFuncName already ends in ": " and the messages below
// start with ": ". If the macro prepends tfecfFuncName to the message, the
// output contains a doubled colon -- confirm and consider dropping the
// leading ": " from these literals.
1101 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isLocallyIndexed() && lg == GlobalIndices, std::logic_error,
1102 ": The graph is locally indexed, but Tpetra code is calling "
1103 "this method with lg=GlobalIndices."
1104 << suffix);
1105 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed() && lg == LocalIndices, std::logic_error,
1106 ": The graph is globally indexed, but Tpetra code is calling "
1107 "this method with lg=LocalIndices."
1108 << suffix);
1109 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreAllocated(), std::logic_error,
1110 ": The graph's "
1111 "indices are already allocated, but Tpetra is calling "
1112 "allocateIndices again."
1113 << suffix);
1114 const size_t numRows = this->getLocalNumRows();
1115
1116 //
1117 // STATIC ALLOCATION PROFILE
1118 //
// numInds receives the total number of index slots, as returned by the
// offset computation below.
1119 size_type numInds = 0;
1120 {
1121 if (verbose) {
1122 std::ostringstream os;
1123 os << *prefix << "Allocate k_rowPtrs: " << (numRows + 1) << endl;
1124 std::cerr << os.str();
1125 }
1126 non_const_row_map_type k_rowPtrs("Tpetra::CrsGraph::ptr", numRows + 1);
1127
1128 if (this->k_numAllocPerRow_.extent(0) != 0) {
1129 // It's OK to throw std::invalid_argument here, because we
1130 // haven't incurred any side effects yet. Throwing that
1131 // exception (and not, say, std::logic_error) implies that the
1132 // instance can recover.
1133 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->k_numAllocPerRow_.extent(0) != numRows,
1134 std::invalid_argument,
1135 "k_numAllocPerRow_ is allocated, that is, "
1136 "has nonzero length "
1137 << this->k_numAllocPerRow_.extent(0)
1138 << ", but its length != numRows = " << numRows << ".");
1139
1140 // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1141 // we want to compute here) lives on device. That's OK;
1142 // computeOffsetsFromCounts can handle this case.
1144
1145 // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1146 // doesn't attempt to check its input for "invalid" flag
1147 // values. For now, we omit that feature of the sequential
1148 // code disabled below.
1149 numInds = computeOffsetsFromCounts(k_rowPtrs, k_numAllocPerRow_);
1150 } else {
1151 // It's OK to throw std::invalid_argument here, because we
1152 // haven't incurred any side effects yet. Throwing that
1153 // exception (and not, say, std::logic_error) implies that the
1154 // instance can recover.
1155 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->numAllocForAllRows_ ==
1156 Tpetra::Details::OrdinalTraits<size_t>::invalid(),
1157 std::invalid_argument,
1158 "numAllocForAllRows_ has an invalid value, "
1159 "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = "
1160 << Tpetra::Details::OrdinalTraits<size_t>::invalid() << ".");
1161
1163 numInds = computeOffsetsFromConstantCount(k_rowPtrs, this->numAllocForAllRows_);
1164 }
1165 // "Commit" the resulting row offsets.
1166 setRowPtrsUnpacked(k_rowPtrs);
1167 }
// Debug-only cross-check: the last row offset must equal the total that
// the offset computation returned.
1168 if (debug_) {
1169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numInds != size_type(this->getRowPtrsUnpackedHost()(numRows)), std::logic_error,
1170 ": Number of indices produced by computeOffsetsFrom[Constant]Counts "
1171 "does not match final entry of rowptrs unpacked");
1172 }
1173
1174 if (lg == LocalIndices) {
1175 if (verbose) {
1176 std::ostringstream os;
1177 os << *prefix << "Allocate local column indices "
1178 "lclIndsUnpacked_wdv: "
1179 << numInds << endl;
1180 std::cerr << os.str();
1182 lclIndsUnpacked_wdv = local_inds_wdv_type(
1183 local_inds_dualv_type("Tpetra::CrsGraph::lclInd", numInds));
1184 } else {
1185 if (verbose) {
1186 std::ostringstream os;
1187 os << *prefix << "Allocate global column indices "
1188 "gblInds_wdv: "
1189 << numInds << endl;
1190 std::cerr << os.str();
1191 }
1192 gblInds_wdv = global_inds_wdv_type(
1193 global_inds_dualv_type("Tpetra::CrsGraph::gblInd", numInds));
1194 }
1195 storageStatus_ = Details::STORAGE_1D_UNPACKED;
1196
1197 this->indicesAreLocal_ = (lg == LocalIndices);
1198 this->indicesAreGlobal_ = (lg == GlobalIndices);
1199
1200 if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1201 using Kokkos::ViewAllocateWithoutInitializing;
1202 const char label[] = "Tpetra::CrsGraph::numRowEntries";
1203 if (verbose) {
1204 std::ostringstream os;
1205 os << *prefix << "Allocate k_numRowEntries_: " << numRows
1206 << endl;
1207 std::cerr << os.str();
1208 }
// Allocated without initialization, then explicitly zero-filled by the
// deep_copy below; the fence runs before the view is committed.
1209 num_row_entries_type numRowEnt(ViewAllocateWithoutInitializing(label), numRows);
1210 // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1211 Kokkos::deep_copy(execution_space(), numRowEnt, static_cast<size_t>(0)); // fill w/ 0s
1212 Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
1213 this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1214 }
1215
1216 // Once indices are allocated, CrsGraph needs to free this information.
1217 this->numAllocForAllRows_ = 0;
1218 this->k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
1219 this->indicesAreAllocated_ = true;
1220
// Re-validate the object and wrap any failure with the name of this method.
// A std::logic_error stays a std::logic_error; everything else is reported
// as std::runtime_error.
1221 try {
1222 this->checkInternalState();
1223 } catch (std::logic_error& e) {
1224 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
1225 "At end of allocateIndices, "
1226 "checkInternalState threw std::logic_error: "
1227 << e.what());
1228 } catch (std::exception& e) {
1229 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1230 "At end of allocateIndices, "
1231 "checkInternalState threw std::exception: "
1232 << e.what());
1233 } catch (...) {
1234 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1235 "At end of allocateIndices, "
1236 "checkInternalState threw an exception "
1237 "not a subclass of std::exception.");
1238 }
1239
1240 if (verbose) {
1241 std::ostringstream os;
1242 os << *prefix << "Done" << endl;
1243 std::cerr << os.str();
1244 }
1245}
1246
// Returns a read-only host view of one row's slots in lclIndsUnpacked_wdv:
// rowinfo.allocSize entries starting at rowinfo.offset1D.
// Returns an empty (default-constructed) view when the row has no allocated
// slots or when lclIndsUnpacked_wdv is empty.
// NOTE(review): the signature lines are missing from this listing; the name
// getLocalIndsViewHost is assumed -- confirm.
1247template <class LocalOrdinal, class GlobalOrdinal, class Node>
1248typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1249 local_inds_dualv_type::t_host::const_type
1252 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1253 return typename local_inds_dualv_type::t_host::const_type();
1254 else
1255 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1256 rowinfo.allocSize,
1257 Access::ReadOnly);
1258}
1259
// Returns a read-write host view of one row's slots in lclIndsUnpacked_wdv:
// rowinfo.allocSize entries starting at rowinfo.offset1D.
// Returns an empty (default-constructed) view when the row has no allocated
// slots or when lclIndsUnpacked_wdv is empty.
// NOTE(review): the signature lines are missing from this listing; the name
// getLocalIndsViewHostNonConst is assumed -- confirm.
1260template <class LocalOrdinal, class GlobalOrdinal, class Node>
1262 local_inds_dualv_type::t_host
1265 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1266 return typename local_inds_dualv_type::t_host();
1267 else
1268 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1269 rowinfo.allocSize,
1270 Access::ReadWrite);
1271}
1272
// Returns a read-only host view of one row's slots in gblInds_wdv:
// rowinfo.allocSize entries starting at rowinfo.offset1D.
// Returns an empty (default-constructed) view when the row has no allocated
// slots or when gblInds_wdv is empty.
// NOTE(review): the signature lines are missing from this listing; the name
// getGlobalIndsViewHost is assumed from its use in insertGlobalIndicesImpl.
1273template <class LocalOrdinal, class GlobalOrdinal, class Node>
1275 global_inds_dualv_type::t_host::const_type
1278 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1279 return typename global_inds_dualv_type::t_host::const_type();
1280 else
1281 return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1282 rowinfo.allocSize,
1283 Access::ReadOnly);
1284}
1285
// Returns a read-only device view of one row's slots in lclIndsUnpacked_wdv:
// rowinfo.allocSize entries starting at rowinfo.offset1D.
// Returns an empty (default-constructed) view when the row has no allocated
// slots or when lclIndsUnpacked_wdv is empty.
// NOTE(review): the signature lines are missing from this listing; the name
// getLocalIndsViewDevice is assumed -- confirm.
1286template <class LocalOrdinal, class GlobalOrdinal, class Node>
1288 local_inds_dualv_type::t_dev::const_type
1291 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1292 return typename local_inds_dualv_type::t_dev::const_type();
1293 else
1294 return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1295 rowinfo.allocSize,
1296 Access::ReadOnly);
1297}
1298
// Returns a read-only device view of one row's slots in gblInds_wdv:
// rowinfo.allocSize entries starting at rowinfo.offset1D.
// Returns an empty (default-constructed) view when the row has no allocated
// slots or when gblInds_wdv is empty.
// NOTE(review): the signature lines are missing from this listing; the name
// getGlobalIndsViewDevice is assumed -- confirm.
1299template <class LocalOrdinal, class GlobalOrdinal, class Node>
1301 global_inds_dualv_type::t_dev::const_type
1304 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1305 return typename global_inds_dualv_type::t_dev::const_type();
1306 else
1307 return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1308 rowinfo.allocSize,
1309 Access::ReadOnly);
1310}
1311
// Builds the RowInfo record (localRow, allocSize, numEntries, offset1D) for
// a local row index.
//
// Parameter:
//   myRow - local row index to look up.
//
// Result, by case:
//  - rowMap_ is null, or myRow is not a local element of the row Map:
//    localRow and offset1D are the invalid size_t sentinel, both sizes 0.
//  - indices allocated: offset1D comes from the unpacked host row pointers
//    (0 when that array is empty); numEntries is k_numRowEntries_(myRow),
//    or allocSize when k_numRowEntries_ is empty.
//  - indices not allocated: allocSize comes from the allocation hints,
//    numEntries is 0 and offset1D is the invalid sentinel.
//
// NOTE(review): this listing omits line 1335, which presumably assigns
// ret.allocSize in the non-empty row-pointer branch -- check the real file.
1312template <class LocalOrdinal, class GlobalOrdinal, class Node>
1313RowInfo
1315 getRowInfo(const LocalOrdinal myRow) const {
1316 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
1317 RowInfo ret;
1318 if (this->rowMap_.is_null() || !this->rowMap_->isNodeLocalElement(myRow)) {
1319 ret.localRow = STINV;
1320 ret.allocSize = 0;
1321 ret.numEntries = 0;
1322 ret.offset1D = STINV;
1323 return ret;
1324 }
1325
1326 ret.localRow = static_cast<size_t>(myRow);
1327 if (this->indicesAreAllocated()) {
1328 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1329 // Offsets tell us the allocation size in this case.
1330 if (rowPtrsUnpacked_host.extent(0) == 0) {
1331 ret.offset1D = 0;
1332 ret.allocSize = 0;
1333 } else {
1334 ret.offset1D = rowPtrsUnpacked_host(myRow);
1336 }
1337
// An empty k_numRowEntries_ is read as "every row is full", so the entry
// count equals the allocation size.
1338 ret.numEntries = (this->k_numRowEntries_.extent(0) == 0) ? ret.allocSize : this->k_numRowEntries_(myRow);
1339 } else { // haven't performed allocation yet; probably won't hit this code
1340 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1341 // allocate, rather than doing lazy allocation at first insert.
1342 // This will make k_numAllocPerRow_ obsolete.
1343 ret.allocSize = (this->k_numAllocPerRow_.extent(0) != 0) ? this->k_numAllocPerRow_(myRow) : // this is a host View
1344 this->numAllocForAllRows_;
1345 ret.numEntries = 0;
1346 ret.offset1D = STINV;
1347 }
1348
1349 return ret;
1350}
1351
// Builds the RowInfo record for a row given by its global index (gblRow).
// The global index is translated through rowMap_->getLocalElement; the
// remaining logic mirrors getRowInfo.
//
// Result, by case:
//  - rowMap_ is null, or gblRow has no local index on this process:
//    localRow and offset1D are the invalid size_t sentinel, both sizes 0.
//  - indices allocated: offset1D comes from the unpacked host row pointers
//    (0 when that array is empty); numEntries is k_numRowEntries_(myRow),
//    or allocSize when k_numRowEntries_ is empty.
//  - indices not allocated: allocSize comes from the allocation hints,
//    numEntries is 0 and offset1D is the invalid sentinel.
//
// NOTE(review): the signature lines and lines 1385-1386 (presumably the
// allocSize assignment and a closing brace) are missing from this listing;
// the name getRowInfoFromGlobalRowIndex is assumed -- confirm.
1352template <class LocalOrdinal, class GlobalOrdinal, class Node>
1353RowInfo
1356 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
1357 RowInfo ret;
1358 if (this->rowMap_.is_null()) {
1359 ret.localRow = STINV;
1360 ret.allocSize = 0;
1361 ret.numEntries = 0;
1362 ret.offset1D = STINV;
1363 return ret;
1364 }
1365 const LocalOrdinal myRow = this->rowMap_->getLocalElement(gblRow);
// getLocalElement returning the invalid LocalOrdinal is handled the same
// way as a null row Map.
1366 if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
1367 ret.localRow = STINV;
1368 ret.allocSize = 0;
1369 ret.numEntries = 0;
1370 ret.offset1D = STINV;
1371 return ret;
1372 }
1373
1374 ret.localRow = static_cast<size_t>(myRow);
1375 if (this->indicesAreAllocated()) {
1376 // graph data structures have the info that we need
1377 //
1378 // if static graph, offsets tell us the allocation size
1379 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1380 if (rowPtrsUnpacked_host.extent(0) == 0) {
1381 ret.offset1D = 0;
1382 ret.allocSize = 0;
1383 } else {
1384 ret.offset1D = rowPtrsUnpacked_host(myRow);
1387
1388 ret.numEntries = (this->k_numRowEntries_.extent(0) == 0) ? ret.allocSize : this->k_numRowEntries_(myRow);
1389 } else { // haven't performed allocation yet; probably won't hit this code
1390 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1391 // allocate, rather than doing lazy allocation at first insert.
1392 // This will make k_numAllocPerRow_ obsolete.
1393 ret.allocSize = (this->k_numAllocPerRow_.extent(0) != 0) ? this->k_numAllocPerRow_(myRow) : // this is a host View
1394 this->numAllocForAllRows_;
1395 ret.numEntries = 0;
1396 ret.offset1D = STINV;
1397 }
1398
1399 return ret;
1400}
1401
// Checks the size relationships between LocalOrdinal, GlobalOrdinal, size_t
// and global_size_t that this class depends on.
// The sizeof relationships are enforced at compile time with static_assert.
// The comparisons of OrdinalTraits<...>::max() values are done at run time
// and report std::runtime_error with the shared message msg.
// NOTE(review): the lines that open each of the four run-time checks (1431,
// 1434, 1437, 1440) are missing from this listing; only their condition and
// exception arguments are visible.
1402template <class LocalOrdinal, class GlobalOrdinal, class Node>
1403void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1404 staticAssertions() const {
1405 using Teuchos::OrdinalTraits;
1406 typedef LocalOrdinal LO;
1407 typedef GlobalOrdinal GO;
1408 typedef global_size_t GST;
1409
1410 // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1411 // This is so that we can store local indices in the memory
1412 // formerly occupied by global indices.
1413 static_assert(sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal),
1414 "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1415 // Assumption: max(size_t) >= max(LocalOrdinal)
1416 // This is so that we can represent any LocalOrdinal as a size_t.
1417 static_assert(sizeof(size_t) >= sizeof(LocalOrdinal),
1418 "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1419 static_assert(sizeof(GST) >= sizeof(size_t),
1420 "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1421
1422 // FIXME (mfh 30 Sep 2015) We're not using
1423 // Teuchos::CompileTimeAssert any more. Can we do these checks
1424 // with static_assert?
1425
1426 // can't call max() with CompileTimeAssert, because it isn't a
1427 // constant expression; will need to make this a runtime check
1428 const char msg[] =
1429 "Tpetra::CrsGraph: Object cannot be created with the "
1430 "given template arguments: size assumptions are not valid.";
// Each condition below is the failure case: max(LO) > max(size_t),
// max(LO) > max(GO), max(GO) > max(GST), and max(size_t) > max(GST).
1432 static_cast<size_t>(Teuchos::OrdinalTraits<LO>::max()) > Teuchos::OrdinalTraits<size_t>::max(),
1433 std::runtime_error, msg);
1435 static_cast<GST>(Teuchos::OrdinalTraits<LO>::max()) > static_cast<GST>(Teuchos::OrdinalTraits<GO>::max()),
1436 std::runtime_error, msg);
1438 static_cast<size_t>(Teuchos::OrdinalTraits<GO>::max()) > Teuchos::OrdinalTraits<GST>::max(),
1439 std::runtime_error, msg);
1441 Teuchos::OrdinalTraits<size_t>::max() > Teuchos::OrdinalTraits<GST>::max(),
1442 std::runtime_error, msg);
1443}
1444
// Appends new column indices to the end of one row's current entries and
// returns how many were appended.
//
// Parameters (as far as this listing shows them):
//   rowinfo - row descriptor; its numEntries field is incremented here, so
//             it is evidently taken by non-const reference (its declaration
//             line is missing from this listing).
//   newInds - holder of the input indices (ginds / linds).
//   lg      - kind of the input indices (GlobalIndices or LocalIndices).
//   I       - kind of indices to store.
//
// Supported combinations:
//   global in, global stored - copied into gblInds_wdv (the copy statement
//                              itself, line 1480, is missing here).
//   global in, local stored  - each index is converted with
//                              colMap_->getLocalElement and stored in
//                              lclIndsUnpacked_wdv.
//   local in, local stored   - copied into lclIndsUnpacked_wdv.
//   local in, global stored  - not implemented; throws std::logic_error.
//
// No capacity check is made outside debug mode; the writes go to
// offset1D + numEntries of the underlying array.
// NOTE(review): the debug checks compare the size of the whole index array
// against numEntries + numNewInds and ignore offset1D and allocSize, so
// they do not catch an overflow of an individual row -- confirm whether
// this is intended.
1445template <class LocalOrdinal, class GlobalOrdinal, class Node>
1446size_t
1449 const SLocalGlobalViews& newInds,
1450 const ELocalGlobal lg,
1451 const ELocalGlobal I) {
1452 using Teuchos::ArrayView;
1453 typedef LocalOrdinal LO;
1454 typedef GlobalOrdinal GO;
1455 const char tfecfFuncName[] = "insertIndices: ";
1456
// oldNumEnt is only read in debug mode, by the check at the end.
1457 size_t oldNumEnt = 0;
1458 if (debug_) {
1459 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1460 "lg must be either GlobalIndices or LocalIndices.");
1461 oldNumEnt = this->getNumEntriesInLocalRow(rowinfo.localRow);
1462 }
1463
1464 size_t numNewInds = 0;
1465 if (lg == GlobalIndices) { // input indices are global
1467 numNewInds = new_ginds.size();
1468 if (I == GlobalIndices) { // store global indices
1469 auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1470 if (debug_) {
1471 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(gind_view.size()) <
1472 rowinfo.numEntries + numNewInds,
1473 std::logic_error,
1474 "gind_view.size() = " << gind_view.size()
1475 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1476 << ") + numNewInds (= " << numNewInds << ").");
1477 }
// Destination: the first unused slot of this row.
1478 GO* const gblColInds_out = gind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1479 for (size_t k = 0; k < numNewInds; ++k) {
1481 }
1482 } else if (I == LocalIndices) { // store local indices
1483 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1484 if (debug_) {
1485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(lind_view.size()) <
1486 rowinfo.numEntries + numNewInds,
1487 std::logic_error,
1488 "lind_view.size() = " << lind_view.size()
1489 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1490 << ") + numNewInds (= " << numNewInds << ").");
1492 LO* const lclColInds_out = lind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1493 for (size_t k = 0; k < numNewInds; ++k) {
// The result of getLocalElement is stored unchecked.
1494 lclColInds_out[k] = colMap_->getLocalElement(new_ginds[k]);
1495 }
1496 }
1497 } else if (lg == LocalIndices) { // input indices are local
1498 ArrayView<const LO> new_linds = newInds.linds;
1499 numNewInds = new_linds.size();
1500 if (I == LocalIndices) { // store local indices
1501 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1502 if (debug_) {
1503 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(lind_view.size()) <
1504 rowinfo.numEntries + numNewInds,
1505 std::logic_error,
1506 "lind_view.size() = " << lind_view.size()
1507 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1508 << ") + numNewInds (= " << numNewInds << ").");
1509 }
1510 LO* const lclColInds_out = lind_view.data() + rowinfo.offset1D + rowinfo.numEntries;
1511 for (size_t k = 0; k < numNewInds; ++k) {
1512 lclColInds_out[k] = new_linds[k];
1513 }
1514 } else if (I == GlobalIndices) {
1515 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
1516 "The case where the input indices are local "
1517 "and the indices to write are global (lg=LocalIndices, I="
1518 "GlobalIndices) is not implemented, because it does not make sense."
1519 << std::endl
1520 << "If you have correct local column indices, that "
1521 "means the graph has a column Map. In that case, you should be "
1522 "storing local indices.");
1524 }
1525
// Update both the caller's RowInfo and the per-row entry count, then mark
// the graph as locally modified.
1526 rowinfo.numEntries += numNewInds;
1527 this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1528 this->setLocallyModified();
1529
1530 if (debug_) {
1531 const size_t chkNewNumEnt =
1532 this->getNumEntriesInLocalRow(rowinfo.localRow);
1533 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1534 "chkNewNumEnt = " << chkNewNumEnt
1535 << " != oldNumEnt (= " << oldNumEnt
1536 << ") + numNewInds (= " << numNewInds << ").");
1537 }
1538
1539 return numNewInds;
1540}
1541
// Convenience overload: looks up the RowInfo for lclRow with getRowInfo and
// forwards to the RowInfo-based insertGlobalIndicesImpl.
// NOTE(review): most of the signature and the remaining call arguments
// (line 1549) are missing from this listing.
1542template <class LocalOrdinal, class GlobalOrdinal, class Node>
1543size_t
1547 const size_t numInputInds) {
1548 return this->insertGlobalIndicesImpl(this->getRowInfo(lclRow),
1550}
1551
// Inserts global column indices into one row through
// Details::insertCrsIndices and returns the number that call reports as
// inserted.
//
// Parameters (as far as this listing shows them):
//   rowInfo      - descriptor of the target row (declaration line missing).
//   numInputInds - number of input indices.
//   fun          - callback forwarded unchanged to insertCrsIndices.
//
// If insertCrsIndices returns the invalid size_t sentinel, the row did not
// have enough capacity: a diagnostic naming the process rank, the row, its
// current entry count and its allocation size is built, and
// std::runtime_error is thrown. On success k_numRowEntries_(lclRow) is
// increased and the graph is marked locally modified.
//
// NOTE(review): several lines are missing from this listing (the input view
// setup at 1570-1571, one insertCrsIndices argument at 1576, and parts of
// the diagnostic at 1596-1597, 1602, 1605 and 1610).
1552template <class LocalOrdinal, class GlobalOrdinal, class Node>
1553size_t
1557 const size_t numInputInds,
1558 std::function<void(const size_t, const size_t, const size_t)> fun) {
1560 using Kokkos::MemoryUnmanaged;
1561 using Kokkos::subview;
1562 using Kokkos::View;
1563 using Teuchos::ArrayView;
1564 using LO = LocalOrdinal;
1565 using GO = GlobalOrdinal;
1566 const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1567 const LO lclRow = static_cast<LO>(rowInfo.localRow);
1568
// Local copy of the entry count, handed to insertCrsIndices; rowInfo itself
// is still read unchanged when the diagnostic is built.
1569 auto numEntries = rowInfo.numEntries;
1572 size_t numInserted;
// The host view is acquired inside its own scope, so it is released before
// the code below runs.
1573 {
1574 auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1575 numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1577 numEntries, inputInds, fun);
1578 }
1579
1580 const bool insertFailed =
1581 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1582 if (insertFailed) {
1583 constexpr size_t ONE(1);
1584 const int myRank = this->getComm()->getRank();
1585 std::ostringstream os;
1586
1587 os << "Proc " << myRank << ": Not enough capacity to insert "
1588 << numInputInds
1589 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1590 << " into local row " << lclRow << ", which currently has "
1591 << rowInfo.numEntries
1592 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1593 << " and total allocation size " << rowInfo.allocSize
1594 << ". ";
1595 const size_t maxNumToPrint =
1598 numInputInds);
1599 verbosePrintArray(os, inputGblColIndsView,
1600 "Input global "
1601 "column indices",
1603 os << ", ";
1604 auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1606 rowInfo.numEntries);
1607 verbosePrintArray(os, curGblColIndsView,
1608 "Current global "
1609 "column indices",
1611 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
1612 }
1613
1614 this->k_numRowEntries_(lclRow) += numInserted;
1615
1616 this->setLocallyModified();
1617 return numInserted;
1618}
1619
// Inserts local column indices into local row myRow through
// Details::insertCrsIndices.
//
// Parameters (as far as this listing shows them):
//   myRow   - local row index (its declaration line is missing here).
//   indices - local column indices to insert; host memory.
//   fun     - callback forwarded unchanged to insertCrsIndices.
//
// If insertCrsIndices returns the invalid size_t sentinel, the row did not
// have enough capacity and std::runtime_error is thrown, naming the process
// rank, the row, its current entry count and its allocation size. On success
// the per-row entry count is updated and the graph is marked locally
// modified; in debug mode the new count is cross-checked against
// getNumEntriesInLocalRow.
//
// Fix: tfecfFuncName was misspelled "insertLocallIndicesImpl: ", so every
// exception raised here named a method that does not exist.
//
// NOTE(review): this listing omits lines 1621-1622 (signature), 1638 (the
// inp_view_type alias), 1664 and 1672 (start of the debug check). In the
// visible lines numNewInds is never assigned after its initialization; line
// 1664 presumably sets it from numInserted -- confirm against the real file.
1620template <class LocalOrdinal, class GlobalOrdinal, class Node>
1623 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1624 std::function<void(const size_t, const size_t, const size_t)> fun) {
1625 using Kokkos::MemoryUnmanaged;
1626 using Kokkos::subview;
1627 using Kokkos::View;
1628 using LO = LocalOrdinal;
1629 const char tfecfFuncName[] = "insertLocalIndicesImpl: ";
1630
1631 const RowInfo rowInfo = this->getRowInfo(myRow);
1632
1633 size_t numNewInds = 0;
1634 size_t newNumEntries = 0;
1635
// Local copy of the entry count, handed to insertCrsIndices; rowInfo itself
// is const and keeps the pre-insert values for the diagnostic below.
1636 auto numEntries = rowInfo.numEntries;
1637 // Note: Teuchos::ArrayViews are in HostSpace
1639 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1640 size_t numInserted = 0;
// The host view is acquired inside its own scope, so it is released before
// the code below runs.
1641 {
1642 auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1643 numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1644 numEntries, inputInds, fun);
1645 }
1646
1647 const bool insertFailed =
1648 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1649 if (insertFailed) {
1650 constexpr size_t ONE(1);
1651 const size_t numInputInds(indices.size());
1652 const int myRank = this->getComm()->getRank();
1653 std::ostringstream os;
1654 os << "On MPI Process " << myRank << ": Not enough capacity to "
1655 "insert "
1656 << numInputInds
1657 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1658 << " into local row " << myRow << ", which currently has "
1659 << rowInfo.numEntries
1660 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1661 << " and total allocation size " << rowInfo.allocSize << ".";
1662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
1663 }
1665 newNumEntries = rowInfo.numEntries + numNewInds;
1666
1667 this->k_numRowEntries_(myRow) += numNewInds;
1668 this->setLocallyModified();
1669
1670 if (debug_) {
1671 const size_t chkNewNumEntries = this->getNumEntriesInLocalRow(myRow);
1673 "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1674 << " != newNumEntries = " << newNumEntries
1675 << ". Please report this bug to the Tpetra developers.");
1676 }
1677}
1678
// Looks up the given global column indices in one row through
// Details::findCrsIndices and returns the count that call reports.
//
// Parameters (as far as this listing shows them):
//   rowInfo - descriptor of the row to search (declaration line missing).
//   indices - global column indices to look for.
//   fun     - callback forwarded unchanged to findCrsIndices.
//
// Behaviour by indexing mode:
//  - locally indexed: returns the invalid size_t sentinel when there is no
//    column Map; otherwise each global index is mapped with
//    colMap.getLocalElement and the row is searched in lclIndsUnpacked_wdv.
//  - globally indexed: the row is searched directly in gblInds_wdv.
//  - neither: 0 is returned.
//
// NOTE(review): the signature lines, the inp_view_type alias (line 1690)
// and the brace closing the else-if (line 1707) are missing from this
// listing; the name findGlobalIndices is assumed -- confirm.
1679template <class LocalOrdinal, class GlobalOrdinal, class Node>
1680size_t
1683 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1684 std::function<void(const size_t, const size_t, const size_t)> fun) const {
1685 using GO = GlobalOrdinal;
1686 using Kokkos::MemoryUnmanaged;
1687 using Kokkos::View;
1688 auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1689
1691 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1692
1693 size_t numFound = 0;
// rowInfo.localRow is assigned to a LocalOrdinal here without a cast.
1694 LocalOrdinal lclRow = rowInfo.localRow;
1695 if (this->isLocallyIndexed()) {
1696 if (this->colMap_.is_null())
1697 return invalidCount;
1698 const auto& colMap = *(this->colMap_);
// The lambda captures colMap by reference; it is only used within this call.
1699 auto map = [&](GO const gblInd) { return colMap.getLocalElement(gblInd); };
1700 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1701 rowInfo.numEntries,
1702 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1703 } else if (this->isGloballyIndexed()) {
1704 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1705 rowInfo.numEntries,
1706 gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1708 return numFound;
1709}
1710
// Sorts one row's local column indices and removes duplicates, returning
// the number of duplicates removed.
//
// Parameters (as far as this listing shows them):
//   rowInfo - descriptor of the row (declaration line missing).
//   sorted  - true if the row is already sorted; the sort step is skipped.
//   merged  - true if the row already has no duplicates; the merge step is
//             skipped and 0 is returned.
//
// Returns 0 when the row has no entries or its entry count is the invalid
// sentinel. When merging, k_numRowEntries_ for the row is set to the new
// count.
//
// std::unique only removes adjacent duplicates, so the merge step relies on
// the row being sorted at that point.
// NOTE(review): the signature lines, the sort call (line 1724) and the
// definition of beg (line 1728) are missing from this listing; the name
// sortAndMergeRowIndices is assumed -- confirm.
1711template <class LocalOrdinal, class GlobalOrdinal, class Node>
1712size_t
1715 const bool sorted,
1716 const bool merged) {
1717 const size_t origNumEnt = rowInfo.numEntries;
1718 if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid() &&
1719 origNumEnt != 0) {
1720 auto lclColInds = this->getLocalIndsViewHostNonConst(rowInfo);
1721
1722 LocalOrdinal* const lclColIndsRaw = lclColInds.data();
1723 if (!sorted) {
1725 }
1726
1727 if (!merged) {
1729 LocalOrdinal* const end = beg + rowInfo.numEntries;
1730 LocalOrdinal* const newend = std::unique(beg, end);
1731 const size_t newNumEnt = newend - beg;
1732
1733 // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1734 this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1735 return origNumEnt - newNumEnt; // the number of duplicates in the row
1736 } else {
1737 return static_cast<size_t>(0); // assume no duplicates
1738 }
1739 } else {
1740 return static_cast<size_t>(0); // no entries in the row
1741 }
1742}
1743
// Replaces the domain and range Maps.
//
// Parameters:
//   domainMap - new domain Map; if it differs from the stored one, it is
//               stored and importer_ is reset to null.
//   rangeMap  - new range Map; if it differs from the stored one, it is
//               stored and exporter_ is reset to null.
//
// "Differs" means the RCP comparison below (pointer identity, per the
// existing comment), not a structural comparison of the Maps.
1744template <class LocalOrdinal, class GlobalOrdinal, class Node>
1746 setDomainRangeMaps(const Teuchos::RCP<const map_type>& domainMap,
1747 const Teuchos::RCP<const map_type>& rangeMap) {
1748 // simple pointer comparison for equality
1749 if (domainMap_ != domainMap) {
1750 domainMap_ = domainMap;
1751 importer_ = Teuchos::null;
1752 }
1753 if (rangeMap_ != rangeMap) {
1754 rangeMap_ = rangeMap;
1755 exporter_ = Teuchos::null;
1756 }
1757}
1758
// Invalidates the cached global constants: sets globalNumEntries_ and
// globalMaxNumRowEntries_ to the invalid global_size_t sentinel and clears
// haveGlobalConstants_. This is the state that checkInternalState requires
// whenever haveGlobalConstants_ is false.
// NOTE(review): the signature lines are missing from this listing; the name
// clearGlobalConstants is assumed -- confirm.
1759template <class LocalOrdinal, class GlobalOrdinal, class Node>
1762 const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1763
1764 globalNumEntries_ = INV;
1765 globalMaxNumRowEntries_ = INV;
1766 haveGlobalConstants_ = false;
1767}
1768
1769template <class LocalOrdinal, class GlobalOrdinal, class Node>
1771 checkInternalState() const {
1772 if (debug_) {
1773 using std::endl;
1774 const char tfecfFuncName[] = "checkInternalState: ";
1775 const char suffix[] = " Please report this bug to the Tpetra developers.";
1776
1777 std::unique_ptr<std::string> prefix;
1778 if (verbose_) {
1779 prefix = this->createPrefix("CrsGraph", "checkInternalState");
1780 std::ostringstream os;
1781 os << *prefix << "Start" << endl;
1782 std::cerr << os.str();
1783 }
1784
1785 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid();
1786 // const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1787 // check the internal state of this data structure
1788 // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1789 // always remains in a valid state
1790
1791 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->rowMap_.is_null(), std::logic_error,
1792 "Row Map is null." << suffix);
1793 // This may access the row Map, so we need to check first (above)
1794 // whether the row Map is null.
1796 static_cast<LocalOrdinal>(this->getLocalNumRows());
1797
1798 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillActive() == this->isFillComplete(), std::logic_error,
1799 "Graph cannot be both fill active and fill complete." << suffix);
1800 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete() &&
1801 (this->colMap_.is_null() ||
1802 this->rangeMap_.is_null() ||
1803 this->domainMap_.is_null()),
1804 std::logic_error,
1805 "Graph is full complete, but at least one of {column, range, domain} "
1806 "Map is null."
1807 << suffix);
1808 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized() && !this->indicesAreAllocated(),
1809 std::logic_error,
1810 "Storage is optimized, but indices are not "
1811 "allocated, not even trivially."
1812 << suffix);
1813
1814 size_t nodeAllocSize = 0;
1815 try {
1816 nodeAllocSize = this->getLocalAllocationSize();
1817 } catch (std::logic_error& e) {
1818 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1819 "getLocalAllocationSize threw "
1820 "std::logic_error: "
1821 << e.what());
1822 } catch (std::exception& e) {
1823 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1824 "getLocalAllocationSize threw an "
1825 "std::exception: "
1826 << e.what());
1827 } catch (...) {
1828 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
1829 "getLocalAllocationSize threw an exception "
1830 "not a subclass of std::exception.");
1831 }
1832
1833 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isStorageOptimized() &&
1834 nodeAllocSize != this->getLocalNumEntries(),
1835 std::logic_error,
1836 "Storage is optimized, but "
1837 "this->getLocalAllocationSize() = "
1838 << nodeAllocSize
1839 << " != this->getLocalNumEntries() = " << this->getLocalNumEntries()
1840 << "." << suffix);
1841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->haveGlobalConstants_ &&
1842 (this->globalNumEntries_ != GSTI ||
1843 this->globalMaxNumRowEntries_ != GSTI),
1844 std::logic_error,
1845 "Graph claims not to have global constants, but "
1846 "some of the global constants are not marked as invalid."
1847 << suffix);
1848 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->haveGlobalConstants_ &&
1849 (this->globalNumEntries_ == GSTI ||
1850 this->globalMaxNumRowEntries_ == GSTI),
1851 std::logic_error,
1852 "Graph claims to have global constants, but "
1853 "some of them are marked as invalid."
1854 << suffix);
1855 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->haveGlobalConstants_ &&
1856 (this->globalNumEntries_ < this->getLocalNumEntries() ||
1857 this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
1858 std::logic_error,
1859 "Graph claims to have global constants, and "
1860 "all of the values of the global constants are valid, but "
1861 "some of the local constants are greater than "
1862 "their corresponding global constants."
1864 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreAllocated() &&
1865 (this->numAllocForAllRows_ != 0 ||
1866 this->k_numAllocPerRow_.extent(0) != 0),
1867 std::logic_error,
1868 "The graph claims that its indices are allocated, but "
1869 "either numAllocForAllRows_ (= "
1870 << this->numAllocForAllRows_ << ") is "
1871 "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
1872 "the graph is supposed to release its \"allocation specifications\" "
1873 "when it allocates its indices."
1874 << suffix);
1875 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1876 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
1878 std::logic_error,
1879 "The host and device views of k_rowPtrs_ have "
1880 "different sizes; rowPtrsUnpacked_host_ has size "
1881 << rowPtrsUnpacked_host.extent(0)
1882 << ", but rowPtrsUnpacked_dev_ has size "
1883 << rowPtrsUnpacked_dev.extent(0)
1884 << "." << suffix);
1885 if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
1887 std::logic_error,
1888 "The graph is globally indexed and "
1889 "k_rowPtrs has nonzero size "
1890 << rowPtrsUnpacked_host.extent(0)
1891 << ", but that size does not equal lclNumRows+1 = "
1892 << (lclNumRows + 1) << "." << suffix);
1894 std::logic_error,
1895 "The graph is globally indexed and "
1896 "k_rowPtrs_ has nonzero size "
1897 << rowPtrsUnpacked_host.extent(0)
1898 << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
1900 << " != gblInds_wdv.extent(0)="
1901 << gblInds_wdv.extent(0) << "." << suffix);
1902 }
1903 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() &&
1904 rowPtrsUnpacked_host.extent(0) != 0 &&
1905 (static_cast<size_t>(rowPtrsUnpacked_host.extent(0)) !=
1906 static_cast<size_t>(lclNumRows + 1) ||
1908 static_cast<size_t>(this->lclIndsUnpacked_wdv.extent(0))),
1909 std::logic_error,
1910 "If k_rowPtrs_ has nonzero size and "
1911 "the graph is locally indexed, then "
1912 "k_rowPtrs_ must have N+1 rows, and "
1913 "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)."
1914 << suffix);
1915
1916 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreAllocated() &&
1917 nodeAllocSize > 0 &&
1918 this->lclIndsUnpacked_wdv.extent(0) == 0 &&
1919 this->gblInds_wdv.extent(0) == 0,
1920 std::logic_error,
1921 "Graph is allocated nontrivially, but "
1922 "but 1-D allocations are not present."
1923 << suffix);
1924
1925 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->indicesAreAllocated() &&
1926 ((rowPtrsUnpacked_host.extent(0) != 0 ||
1927 this->k_numRowEntries_.extent(0) != 0) ||
1928 this->lclIndsUnpacked_wdv.extent(0) != 0 ||
1929 this->gblInds_wdv.extent(0) != 0),
1930 std::logic_error,
1931 "If indices are not allocated, "
1932 "then none of the buffers should be."
1933 << suffix);
1934 // indices may be local or global only if they are allocated
1935 // (numAllocated is redundant; could simply be indicesAreLocal_ ||
1936 // indicesAreGlobal_)
1937 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
1938 !this->indicesAreAllocated_,
1939 std::logic_error,
1940 "Indices may be local or global only if they are "
1941 "allocated."
1942 << suffix);
1943 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->indicesAreLocal_ && this->indicesAreGlobal_,
1944 std::logic_error, "Indices may not be both local and global." << suffix);
1945 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreLocal_ && gblInds_wdv.extent(0) != 0,
1946 std::logic_error,
1947 "Indices are local, but "
1948 "gblInds_wdv.extent(0) (= "
1949 << gblInds_wdv.extent(0)
1950 << ") != 0. In other words, if indices are local, then "
1951 "allocations of global indices should not be present."
1952 << suffix);
1953 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreGlobal_ && lclIndsUnpacked_wdv.extent(0) != 0,
1954 std::logic_error,
1955 "Indices are global, but "
1956 "lclIndsUnpacked_wdv.extent(0) (= "
1957 << lclIndsUnpacked_wdv.extent(0)
1958 << ") != 0. In other words, if indices are global, "
1959 "then allocations for local indices should not be present."
1960 << suffix);
1961 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreLocal_ && nodeAllocSize > 0 &&
1962 lclIndsUnpacked_wdv.extent(0) == 0 && getLocalNumRows() > 0,
1963 std::logic_error,
1964 "Indices are local and "
1965 "getLocalAllocationSize() = "
1966 << nodeAllocSize << " > 0, but "
1967 "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
1968 << getLocalNumRows() << " > 0." << suffix);
1969 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(indicesAreGlobal_ && nodeAllocSize > 0 &&
1970 gblInds_wdv.extent(0) == 0 && getLocalNumRows() > 0,
1971 std::logic_error,
1972 "Indices are global and "
1973 "getLocalAllocationSize() = "
1974 << nodeAllocSize << " > 0, but "
1975 "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
1976 << getLocalNumRows() << " > 0." << suffix);
1977 // check the actual allocations
1978 if (this->indicesAreAllocated() &&
1979 rowPtrsUnpacked_host.extent(0) != 0) {
1980 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(rowPtrsUnpacked_host.extent(0)) !=
1981 this->getLocalNumRows() + 1,
1982 std::logic_error,
1983 "Indices are allocated and "
1984 "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
1985 << rowPtrsUnpacked_host.extent(0) << " != getLocalNumRows()+1 = "
1986 << (this->getLocalNumRows() + 1) << "." << suffix);
1987 const size_t actualNumAllocated =
1988 rowPtrsUnpacked_host(this->getLocalNumRows());
1989 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() &&
1990 static_cast<size_t>(this->lclIndsUnpacked_wdv.extent(0)) != actualNumAllocated,
1991 std::logic_error,
1992 "Graph is locally indexed, indices are "
1993 "are allocated, and k_rowPtrs_ has nonzero length, but "
1994 "lclIndsUnpacked_wdv.extent(0) = "
1995 << this->lclIndsUnpacked_wdv.extent(0)
1996 << " != actualNumAllocated = " << actualNumAllocated << suffix);
1997 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed() &&
1998 static_cast<size_t>(this->gblInds_wdv.extent(0)) != actualNumAllocated,
1999 std::logic_error,
2000 "Graph is globally indexed, indices "
2001 "are allocated, and k_rowPtrs_ has nonzero length, but "
2002 "gblInds_wdv.extent(0) = "
2003 << this->gblInds_wdv.extent(0)
2004 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2005 }
2006
2007 if (verbose_) {
2008 std::ostringstream os;
2009 os << *prefix << "Done" << endl;
2010 std::cerr << os.str();
2011 }
2012 }
2013}
2014
// Query the number of entries stored on the calling process for the row
// identified by the global row index `globalRow`.
//
// The row is looked up with getRowInfoFromGlobalRowIndex(). If the returned
// RowInfo marks the row as invalid (localRow equals
// Teuchos::OrdinalTraits<size_t>::invalid()), that same invalid value is
// returned; otherwise the result is RowInfo::numEntries.
//
// NOTE(review): listing lines 2017-2018 (presumably the class qualifier and
// the function name / parameter list) are absent from this extract. The name
// getNumEntriesInGlobalRow is an assumption based on the sibling
// getNumEntriesInLocalRow -- confirm against the original file.
2015template <class LocalOrdinal, class GlobalOrdinal, class Node>
2016size_t
2019 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(globalRow);
2020 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2021 return Teuchos::OrdinalTraits<size_t>::invalid();
2022 } else {
2023 return rowInfo.numEntries;
2024 }
2025}
2026
// Query the number of entries stored on the calling process for the row
// identified by the local row index `localRow`.
//
// The row is looked up with getRowInfo(). If the returned RowInfo marks the
// row as invalid (localRow equals Teuchos::OrdinalTraits<size_t>::invalid()),
// that same invalid value is returned; otherwise the result is
// RowInfo::numEntries.
//
// NOTE(review): listing line 2029 (presumably the class qualifier) is absent
// from this extract.
2027template <class LocalOrdinal, class GlobalOrdinal, class Node>
2028size_t
2030 getNumEntriesInLocalRow(LocalOrdinal localRow) const {
2031 const RowInfo rowInfo = this->getRowInfo(localRow);
2032 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2033 return Teuchos::OrdinalTraits<size_t>::invalid();
2034 } else {
2035 return rowInfo.numEntries;
2036 }
2037}
2038
// Query the allocated size (RowInfo::allocSize) of the row identified by the
// global row index `globalRow`.
//
// The row is looked up with getRowInfoFromGlobalRowIndex(). If the returned
// RowInfo marks the row as invalid, Teuchos::OrdinalTraits<size_t>::invalid()
// is returned instead.
//
// NOTE(review): listing lines 2041-2042 (presumably the class qualifier and
// the function name / parameter list) are absent from this extract, so the
// function's name cannot be read from here -- confirm against the original
// file.
2039template <class LocalOrdinal, class GlobalOrdinal, class Node>
2040size_t
2043 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(globalRow);
2044 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2045 return Teuchos::OrdinalTraits<size_t>::invalid();
2046 } else {
2047 return rowInfo.allocSize;
2048 }
2049}
2050
// Query the allocated size (RowInfo::allocSize) of the row identified by the
// local row index `localRow`.
//
// The row is looked up with getRowInfo(). If the returned RowInfo marks the
// row as invalid, Teuchos::OrdinalTraits<size_t>::invalid() is returned
// instead.
//
// NOTE(review): listing lines 2053-2054 (presumably the class qualifier and
// the function name / parameter list) are absent from this extract, so the
// function's name cannot be read from here -- confirm against the original
// file.
2051template <class LocalOrdinal, class GlobalOrdinal, class Node>
2052size_t
2055 const RowInfo rowInfo = this->getRowInfo(localRow);
2056 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid()) {
2057 return Teuchos::OrdinalTraits<size_t>::invalid();
2058 } else {
2059 return rowInfo.allocSize;
2060 }
2061}
2062
// Return the host view of the graph's row offsets; this simply forwards to
// getRowPtrsPackedHost(), i.e. it exposes the packed (not the unpacked)
// offsets.
//
// NOTE(review): listing line 2065 (presumably the class qualifier) is absent
// from this extract.
2063template <class LocalOrdinal, class GlobalOrdinal, class Node>
2064typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2066 getLocalRowPtrsHost() const {
2067 return getRowPtrsPackedHost();
2068}
// Return the device view of the graph's row offsets; this simply forwards to
// getRowPtrsPackedDevice(), i.e. it exposes the packed (not the unpacked)
// offsets.
//
// NOTE(review): listing line 2072 (presumably the class qualifier) is absent
// from this extract.
2070template <class LocalOrdinal, class GlobalOrdinal, class Node>
2071typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2073 getLocalRowPtrsDevice() const {
2074 return getRowPtrsPackedDevice();
2075}
2076
// Return a host view of the packed local column indices
// (lclIndsPacked_wdv), requested with Access::ReadOnly.
//
// NOTE(review): listing line 2079 (presumably the class qualifier) is absent
// from this extract.
2077template <class LocalOrdinal, class GlobalOrdinal, class Node>
2078typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2080 getLocalIndicesHost() const {
2081 return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2082}
2083
// Return a device view of the packed local column indices
// (lclIndsPacked_wdv), requested with Access::ReadOnly.
//
// NOTE(review): listing lines 2085-2086 (presumably the return type and the
// class qualifier) are absent from this extract.
2084template <class LocalOrdinal, class GlobalOrdinal, class Node>
2087 getLocalIndicesDevice() const {
2088 return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2089}
2090
// Copy the column indices of local row `localRow`, expressed as local
// indices, into the caller-provided view `indices`, and report how many were
// written through `numEntries`.
//
// Behavior visible in this block:
//  - If the graph is globally indexed and has no column Map, the check below
//    reports std::runtime_error.
//  - If `indices` is too small to hold the row, std::runtime_error is
//    reported before `numEntries` is modified.
//  - For a locally indexed graph the indices are copied as stored; for a
//    globally indexed graph each stored global index is converted with
//    colMap_->getLocalElement().
//  - If getRowInfo() reports an invalid row, nothing is copied.
//
// NOTE(review): listing lines 2092-2093 (presumably the return type / class
// qualifier and the function name with the `localRow` parameter) and line
// 2099 (presumably the opening TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( of the
// first check) are absent from this extract.
2091template <class LocalOrdinal, class GlobalOrdinal, class Node>
2094 nonconst_local_inds_host_view_type& indices,
2095 size_t& numEntries) const {
2096 using Teuchos::ArrayView;
2097 const char tfecfFuncName[] = "getLocalRowCopy: ";
2098
2100 isGloballyIndexed() && !hasColMap(), std::runtime_error,
2101 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2102 "does not have a column Map yet. That means we don't have local indices "
2103 "for columns yet, so it doesn't make sense to call this method. If the "
2104 "graph doesn't have a column Map yet, you should call fillComplete on "
2105 "it first.");
2106
2107 // This does the right thing (reports an empty row) if the input
2108 // row is invalid.
2109 const RowInfo rowinfo = this->getRowInfo(localRow);
2110 // No side effects on error.
2111 const size_t theNumEntries = rowinfo.numEntries;
2112 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) < theNumEntries, std::runtime_error,
2113 "Specified storage (size==" << indices.size() << ") does not suffice "
2114 "to hold all "
2115 << theNumEntries << " entry/ies for this row.");
// First write to an output argument; happens only after the size check.
2116 numEntries = theNumEntries;
2117
2118 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
2119 if (isLocallyIndexed()) {
// Stored indices are already local: plain element-wise copy.
2120 auto lclInds = getLocalIndsViewHost(rowinfo);
2121 for (size_t j = 0; j < theNumEntries; ++j) {
2122 indices[j] = lclInds(j);
2123 }
2124 } else if (isGloballyIndexed()) {
// Stored indices are global: translate each one through the column Map.
2125 auto gblInds = getGlobalIndsViewHost(rowinfo);
2126 for (size_t j = 0; j < theNumEntries; ++j) {
2127 indices[j] = colMap_->getLocalElement(gblInds(j));
2128 }
2129 }
2130 }
2131}
2132
// Copy the column indices of the row with global index `globalRow`,
// expressed as global indices, into the caller-provided view `indices`, and
// report how many were written through `numEntries`.
//
// Behavior visible in this block:
//  - If `indices` is too small to hold the row, std::runtime_error is
//    reported before `numEntries` is modified.
//  - For a locally indexed graph each stored local index is converted with
//    colMap_->getGlobalElement(); for a globally indexed graph the indices
//    are copied as stored.
//  - If getRowInfoFromGlobalRowIndex() reports an invalid row, nothing is
//    copied.
//
// NOTE(review): listing lines 2134-2135 (presumably the return type / class
// qualifier and the function name with the `globalRow` parameter) and line
// 2145 (presumably the opening TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( of the
// size check) are absent from this extract.
2133template <class LocalOrdinal, class GlobalOrdinal, class Node>
2136 nonconst_global_inds_host_view_type& indices,
2137 size_t& numEntries) const {
2138 using Teuchos::ArrayView;
2139 const char tfecfFuncName[] = "getGlobalRowCopy: ";
2140
2141 // This does the right thing (reports an empty row) if the input
2142 // row is invalid.
2143 const RowInfo rowinfo = getRowInfoFromGlobalRowIndex(globalRow);
2144 const size_t theNumEntries = rowinfo.numEntries;
2146 static_cast<size_t>(indices.size()) < theNumEntries, std::runtime_error,
2147 "Specified storage (size==" << indices.size() << ") does not suffice "
2148 "to hold all "
2149 << theNumEntries << " entry/ies for this row.");
2150 numEntries = theNumEntries; // first side effect
2151
2152 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid()) {
2153 if (isLocallyIndexed()) {
// Stored indices are local: translate each one through the column Map.
2154 auto lclInds = getLocalIndsViewHost(rowinfo);
2155 for (size_t j = 0; j < theNumEntries; ++j) {
2156 indices[j] = colMap_->getGlobalElement(lclInds(j));
2157 }
2158 } else if (isGloballyIndexed()) {
// Stored indices are already global: plain element-wise copy.
2159 auto gblInds = getGlobalIndsViewHost(rowinfo);
2160 for (size_t j = 0; j < theNumEntries; ++j) {
2161 indices[j] = gblInds(j);
2162 }
2163 }
2164 }
2165}
2166
// Set `indices` to a read-only host view of the local column indices of
// local row `localRow`, without copying them.
//
// Behavior visible in this block:
//  - If the graph is globally indexed, std::runtime_error is reported.
//  - If the row is valid and nonempty, `indices` becomes the subview of
//    lclIndsUnpacked_wdv that starts at RowInfo::offset1D and has
//    RowInfo::numEntries elements.
//  - Otherwise `indices` is reset to a default-constructed (empty) view.
//  - When debug_ is set, the size of the resulting view is cross-checked
//    against getNumEntriesInLocalRow(localRow); a mismatch is reported as
//    std::logic_error.
//
// NOTE(review): listing lines 2168-2169 (presumably the return type, class
// qualifier and function name) are absent from this extract.
2167template <class LocalOrdinal, class GlobalOrdinal, class Node>
2170 const LocalOrdinal localRow,
2171 local_inds_host_view_type& indices) const {
2172 const char tfecfFuncName[] = "getLocalRowView: ";
2173
2174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed(), std::runtime_error,
2175 "The graph's indices are "
2176 "currently stored as global indices, so we cannot return a view with "
2177 "local column indices, whether or not the graph has a column Map. If "
2178 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2179
2180 const RowInfo rowInfo = getRowInfo(localRow);
2181 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
2182 rowInfo.numEntries > 0) {
2183 indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2184 rowInfo.numEntries,
2185 Access::ReadOnly);
2186 } else {
2187 // This does the right thing (reports an empty row) if the input
2188 // row is invalid.
2189 indices = local_inds_host_view_type();
2190 }
2191
2192 if (debug_) {
2193 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
2194 getNumEntriesInLocalRow(localRow),
2195 std::logic_error,
2196 "indices.size() "
2197 "= " << indices.extent(0)
2198 << " != getNumEntriesInLocalRow(localRow=" << localRow << ") = " << getNumEntriesInLocalRow(localRow) << ". Please report this bug to the Tpetra developers.");
2199 }
2200}
2201
// Set `indices` to a read-only host view of the global column indices of the
// row with global index `globalRow`, without copying them.
//
// Behavior visible in this block:
//  - If the graph is locally indexed, std::runtime_error is reported.
//  - If the row is valid and nonempty, `indices` becomes the subview of
//    gblInds_wdv that starts at RowInfo::offset1D and has
//    RowInfo::numEntries elements.
//  - Otherwise `indices` is reset to a default-constructed (empty) view.
//  - When debug_ is set, the size of the resulting view is cross-checked
//    against getNumEntriesInGlobalRow(globalRow); a mismatch is reported as
//    std::logic_error.
//
// NOTE(review): listing lines 2203-2205 (presumably the return type, class
// qualifier, function name and the `globalRow` parameter) are absent from
// this extract.
2202template <class LocalOrdinal, class GlobalOrdinal, class Node>
2206 global_inds_host_view_type& indices) const {
2207 const char tfecfFuncName[] = "getGlobalRowView: ";
2208
2209 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isLocallyIndexed(), std::runtime_error,
2210 "The graph's indices are "
2211 "currently stored as local indices, so we cannot return a view with "
2212 "global column indices. Use getGlobalRowCopy() instead.");
2213
2214 // This does the right thing (reports an empty row) if the input
2215 // row is invalid.
2216 const RowInfo rowInfo = getRowInfoFromGlobalRowIndex(globalRow);
2217 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid() &&
2218 rowInfo.numEntries > 0) {
2219 indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2220 rowInfo.numEntries,
2221 Access::ReadOnly);
2222 } else {
2223 indices = typename global_inds_dualv_type::t_host::const_type();
2224 }
2225 if (debug_) {
2226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(indices.size()) !=
2227 getNumEntriesInGlobalRow(globalRow),
2228 std::logic_error, "indices.size() = " << indices.extent(0) << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = " << getNumEntriesInGlobalRow(globalRow) << ". Please report this bug to the Tpetra developers.");
2229 }
2230}
2231
// Insert the local column indices `indices` into local row `localRow`.
//
// Preconditions enforced below (each reported as std::runtime_error):
//  - fill must be active,
//  - the graph must not be globally indexed,
//  - the graph must have a column Map,
//  - `localRow` must be a local element of the row Map.
//
// If indices have not been allocated yet, they are allocated as local
// indices first. In debug mode (debug_), every input index is additionally
// checked against the column Map, and std::invalid_argument is reported with
// a list of the offending indices if any is not a local element of it. The
// actual insertion is delegated to insertLocalIndicesImpl().
//
// NOTE(review): listing line 2233 (presumably the return type and class
// qualifier) and line 2262 (presumably the declaration of `badColInds`, which
// is used below) are absent from this extract.
2232template <class LocalOrdinal, class GlobalOrdinal, class Node>
2234 insertLocalIndices(const LocalOrdinal localRow,
2235 const Teuchos::ArrayView<const LocalOrdinal>& indices) {
2236 const char tfecfFuncName[] = "insertLocalIndices: ";
2237
2238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error, "Fill must be active.");
2239 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isGloballyIndexed(), std::runtime_error,
2240 "Graph indices are global; use insertGlobalIndices().");
2241 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error,
2242 "Cannot insert local indices without a column Map.");
2243 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!rowMap_->isNodeLocalElement(localRow), std::runtime_error,
2244 "Local row index " << localRow << " is not in the row Map "
2245 "on the calling process.");
2246 if (!indicesAreAllocated()) {
2247 allocateIndices(LocalIndices, verbose_);
2248 }
2249
2250 if (debug_) {
2251 // In debug mode, if the graph has a column Map, test whether any
2252 // of the given column indices are not in the column Map. Keep
2253 // track of the invalid column indices so we can tell the user
2254 // about them.
2255 if (hasColMap()) {
2256 using std::endl;
2257 using Teuchos::Array;
2258 using Teuchos::toString;
2259 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2260
2261 const map_type& colMap = *colMap_;
2263 bool allInColMap = true;
2264 for (size_type k = 0; k < indices.size(); ++k) {
2265 if (!colMap.isNodeLocalElement(indices[k])) {
2266 allInColMap = false;
2267 badColInds.push_back(indices[k]);
2268 }
2269 }
2270 if (!allInColMap) {
2271 std::ostringstream os;
2272 os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2273 "entries in owned row "
2274 << localRow << ", at the following column "
2275 "indices: "
2276 << toString(indices) << "." << endl;
2277 os << "Of those, the following indices are not in the column Map on "
2278 "this process: "
2279 << toString(badColInds) << "." << endl
2280 << "Since "
2281 "the graph has a column Map already, it is invalid to insert entries "
2282 "at those locations.";
// The condition is always true inside this branch; the macro is used here
// only to raise the exception with the message assembled above.
2283 TEUCHOS_TEST_FOR_EXCEPTION(!allInColMap, std::invalid_argument, os.str());
2284 }
2285 }
2286 }
2287
2288 insertLocalIndicesImpl(localRow, indices);
2289
2290 if (debug_) {
// Post-condition: after an insertion the graph must be allocated and
// locally indexed.
2291 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!indicesAreAllocated() || !isLocallyIndexed(), std::logic_error,
2292 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2293 "! isLocallyIndexed() is true. Please report this bug to the "
2294 "Tpetra developers.");
2295 }
2296}
2297
// Raw-array convenience overload of insertLocalIndices: wraps the `numEnt`
// entries starting at `inds` in a non-owning Teuchos::ArrayView and forwards
// to the ArrayView overload.
//
// NOTE(review): listing line 2299 (presumably the return type and class
// qualifier) and line 2301 (presumably the declaration of the `numEnt`
// parameter, which is used below) are absent from this extract.
2298template <class LocalOrdinal, class GlobalOrdinal, class Node>
2300 insertLocalIndices(const LocalOrdinal localRow,
2302 const LocalOrdinal inds[]) {
2303 Teuchos::ArrayView<const LocalOrdinal> indsT(inds, numEnt);
2304 this->insertLocalIndices(localRow, indsT);
2305}
2306
// Insert the `numInputInds` global column indices in `inputGblColInds` into
// the row with global index `gblRow`.
//
// Preconditions enforced below (each reported as std::runtime_error):
//  - the graph must not be locally indexed,
//  - fill must be active.
//
// If indices have not been allocated yet, they are allocated as global
// indices first. The row is then looked up in the row Map:
//  - if it is owned by the calling process, the indices are inserted with
//    insertGlobalIndicesImpl(); in debug mode with a column Map present,
//    every input index is first checked against the column Map and
//    std::invalid_argument is reported if any is missing from it;
//  - otherwise the indices are handed to
//    insertGlobalIndicesIntoNonownedRows().
//
// NOTE(review): listing lines 2308-2311 (presumably the return type, class
// qualifier, function name and the whole parameter list) are absent from this
// extract; the parameter names above are taken from their uses in the body.
2307template <class LocalOrdinal, class GlobalOrdinal, class Node>
2312 typedef LocalOrdinal LO;
2313 const char tfecfFuncName[] = "insertGlobalIndices: ";
2314
2315 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed(), std::runtime_error,
2316 "graph indices are local; use insertLocalIndices().");
2317 // This can't really be satisfied for now, because if we are
2318 // fillComplete(), then we are local. In the future, this may
2319 // change. However, the rule that modification require active
2320 // fill will not change.
2321 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
2322 "You are not allowed to call this method if fill is not active. "
2323 "If fillComplete has been called, you must first call resumeFill "
2324 "before you may insert indices.");
2325 if (!indicesAreAllocated()) {
2326 allocateIndices(GlobalIndices, verbose_);
2327 }
2328 const LO lclRow = this->rowMap_->getLocalElement(gblRow);
2329 if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2330 if (debug_) {
2331 if (this->hasColMap()) {
2332 using std::endl;
2333 const map_type& colMap = *(this->colMap_);
2334 // In a debug build, keep track of the nonowned ("bad") column
2335 // indices, so that we can display them in the exception
2336 // message. In a release build, just ditch the loop early if
2337 // we encounter a nonowned column index.
2338 std::vector<GlobalOrdinal> badColInds;
2339 bool allInColMap = true;
2340 for (LO k = 0; k < numInputInds; ++k) {
2341 if (!colMap.isNodeGlobalElement(inputGblColInds[k])) {
2342 allInColMap = false;
2343 badColInds.push_back(inputGblColInds[k]);
2344 }
2345 }
2346 if (!allInColMap) {
// Build a message listing all input indices, then the rejected ones.
2347 std::ostringstream os;
2348 os << "You attempted to insert entries in owned row " << gblRow
2349 << ", at the following column indices: [";
2350 for (LO k = 0; k < numInputInds; ++k) {
2351 os << inputGblColInds[k];
2352 if (k + static_cast<LO>(1) < numInputInds) {
2353 os << ",";
2354 }
2355 }
2356 os << "]." << endl
2357 << "Of those, the following indices are not in "
2358 "the column Map on this process: [";
2359 for (size_t k = 0; k < badColInds.size(); ++k) {
2360 os << badColInds[k];
2361 if (k + size_t(1) < badColInds.size()) {
2362 os << ",";
2363 }
2364 }
2365 os << "]." << endl
2366 << "Since the matrix has a column Map already, "
2367 "it is invalid to insert entries at those locations.";
2368 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, os.str());
2369 }
2370 }
2371 } // debug_
2372 this->insertGlobalIndicesImpl(lclRow, inputGblColInds, numInputInds);
2373 } else { // a nonlocal row
2374 this->insertGlobalIndicesIntoNonownedRows(gblRow, inputGblColInds,
2375 numInputInds);
2376 }
2377}
2378
// ArrayView convenience overload of insertGlobalIndices: forwards the view's
// size and raw pointer to the (count, pointer) overload.
//
// NOTE(review): listing lines 2380-2381 (presumably the return type, class
// qualifier, function name and the `gblRow` parameter) are absent from this
// extract.
2379template <class LocalOrdinal, class GlobalOrdinal, class Node>
2382 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds) {
2383 this->insertGlobalIndices(gblRow, inputGblColInds.size(),
2384 inputGblColInds.getRawPtr());
2385}
2386
// Insert global column indices into the local row `lclRow`, silently dropping
// any index that is not in the column Map on the calling process.
//
// Preconditions enforced below (each reported as std::runtime_error):
//  - the graph must not be locally indexed,
//  - fill must be active.
//
// If indices have not been allocated yet, they are allocated as global
// indices first. With a column Map, the input is scanned for maximal runs of
// consecutive indices that are all in the column Map, and each run is passed
// to insertGlobalIndicesImpl(). Without a column Map, all input indices are
// inserted unfiltered.
//
// NOTE(review): listing lines 2388-2389 (presumably the return type, class
// qualifier, function name and the `lclRow` parameter) and line 2391
// (presumably the declaration of the `numGblColInds` parameter) are absent
// from this extract.
2387template <class LocalOrdinal, class GlobalOrdinal, class Node>
2390 const GlobalOrdinal gblColInds[],
2392 typedef LocalOrdinal LO;
2393 typedef GlobalOrdinal GO;
2394 const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2395
2396 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed(), std::runtime_error,
2397 "Graph indices are local; use insertLocalIndices().");
2398 // This can't really be satisfied for now, because if we are
2399 // fillComplete(), then we are local. In the future, this may
2400 // change. However, the rule that modification require active
2401 // fill will not change.
2402 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->isFillActive(), std::runtime_error,
2403 "You are not allowed to call this method if fill is not active. "
2404 "If fillComplete has been called, you must first call resumeFill "
2405 "before you may insert indices.");
2406 if (!indicesAreAllocated()) {
2407 allocateIndices(GlobalIndices, verbose_);
2408 }
2409
// Non-owning view of the input; only used by the unfiltered branch below.
2410 Teuchos::ArrayView<const GO> gblColInds_av(gblColInds, numGblColInds);
2411 // If we have a column Map, use it to filter the entries.
2412 if (!colMap_.is_null()) {
2413 const map_type& colMap = *(this->colMap_);
2414
2415 LO curOffset = 0;
2416 while (curOffset < numGblColInds) {
2417 // Find a sequence of input indices that are in the column Map
2418 // on the calling process. Doing a sequence at a time,
2419 // instead of one at a time, amortizes some overhead.
2420 LO endOffset = curOffset;
2421 for (; endOffset < numGblColInds; ++endOffset) {
2422 const LO lclCol = colMap.getLocalElement(gblColInds[endOffset]);
2423 if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2424 break; // first entry, in current sequence, not in the column Map
2425 }
2426 }
2427 // curOffset, endOffset: half-exclusive range of indices in
2428 // the column Map on the calling process. If endOffset ==
2429 // curOffset, the range is empty.
2430 const LO numIndInSeq = (endOffset - curOffset);
2431 if (numIndInSeq != 0) {
2432 this->insertGlobalIndicesImpl(lclRow, gblColInds + curOffset,
2433 numIndInSeq);
2434 }
2435 // Invariant before this line: Either endOffset ==
2436 // numGblColInds, or gblColInds[endOffset] is not in the
2437 // column Map on the calling process.
// Skip past the rejected index (or step beyond the end, ending the loop).
2438 curOffset = endOffset + 1;
2439 }
2440 } else {
2441 this->insertGlobalIndicesImpl(lclRow, gblColInds_av.getRawPtr(),
2442 gblColInds_av.size());
2443 }
2444}
2445
// Append the `numGblColInds` global column indices in `gblColInds` to the
// per-row list kept in nonlocals_ for global row `gblRow`. Duplicates are not
// removed here.
//
// NOTE(review): listing lines 2447-2448 (presumably the return type, class
// qualifier, function name and the `gblRow` parameter) and line 2450
// (presumably the declaration of the `numGblColInds` parameter) are absent
// from this extract, so the function's name cannot be read from here. It is
// presumably insertGlobalIndicesIntoNonownedRows, which insertGlobalIndices
// calls with matching arguments -- confirm against the original file.
2446template <class LocalOrdinal, class GlobalOrdinal, class Node>
2449 const GlobalOrdinal gblColInds[],
2451 // This creates the std::vector if it doesn't exist yet.
2452 // std::map's operator[] does a lookup each time, so it's better
2453 // to pull nonlocals_[grow] out of the loop.
2454 std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2455 for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2456 // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2457 // order to avoid duplicates. globalAssemble() sorts these
2458 // anyway.
2459 nonlocalRow.push_back(gblColInds[k]);
2460 }
2461}
2462
// Remove all entries from local row `lrow` by resetting its entry count to
// zero; the storage itself is not touched in this block.
//
// The four argument lists below each name a condition, std::runtime_error and
// a message: fill not active, storage already optimized, graph globally
// indexed, and `lrow` not in the row Map on the calling process. If indices
// have not been allocated yet, they are allocated as local indices. In debug
// mode the post-conditions (row is empty, indices allocated, graph locally
// indexed) are verified and a violation is reported as std::logic_error.
//
// NOTE(review): listing lines 2464-2465 (presumably the return type, class
// qualifier, function name and the `lrow` parameter) and lines 2467, 2469,
// 2472 and 2474 (presumably the opening
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( of each of the four checks) are
// absent from this extract.
2463template <class LocalOrdinal, class GlobalOrdinal, class Node>
2466 const char tfecfFuncName[] = "removeLocalIndices: ";
2468 !isFillActive(), std::runtime_error, "requires that fill is active.");
2470 isStorageOptimized(), std::runtime_error,
2471 "cannot remove indices after optimizeStorage() has been called.");
2473 isGloballyIndexed(), std::runtime_error, "graph indices are global.");
2475 !rowMap_->isNodeLocalElement(lrow), std::runtime_error,
2476 "Local row " << lrow << " is not in the row Map on the calling process.");
2477 if (!indicesAreAllocated()) {
2478 allocateIndices(LocalIndices, verbose_);
2479 }
2480
// Only write the count when the per-row count array actually exists.
2481 if (k_numRowEntries_.extent(0) != 0) {
2482 this->k_numRowEntries_(lrow) = 0;
2483 }
2484
2485 if (debug_) {
2486 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(getNumEntriesInLocalRow(lrow) != 0 ||
2487 !indicesAreAllocated() ||
2488 !isLocallyIndexed(),
2489 std::logic_error,
2490 "Violated stated post-conditions. Please contact Tpetra team.");
2491 }
2492}
2493
// Install caller-provided row offsets (`rowPointers`) and local column
// indices (`columnIndices`) as the graph's entire local structure.
//
// Steps visible in this block:
//  1. Require a column Map (std::runtime_error otherwise).
//  2. Validate the length of `rowPointers` against the local row count: with
//     zero local rows a length of 0 or 1 is accepted, otherwise the length
//     must be getLocalNumRows()+1 (std::runtime_error otherwise).
//  3. In debug mode, count column indices outside [0, getLocalNumCols()) with
//     a parallel reduction, combine the result across processes with a
//     REDUCE_MAX all-reduce, print a per-rank message via gathervPrint and
//     throw std::invalid_argument.
//  4. In debug mode, and only if isSorted() is true, verify in the same
//     fashion that the indices within each row are nondecreasing.
//  5. Mark the graph as allocated, locally indexed, sorted and free of
//     redundancies; store `columnIndices` as both the packed and unpacked
//     local indices; store the row offsets with setRowPtrs(); mark storage as
//     STORAGE_1D_PACKED; clear the allocation hints; run checkInternalState().
//
// NOTE(review): indicesAreSorted_ and noRedundancies_ are set to true without
// any check outside debug mode, so the input is presumably required to be
// sorted and merged by the caller -- confirm against the class documentation.
//
// NOTE(review): several listing lines are absent from this extract: 2495
// (presumably return type and class qualifier), 2501, 2508 and 2512
// (presumably macro openers), 2506 (presumably the declaration of
// `rowPtrLen`), 2524 (presumably the lambda header of the first reduction),
// 2528 (presumably its result argument) and 2533 (presumably the `if` on
// `globalColsOutOfBounds`).
2494template <class LocalOrdinal, class GlobalOrdinal, class Node>
2496 setAllIndices(const typename local_graph_device_type::row_map_type& rowPointers,
2497 const typename local_graph_device_type::entries_type::non_const_type& columnIndices) {
2498 using ProfilingRegion = Details::ProfilingRegion;
2499 ProfilingRegion region("Tpetra::CrsGraph::setAllIndices");
2500 const char tfecfFuncName[] = "setAllIndices: ";
2502 !hasColMap() || getColMap().is_null(), std::runtime_error,
2503 "The graph must have a column Map before you may call this method.");
2504 LocalOrdinal numLocalRows = this->getLocalNumRows();
2505 {
2507 if (numLocalRows == 0) {
2509 rowPtrLen != 0 && rowPtrLen != 1,
2510 std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2511 } else {
2513 rowPtrLen != numLocalRows + 1,
2514 std::runtime_error, "rowPointers.size() = " << rowPtrLen << " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2515 }
2516 }
2517
2518 if (debug_) {
2519 using exec_space = typename local_graph_device_type::execution_space;
2520 int columnsOutOfBounds = 0;
2521 local_ordinal_type numLocalCols = this->getLocalNumCols();
2522 Kokkos::parallel_reduce(
2523 Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2525 if (columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2526 lOutOfBounds++;
2527 },
2529 int globalColsOutOfBounds = 0;
2530 auto comm = this->getComm();
2531 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2532 Teuchos::outArg(globalColsOutOfBounds));
2534 std::string message;
2535 if (columnsOutOfBounds) {
2536 // Only print message from ranks with the problem
2537 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2538 }
2539 Details::gathervPrint(std::cout, message, *comm);
2540 throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2541 }
2542 }
2543
2544 if (debug_ && this->isSorted()) {
2545 // Verify that the local indices are actually sorted
2546 int notSorted = 0;
2547 using exec_space = typename local_graph_device_type::execution_space;
2548 using size_type = typename local_graph_device_type::size_type;
2549 Kokkos::parallel_reduce(
2550 Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2551 KOKKOS_LAMBDA(const LocalOrdinal i, int& lNotSorted) {
2552 size_type rowBegin = rowPointers(i);
2553 size_type rowEnd = rowPointers(i + 1);
// Compare each entry with its predecessor inside the same row.
2554 for (size_type j = rowBegin + 1; j < rowEnd; j++) {
2555 if (columnIndices(j - 1) > columnIndices(j)) {
2556 lNotSorted = 1;
2557 }
2558 }
2559 },
2560 notSorted);
2561 // All-reduce notSorted to avoid rank divergence
2562 int globalNotSorted = 0;
2563 auto comm = this->getComm();
2564 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX, notSorted,
2565 Teuchos::outArg(globalNotSorted));
2566 if (globalNotSorted) {
2567 std::string message;
2568 if (notSorted) {
2569 // Only print message from ranks with the problem
2570 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2571 }
2572 Details::gathervPrint(std::cout, message, *comm);
2573 throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2574 }
2575 }
2576
2577 indicesAreAllocated_ = true;
2578 indicesAreLocal_ = true;
2579 indicesAreSorted_ = true;
2580 noRedundancies_ = true;
// Packed and unpacked local indices refer to the same wrapped view.
2581 lclIndsPacked_wdv = local_inds_wdv_type(columnIndices);
2582 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2583 setRowPtrs(rowPointers);
2584
2585 set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2586
2587 // Storage MUST be packed, since the interface doesn't give any
2588 // way to indicate any extra space at the end of each row.
2589 storageStatus_ = Details::STORAGE_1D_PACKED;
2590
2591 // These normally get cleared out at the end of allocateIndices.
2592 // It makes sense to clear them out here, because at the end of
2593 // this method, the graph is allocated on the calling process.
2594 numAllocForAllRows_ = 0;
2595 k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
2596
2597 checkInternalState();
2598}
2599
// Teuchos::ArrayRCP convenience overload of setAllIndices.
//
// The row offsets in `rowPointers` are wrapped in an unmanaged host view
// (`ptr_in`) and copied into a newly allocated view `ptr_rot` of the local
// graph's non-const row-map type. When size_t and the row offset type are the
// same type, this is a single Kokkos::deep_copy; otherwise the copy goes
// through one of the two branches below, depending on whether the row map
// lives in Kokkos::HostSpace. The column indices are deep-copied into a
// device view with getKokkosViewDeepCopy(), and both views are passed to the
// Kokkos-view overload of setAllIndices.
//
// NOTE(review): listing line 2601 (presumably the return type and class
// qualifier) and lines 2632 and 2646 (presumably the statements that perform
// the casting copy into `ptr_rot`, as described by the comments preceding
// them) are absent from this extract.
2600template <class LocalOrdinal, class GlobalOrdinal, class Node>
2602 setAllIndices(const Teuchos::ArrayRCP<size_t>& rowPointers,
2603 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices) {
2604 using Kokkos::View;
2605 typedef typename local_graph_device_type::row_map_type row_map_type;
2606 typedef typename row_map_type::array_layout layout_type;
2607 typedef typename row_map_type::non_const_value_type row_offset_type;
2608 typedef View<size_t*, layout_type, Kokkos::HostSpace,
2609 Kokkos::MemoryUnmanaged>
2610 input_view_type;
2611 typedef typename row_map_type::non_const_type nc_row_map_type;
2612
2613 const size_t size = static_cast<size_t>(rowPointers.size());
2614 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
// Non-owning wrapper around the caller's buffer; no copy happens here.
2615 input_view_type ptr_in(rowPointers.getRawPtr(), size);
2616
2617 nc_row_map_type ptr_rot("Tpetra::CrsGraph::ptr", size);
2618
2619 if constexpr (same) { // size_t == row_offset_type
2620 using lexecution_space = typename device_type::execution_space;
2621 Kokkos::deep_copy(lexecution_space(),
2622 ptr_rot,
2623 ptr_in);
2624 } else { // size_t != row_offset_type
2625 // CudaUvmSpace != HostSpace, so this will be false in that case.
2626 constexpr bool inHostMemory =
2627 std::is_same<typename row_map_type::memory_space,
2628 Kokkos::HostSpace>::value;
2629 if (inHostMemory) {
2630 // Copy (with cast from size_t to row_offset_type, with bounds
2631 // checking if necessary) to ptr_rot.
2633 } else { // Copy input row offsets to device first.
2634 //
2635 // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2636 // execution space would avoid the double copy.
2637 //
2638 View<size_t*, layout_type, device_type> ptr_st("Tpetra::CrsGraph::ptr", size);
2639
2640 // DEEP_COPY REVIEW - NOT TESTED
2641 Kokkos::deep_copy(ptr_st, ptr_in);
2642 // Copy on device (casting from size_t to row_offset_type,
2643 // with bounds checking if necessary) to ptr_rot. This
2644 // executes in the output View's execution space, which is the
2645 // same as execution_space.
2647 }
2648 }
2649
2650 Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2651 Kokkos::Compat::getKokkosViewDeepCopy<device_type>(columnIndices());
2652 setAllIndices(ptr_rot, k_ind);
2653}
2654
// globalAssemble (name taken from the tfecfFuncName string below; the
// signature lines numbered 2656-2657 are absent from this listing).
//
// Moves the entries stashed in nonlocals_ into *this:
//   - the fill-active test reports std::runtime_error when
//     isFillActive() is false;
//   - returns early when someoneHasNonlocalRows is 0;
//   - sorts and deduplicates, in place, the column indices of every
//     row stored in nonlocals_;
//   - builds a temporary CrsGraph over the nonlocal rows and transfers
//     it into *this, either with doExport (branch logged as
//     1-to-1) or, per the comments in the other branch, through a
//     temporary graph on a one-to-one row Map followed by doImport;
//   - finally swaps nonlocals_ with an empty container and calls
//     checkInternalState().
// When verbose_ is set, progress messages are written to std::cerr.
//
// NOTE(review): the embedded listing numbers skip in several places
// (2694-2695, 2720, 2729, 2734, 2750, 2754-2756, 2758, 2773, 2803,
// 2809, 2822-2823, 2837, 2844). The statements on those lines are
// missing from this listing and are deliberately not described here;
// confirm against the original header before relying on this text.
2655template <class LocalOrdinal, class GlobalOrdinal, class Node>
2658 using std::endl;
2659 using Teuchos::Comm;
2660 using Teuchos::outArg;
2661 using Teuchos::RCP;
2662 using Teuchos::rcp;
2663 using Teuchos::REDUCE_MAX;
2664 using Teuchos::REDUCE_MIN;
2665 using Teuchos::reduceAll;
2666 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2667 using LO = local_ordinal_type;
2668 using GO = global_ordinal_type;
2669 using size_type = typename Teuchos::Array<GO>::size_type;
2670 const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2671
2672 std::unique_ptr<std::string> prefix;
2673 if (verbose_) {
2674 prefix = this->createPrefix("CrsGraph", "globalAssemble");
2675 std::ostringstream os;
2676 os << *prefix << "Start" << endl;
2677 std::cerr << os.str();
2678 }
2679 RCP<const Comm<int>> comm = getComm();
2680
2681 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive(), std::runtime_error,
2682 "Fill must be active before "
2683 "you may call this method.");
2684
2685 const size_t myNumNonlocalRows = this->nonlocals_.size();
2686
2687 // If no processes have nonlocal rows, then we don't have to do
2688 // anything. Checking this is probably cheaper than constructing
2689 // the Map of nonlocal rows (see below) and noticing that it has
2690 // zero global entries.
2691 {
2692 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2693 int someoneHasNonlocalRows = 0;
2696 if (someoneHasNonlocalRows == 0) {
2697 if (verbose_) {
2698 std::ostringstream os;
2699 os << *prefix << "Done: No nonlocal rows" << endl;
2700 std::cerr << os.str();
2701 }
2702 return;
2703 } else if (verbose_) {
2704 std::ostringstream os;
2705 os << *prefix << "At least 1 process has nonlocal rows"
2706 << endl;
2707 std::cerr << os.str();
2708 }
2709 }
2710
2711 // 1. Create a list of the "nonlocal" rows on each process. This
2712 // requires iterating over nonlocals_, so while we do this,
2713 // deduplicate the entries and get a count for each nonlocal
2714 // row on this process.
2715 // 2. Construct a new row Map corresponding to those rows. This
2716 // Map is likely overlapping. We know that the Map is not
2717 // empty on all processes, because the above all-reduce and
2718 // return exclude that case.
2719
2721 // Keep this for CrsGraph's constructor.
2722 Teuchos::Array<size_t> numEntPerNonlocalRow(myNumNonlocalRows);
2723 {
2724 Teuchos::Array<GO> myNonlocalGblRows(myNumNonlocalRows);
2725 size_type curPos = 0;
2726 for (auto mapIter = this->nonlocals_.begin();
2727 mapIter != this->nonlocals_.end();
2728 ++mapIter, ++curPos) {
2730 std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
2731 std::sort(gblCols.begin(), gblCols.end());
2732 auto vecLast = std::unique(gblCols.begin(), gblCols.end());
2733 gblCols.erase(vecLast, gblCols.end());
2735 }
2736
2737 // Currently, Map requires that its indexBase be the global min
2738 // of all its global indices. Map won't compute this for us, so
2739 // we must do it. If our process has no nonlocal rows, set the
2740 // "min" to the max possible GO value. This ensures that if
2741 // some process has at least one nonlocal row, then it will pick
2742 // that up as the min. We know that at least one process has a
2743 // nonlocal row, since the all-reduce and return at the top of
2744 // this method excluded that case.
2745 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max();
2746 {
2747 auto iter = std::min_element(myNonlocalGblRows.begin(),
2748 myNonlocalGblRows.end());
2749 if (iter != myNonlocalGblRows.end()) {
2751 }
2752 }
2753 GO gblMinNonlocalGblRow = 0;
2757 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
2759 }
2760
2761 if (verbose_) {
2762 std::ostringstream os;
2763 os << *prefix << "nonlocalRowMap->getIndexBase()="
2764 << nonlocalRowMap->getIndexBase() << endl;
2765 std::cerr << os.str();
2766 }
2767
2768 // 3. Use the column indices for each nonlocal row, as stored in
2769 // nonlocals_, to construct a CrsGraph corresponding to
2770 // nonlocal rows. We need, and we have, exact counts of the
2771 // number of entries in each nonlocal row.
2772
2774 rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
2775 {
2776 size_type curPos = 0;
2777 for (auto mapIter = this->nonlocals_.begin();
2778 mapIter != this->nonlocals_.end();
2779 ++mapIter, ++curPos) {
2780 const GO gblRow = mapIter->first;
2781 std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
2782 const LO numEnt = static_cast<LO>(numEntPerNonlocalRow[curPos]);
2783 nonlocalGraph->insertGlobalIndices(gblRow, numEnt, gblCols.data());
2784 }
2785 }
2786 if (verbose_) {
2787 std::ostringstream os;
2788 os << *prefix << "Built nonlocal graph" << endl;
2789 std::cerr << os.str();
2790 }
2791 // There's no need to fill-complete the nonlocals graph.
2792 // We just use it as a temporary container for the Export.
2793
2794 // 4. If the original row Map is one to one, then we can Export
2795 // directly from nonlocalGraph into this. Otherwise, we have
2796 // to create a temporary graph with a one-to-one row Map,
2797 // Export into that, then Import from the temporary graph into
2798 // *this.
2799
2800 auto origRowMap = this->getRowMap();
2801 const bool origRowMapIsOneToOne = origRowMap->isOneToOne();
2802
2804 if (verbose_) {
2805 std::ostringstream os;
2806 os << *prefix << "Original row Map is 1-to-1" << endl;
2807 std::cerr << os.str();
2808 }
2810 this->doExport(*nonlocalGraph, exportToOrig, Tpetra::INSERT);
2811 // We're done at this point!
2812 } else {
2813 if (verbose_) {
2814 std::ostringstream os;
2815 os << *prefix << "Original row Map is NOT 1-to-1" << endl;
2816 std::cerr << os.str();
2817 }
2818 // If you ask a Map whether it is one to one, it does some
2819 // communication and stashes intermediate results for later use
2820 // by createOneToOne. Thus, calling createOneToOne doesn't cost
2821 // much more than the original cost of calling isOneToOne.
2824
2825 // Create a temporary graph with the one-to-one row Map.
2826 //
2827 // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
2828 // row, to avoid reallocation during the Export operation.
2829 crs_graph_type oneToOneGraph(oneToOneRowMap, 0);
2830
2831 // Export from graph of nonlocals into the temp one-to-one graph.
2832 if (verbose_) {
2833 std::ostringstream os;
2834 os << *prefix << "Export nonlocal graph" << endl;
2835 std::cerr << os.str();
2836 }
2838
2839 // We don't need the graph of nonlocals anymore, so get rid of
2840 // it, to keep the memory high-water mark down.
2841 nonlocalGraph = Teuchos::null;
2842
2843 // Import from the one-to-one graph to the original graph.
2845 if (verbose_) {
2846 std::ostringstream os;
2847 os << *prefix << "Import nonlocal graph" << endl;
2848 std::cerr << os.str();
2849 }
2850 this->doImport(oneToOneGraph, importToOrig, Tpetra::INSERT);
2851 }
2852
2853 // It's safe now to clear out nonlocals_, since we've already
2854 // committed side effects to *this. The standard idiom for
2855 // clearing a Container like std::map, is to swap it with an empty
2856 // Container and let the swapped Container fall out of scope.
2857 decltype(this->nonlocals_) newNonlocals;
2858 std::swap(this->nonlocals_, newNonlocals);
2859
2860 checkInternalState();
2861 if (verbose_) {
2862 std::ostringstream os;
2863 os << *prefix << "Done" << endl;
2864 std::cerr << os.str();
2865 }
2866}
2867
// resumeFill: puts the graph back into a modifiable state.
//
// Calls clearGlobalConstants(), forwards params to setParameterList()
// when params is non-null, then sets indicesAreSorted_ and
// noRedundancies_ to true and fillComplete_ to false.
//
// NOTE(review): the line numbered 2869 (presumably the return type and
// class qualifier) is absent from this listing.
2868template <class LocalOrdinal, class GlobalOrdinal, class Node>
2870 resumeFill(const Teuchos::RCP<Teuchos::ParameterList>& params) {
2871 clearGlobalConstants();
2872 if (params != Teuchos::null) this->setParameterList(params);
2873 // either still sorted/merged or initially sorted/merged
2874 indicesAreSorted_ = true;
2875 noRedundancies_ = true;
2876 fillComplete_ = false;
2877}
2878
// fillComplete (parameter-list-only overload).
//
// Picks the domain and range Maps and forwards to the three-argument
// fillComplete: each Map is the one the graph already reports
// (getDomainMap() / getRangeMap()) or, when that is null, the row Map.
//
// params is passed through unchanged to the three-argument overload.
//
// NOTE(review): the line numbered 2880 (presumably the return type and
// class qualifier) is absent from this listing.
2879template <class LocalOrdinal, class GlobalOrdinal, class Node>
2881 fillComplete(const Teuchos::RCP<Teuchos::ParameterList>& params) {
2882 // If the graph already has domain and range Maps, don't clobber
2883 // them. If it doesn't, use the current row Map for both the
2884 // domain and range Maps.
2885 //
2886 // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
2887 // column Map, and column indices are inserted which are not in
2888 // the row Map on any process, this will cause troubles. However,
2889 // that is not a common case for most applications that we
2890 // encounter, and checking for it might require more
2891 // communication.
2892 Teuchos::RCP<const map_type> domMap = this->getDomainMap();
2893 if (domMap.is_null()) {
2894 domMap = this->getRowMap();
2895 }
2896 Teuchos::RCP<const map_type> ranMap = this->getRangeMap();
2897 if (ranMap.is_null()) {
2898 ranMap = this->getRowMap();
2899 }
2900 this->fillComplete(domMap, ranMap, params);
2901}
2902
// fillComplete (domain Map, range Map, parameter list overload).
//
// Visible sequence of work:
//   1. The fill-state test reports std::runtime_error when
//      !isFillActive() || isFillComplete().
//   2. Reads the parameters named below from params, when non-null:
//      the two spellings of the ghost-GID sort option, the
//      No Nonlocal Changes flag, and later the
//      compute global constants flag (default true).
//   3. Allocates indices if needed: local indices when the graph has a
//      column Map, global indices otherwise.
//   4. Either calls globalAssemble() or, in the other branch, reports
//      on leftover nonlocals_ entries (see the error text below).
//   5. setDomainRangeMaps, makeColMap (only when there is no column
//      Map yet), makeIndicesLocal, sortAndMergeAllIndices,
//      makeImportExport, fillLocalGraph.
//   6. computeGlobalConstants() or computeLocalConstants(), then sets
//      fillComplete_ = true and calls checkInternalState().
// When verbose_ is set, progress messages are written to std::cerr.
//
// NOTE(review): the embedded listing numbers skip at 2904, 2948,
// 2969-2970, 2999, 3003, 3006, 3016, 3048, 3055, 3060, 3065, 3077 and
// 3096. The statements on those lines (including the branch conditions
// for steps 4 and 6 and the openers of several exception-test calls)
// are missing from this listing and are not described here.
2903template <class LocalOrdinal, class GlobalOrdinal, class Node>
2905 fillComplete(const Teuchos::RCP<const map_type>& domainMap,
2906 const Teuchos::RCP<const map_type>& rangeMap,
2907 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2908 using std::endl;
2909 const char tfecfFuncName[] = "fillComplete: ";
2910 const bool verbose = verbose_;
2911
2912 std::unique_ptr<std::string> prefix;
2913 if (verbose) {
2914 prefix = this->createPrefix("CrsGraph", "fillComplete");
2915 std::ostringstream os;
2916 os << *prefix << "Start" << endl;
2917 std::cerr << os.str();
2918 }
2919
2920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!isFillActive() || isFillComplete(), std::runtime_error,
2921 "Graph fill state must be active (isFillActive() "
2922 "must be true) before calling fillComplete().");
2923
2924 const int numProcs = getComm()->getSize();
2925
2926 //
2927 // Read and set parameters
2928 //
2929
2930 // Does the caller want to sort remote GIDs (within those owned by
2931 // the same process) in makeColMap()?
2932 if (!params.is_null()) {
2933 if (params->isParameter("sort column map ghost gids")) {
2934 sortGhostsAssociatedWithEachProcessor_ =
2935 params->get<bool>("sort column map ghost gids",
2936 sortGhostsAssociatedWithEachProcessor_);
2937 } else if (params->isParameter("Sort column Map ghost GIDs")) {
2938 sortGhostsAssociatedWithEachProcessor_ =
2939 params->get<bool>("Sort column Map ghost GIDs",
2940 sortGhostsAssociatedWithEachProcessor_);
2941 }
2942 }
2943
2944 // If true, the caller promises that no process did nonlocal
2945 // changes since the last call to fillComplete.
2946 bool assertNoNonlocalInserts = false;
2947 if (!params.is_null()) {
2949 params->get<bool>("No Nonlocal Changes", assertNoNonlocalInserts);
2950 }
2951
2952 //
2953 // Allocate indices, if they haven't already been allocated
2954 //
2955 if (!indicesAreAllocated()) {
2956 if (hasColMap()) {
2957 // We have a column Map, so use local indices.
2958 allocateIndices(LocalIndices, verbose);
2959 } else {
2960 // We don't have a column Map, so use global indices.
2961 allocateIndices(GlobalIndices, verbose);
2962 }
2963 }
2964
2965 //
2966 // Do global assembly, if requested and if the communicator
2967 // contains more than one process.
2968 //
2971 // This first checks if we need to do global assembly.
2972 // The check costs a single all-reduce.
2973 globalAssemble();
2974 } else {
2975 const size_t numNonlocals = nonlocals_.size();
2976 if (verbose) {
2977 std::ostringstream os;
// NOTE(review): the message below emits no separator between the
// assertNoNonlocalInserts value and the numProcs= label, so the two
// run together in the output.
2978 os << *prefix << "Do not need to call globalAssemble; "
2979 "assertNoNonlocalInserts="
2980 << (assertNoNonlocalInserts ? "true" : "false")
2981 << "numProcs=" << numProcs
2982 << ", nonlocals_.size()=" << numNonlocals << endl;
2983 std::cerr << os.str();
2984 }
2985 const int lclNeededGlobalAssemble =
2986 (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
2987 if (lclNeededGlobalAssemble != 0 && verbose) {
2988 std::ostringstream os;
2989 os << *prefix;
2990 Details::Impl::verbosePrintMap(
2991 os, nonlocals_.begin(), nonlocals_.end(),
2992 nonlocals_.size(), "nonlocals_");
2993 std::cerr << os.str() << endl;
2994 }
2995
2996 if (debug_) {
2997 auto map = this->getMap();
2998 auto comm = map.is_null() ? Teuchos::null : map->getComm();
3000 if (!comm.is_null()) {
3001 using Teuchos::REDUCE_MAX;
3002 using Teuchos::reduceAll;
3004 Teuchos::outArg(gblNeededGlobalAssemble));
3005 }
3007 "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3008 "least one process in the CrsGraph's communicator. This "
3009 "means either that you incorrectly set the "
3010 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3011 "or that you inserted invalid entries. "
3012 "Rerun with the environment variable TPETRA_VERBOSE="
3013 "CrsGraph set to see the entries of nonlocals_ on every "
3014 "MPI process (WARNING: lots of output).");
3015 } else {
3017 "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3018 "calling process. This means either that you incorrectly "
3019 "set the \"No Nonlocal Changes\" fillComplete parameter "
3020 "to true, or that you inserted invalid entries. "
3021 "Rerun with the environment "
3022 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3023 "of nonlocals_ on every MPI process (WARNING: lots of "
3024 "output).");
3025 }
3026 }
3027
3028 // Set domain and range Map. This may clear the Import / Export
3029 // objects if the new Maps differ from any old ones.
3030 setDomainRangeMaps(domainMap, rangeMap);
3031
3032 // If the graph does not already have a column Map (either from
3033 // the user constructor calling the version of the constructor
3034 // that takes a column Map, or from a previous fillComplete call),
3035 // then create it.
3036 Teuchos::Array<int> remotePIDs(0);
3037 const bool mustBuildColMap = !this->hasColMap();
3038 if (mustBuildColMap) {
3039 this->makeColMap(remotePIDs); // resized on output
3040 }
3041
3042 // Make indices local, if they aren't already.
3043 // The method doesn't do any work if the indices are already local.
3044 const std::pair<size_t, std::string> makeIndicesLocalResult =
3045 this->makeIndicesLocal(verbose);
3046
3047 if (debug_) {
3049 using Teuchos::outArg;
3050 using Teuchos::RCP;
3051 using Teuchos::REDUCE_MIN;
3052 using Teuchos::reduceAll;
3053
3054 RCP<const map_type> map = this->getMap();
3056 if (!map.is_null()) {
3057 comm = map->getComm();
3058 }
3059 if (comm.is_null()) {
3061 makeIndicesLocalResult.second);
3062 } else {
3063 const int lclSuccess = (makeIndicesLocalResult.first == 0);
3064 int gblSuccess = 0; // output argument
3066 if (gblSuccess != 1) {
3067 std::ostringstream os;
3068 gathervPrint(os, makeIndicesLocalResult.second, *comm);
3069 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
3070 }
3071 }
3072 } else {
3073 // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3074 // the error state to makeImportExport or
3075 // computeGlobalConstants, which may do all-reduces and thus may
3076 // have the opportunity to communicate that error state.
3078 makeIndicesLocalResult.second);
3079 }
3080
3081 // If this process has no indices, then CrsGraph considers it
3082 // already trivially sorted and merged. Thus, this method need
3083 // not be called on all processes in the row Map's communicator.
3084 this->sortAndMergeAllIndices(this->isSorted(), this->isMerged());
3085
3086 // Make Import and Export objects, if they haven't been made
3087 // already. If we made a column Map above, reuse information from
3088 // that process to avoid communication in the Import setup.
3089 this->makeImportExport(remotePIDs, mustBuildColMap);
3090
3091 // Create the KokkosSparse::StaticCrsGraph, if it doesn't already exist.
3092 this->fillLocalGraph(params);
3093
3094 const bool callComputeGlobalConstants = params.get() == nullptr ||
3095 params->get("compute global constants", true);
3097 this->computeGlobalConstants();
3098 } else {
3099 this->computeLocalConstants();
3100 }
3101 this->fillComplete_ = true;
3102 this->checkInternalState();
3103
3104 if (verbose) {
3105 std::ostringstream os;
3106 os << *prefix << "Done" << endl;
3107 std::cerr << os.str();
3108 }
3109}
3110
// expertStaticFillComplete: fill-completes a graph whose structure the
// caller has already set up, skipping the steps listed in the note
// inside the body (allocateIndices, globalAssemble, makeColMap,
// makeIndicesLocal, sortAndMergeAllIndices).
//
// Parameters:
//   domainMap, rangeMap - passed to setDomainRangeMaps(); the first
//                         visible check rejects null values.
//   importer, exporter  - optional; when non-null each is checked
//                         against the graph's Maps and then stored in
//                         importer_ / exporter_.
//   params              - optional; read for the Timer Label entry
//                         (only with HAVE_TPETRA_MMM_TIMINGS) and the
//                         compute global constants entry (default
//                         true), and forwarded to fillLocalGraph().
//
// The body sets the allocation / local-index / sorted / merged flags
// directly instead of computing them, then calls makeImportExport,
// fillLocalGraph, computeGlobalConstants or computeLocalConstants, sets
// fillComplete_ = true, and calls checkInternalState().
//
// NOTE(review): the embedded listing numbers skip at 3112, 3128, 3131,
// 3136, 3140, 3187, 3200 and 3223. The condition/message pairs that
// follow those gaps look like arguments of exception-test macro calls
// whose opening lines are missing from this listing; line 3223 is
// presumably the test on callComputeGlobalConstants. Confirm against
// the original header.
3111template <class LocalOrdinal, class GlobalOrdinal, class Node>
3113 expertStaticFillComplete(const Teuchos::RCP<const map_type>& domainMap,
3114 const Teuchos::RCP<const map_type>& rangeMap,
3115 const Teuchos::RCP<const import_type>& importer,
3116 const Teuchos::RCP<const export_type>& exporter,
3117 const Teuchos::RCP<Teuchos::ParameterList>& params) {
3118 const char tfecfFuncName[] = "expertStaticFillComplete: ";
3119#ifdef HAVE_TPETRA_MMM_TIMINGS
3120 std::string label;
3121 if (!params.is_null())
3122 label = params->get("Timer Label", label);
3123 std::string prefix = std::string("Tpetra ") + label + std::string(": ");
3124 using Teuchos::TimeMonitor;
3125 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3126#endif
3127
3129 domainMap.is_null() || rangeMap.is_null(),
3130 std::runtime_error, "The input domain Map and range Map must be nonnull.");
3132 isFillComplete() || !hasColMap(), std::runtime_error,
3133 "You may not "
3134 "call this method unless the graph has a column Map.");
3135 auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedDevice().extent(0);
3137 getLocalNumRows() > 0 && rowPtrsUnpackedLength == 0,
3138 std::runtime_error, "The calling process has getLocalNumRows() = " << getLocalNumRows() << " > 0 rows, but the row offsets array has not "
3139 "been set.");
3141 static_cast<size_t>(rowPtrsUnpackedLength) != getLocalNumRows() + 1,
3142 std::runtime_error, "The row offsets array has length " << rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " << (getLocalNumRows() + 1) << ".");
3143
3144 // Note: We don't need to do the following things which are normally done in fillComplete:
3145 // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3146
3147 // Constants from allocateIndices
3148 //
3149 // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3150 // away once the graph is allocated. expertStaticFillComplete
3151 // either presumes that the graph is allocated, or "allocates" it.
3152 //
3153 // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3154 // version of CrsGraph is to allocate in the constructor, not
3155 // lazily on first insert. That will make both
3156 // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3157 numAllocForAllRows_ = 0;
3158 k_numAllocPerRow_ = decltype(k_numAllocPerRow_)();
3159 indicesAreAllocated_ = true;
3160
3161 // Constants from makeIndicesLocal
3162 //
3163 // The graph has a column Map, so its indices had better be local.
3164 indicesAreLocal_ = true;
3165 indicesAreGlobal_ = false;
3166
3167 // set domain/range map: may clear the import/export objects
3168#ifdef HAVE_TPETRA_MMM_TIMINGS
3169 MM = Teuchos::null;
3170 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3171#endif
3172 setDomainRangeMaps(domainMap, rangeMap);
3173
3174 // Presume the user sorted and merged the arrays first
3175 indicesAreSorted_ = true;
3176 noRedundancies_ = true;
3177
3178 // makeImportExport won't create a new importer/exporter if I set one here first.
3179#ifdef HAVE_TPETRA_MMM_TIMINGS
3180 MM = Teuchos::null;
3181 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3182#endif
3183
3184 importer_ = Teuchos::null;
3185 exporter_ = Teuchos::null;
3186 if (importer != Teuchos::null) {
3188 !importer->getSourceMap()->isSameAs(*getDomainMap()) ||
3189 !importer->getTargetMap()->isSameAs(*getColMap()),
3190 std::invalid_argument, ": importer does not match matrix maps.");
3191 importer_ = importer;
3192 }
3193
3194#ifdef HAVE_TPETRA_MMM_TIMINGS
3195 MM = Teuchos::null;
3196 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3197#endif
3198
3199 if (exporter != Teuchos::null) {
3201 !exporter->getSourceMap()->isSameAs(*getRowMap()) ||
3202 !exporter->getTargetMap()->isSameAs(*getRangeMap()),
3203 std::invalid_argument, ": exporter does not match matrix maps.");
3204 exporter_ = exporter;
3205 }
3206
3207#ifdef HAVE_TPETRA_MMM_TIMINGS
3208 MM = Teuchos::null;
3209 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3210#endif
3211 Teuchos::Array<int> remotePIDs(0); // unused output argument
3212 this->makeImportExport(remotePIDs, false);
3213
3214#ifdef HAVE_TPETRA_MMM_TIMINGS
3215 MM = Teuchos::null;
3216 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3217#endif
3218 this->fillLocalGraph(params);
3219
3220 const bool callComputeGlobalConstants = params.get() == nullptr ||
3221 params->get("compute global constants", true);
3222
3224#ifdef HAVE_TPETRA_MMM_TIMINGS
3225 MM = Teuchos::null;
3226 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3227#endif // HAVE_TPETRA_MMM_TIMINGS
3228 this->computeGlobalConstants();
3229 } else {
3230#ifdef HAVE_TPETRA_MMM_TIMINGS
3231 MM = Teuchos::null;
3232 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3233#endif // HAVE_TPETRA_MMM_TIMINGS
3234 this->computeLocalConstants();
3235 }
3236
3237 fillComplete_ = true;
3238
3239#ifdef HAVE_TPETRA_MMM_TIMINGS
3240 MM = Teuchos::null;
3241 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3242#endif
3243 checkInternalState();
3244}
3245
// fillLocalGraph: sets the packed row offsets and packed local column
// indices of the graph (called from fillComplete or
// expertStaticFillComplete, per the tfecfFuncName string).
//
// Two paths, chosen by comparing getLocalNumEntries() with
// getLocalAllocationSize():
//   - different: compute packed row offsets ptr_d from k_numRowEntries_
//     with computeOffsetsFromCounts, allocate ind_d, and copy the
//     entries across with pack_functor in a Kokkos::parallel_for over
//     the local rows; then install the offsets with setRowPtrs or
//     setRowPtrsPacked and store ind_d in lclIndsPacked_wdv;
//   - equal: reuse the unpacked row offsets and indices as the packed
//     ones.
// When requestOptimizedStorage is true, k_numRowEntries_ is reset to a
// default-constructed view, lclIndsUnpacked_wdv is set to
// lclIndsPacked_wdv, and storageStatus_ becomes STORAGE_1D_PACKED.
//
// params: optional; only the Optimize Storage entry (default true) is
// read here. With debug_ set, a series of std::logic_error consistency
// checks run before and after packing. Exceptions thrown by
// getLocalAllocationSize() are caught and reported again through the
// exception-test macro with a prefixed message.
//
// NOTE(review): the embedded listing numbers skip at 3247 (presumably
// the return type and class qualifier) and 3263 (presumably the
// statement that clears requestOptimizedStorage); confirm against the
// original header.
3246template <class LocalOrdinal, class GlobalOrdinal, class Node>
3248 fillLocalGraph(const Teuchos::RCP<Teuchos::ParameterList>& params) {
3249 using ::Tpetra::Details::computeOffsetsFromCounts;
3250 typedef typename local_graph_device_type::row_map_type row_map_type;
3251 typedef typename row_map_type::non_const_type non_const_row_map_type;
3252 typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3253 const char tfecfFuncName[] =
3254 "fillLocalGraph (called from fillComplete or "
3255 "expertStaticFillComplete): ";
3256 const size_t lclNumRows = this->getLocalNumRows();
3257
3258 // This method's goal is to fill in the two arrays (compressed
3259 // sparse row format) that define the sparse graph's structure.
3260
3261 bool requestOptimizedStorage = true;
3262 if (!params.is_null() && !params->get("Optimize Storage", true)) {
3264 }
3265
3266 // The graph's column indices are currently stored in a 1-D
3267 // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3268 // in lclIndsUnpacked_wdv.
3269
3270 if (debug_) {
3271 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3272 // The graph's array of row offsets must already be allocated.
3273 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) == 0, std::logic_error,
3274 "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) != lclNumRows + 1, std::logic_error,
3276 "rowPtrsUnpacked_host_.extent(0) = "
3277 << rowPtrsUnpacked.extent(0) << " != (lclNumRows + 1) = "
3278 << (lclNumRows + 1) << ".");
3279 const size_t numOffsets = rowPtrsUnpacked.extent(0);
3280 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
// NOTE(review): the message below prints the index as numOffsets,
// whereas valToCheck was read at index numOffsets - 1.
3281 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numOffsets != 0 &&
3282 lclIndsUnpacked_wdv.extent(0) != valToCheck,
3283 std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3284 " and lclIndsUnpacked_wdv.extent(0)="
3285 << lclIndsUnpacked_wdv.extent(0) << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck << ".");
3286 }
3287
3288 size_t allocSize = 0;
3289 try {
3290 allocSize = this->getLocalAllocationSize();
3291 } catch (std::logic_error& e) {
3292 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error,
3293 "getLocalAllocationSize threw "
3294 "std::logic_error: "
3295 << e.what());
3296 } catch (std::runtime_error& e) {
3297 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3298 "getLocalAllocationSize threw "
3299 "std::runtime_error: "
3300 << e.what());
3301 } catch (std::exception& e) {
3302 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3303 "getLocalAllocationSize threw "
3304 "std::exception: "
3305 << e.what());
3306 } catch (...) {
3307 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
3308 "getLocalAllocationSize threw "
3309 "an exception not a subclass of std::exception.");
3310 }
3311
3312 if (this->getLocalNumEntries() != allocSize) {
3313 // Use the nonconst version of row_map_type for ptr_d, because
3314 // the latter is const and we need to modify ptr_d here.
3315 non_const_row_map_type ptr_d;
3316 row_map_type ptr_d_const;
3317
3318 // The graph's current 1-D storage is "unpacked." This means
3319 // the row offsets may differ from what the final row offsets
3320 // should be. This could happen, for example, if the user set
3321 // an upper bound on the number of entries in each row, but
3322 // didn't fill all those entries.
3323
3324 if (debug_) {
3325 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3326 if (rowPtrsUnpacked.extent(0) != 0) {
3327 const size_t numOffsets =
3328 static_cast<size_t>(rowPtrsUnpacked.extent(0));
3329 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3331 std::logic_error,
3332 "(Unpacked branch) Before allocating "
3333 "or packing, k_rowPtrs_("
3334 << (numOffsets - 1) << ")="
3335 << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3336 << lclIndsUnpacked_wdv.extent(0) << ".");
3337 }
3338 }
3339
3340 // Pack the row offsets into ptr_d, by doing a sum-scan of the
3341 // array of valid entry counts per row (k_numRowEntries_).
3342
3343 // Total number of entries in the matrix on the calling
3344 // process. We will compute this in the loop below. It's
3345 // cheap to compute and useful as a sanity check.
3346 size_t lclTotalNumEntries = 0;
3347 {
3348 // Allocate the packed row offsets array.
3349 ptr_d =
3350 non_const_row_map_type("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3351 ptr_d_const = ptr_d;
3352
3353 // It's ok that k_numRowEntries_ is a host View; the
3354 // function can handle this.
3355 typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3356 if (debug_) {
3357 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(size_t(numRowEnt_h.extent(0)) != lclNumRows,
3358 std::logic_error,
3359 "(Unpacked branch) "
3360 "numRowEnt_h.extent(0)="
3361 << numRowEnt_h.extent(0)
3362 << " != getLocalNumRows()=" << lclNumRows << "");
3363 }
3364
3365 lclTotalNumEntries = computeOffsetsFromCounts(ptr_d, numRowEnt_h);
3366
3367 if (debug_) {
3368 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(ptr_d.extent(0)) != lclNumRows + 1,
3369 std::logic_error,
3370 "(Unpacked branch) After allocating "
3371 "ptr_d, ptr_d.extent(0) = "
3372 << ptr_d.extent(0)
3373 << " != lclNumRows+1 = " << (lclNumRows + 1) << ".");
3374 const auto valToCheck =
3375 ::Tpetra::Details::getEntryOnHost(ptr_d, lclNumRows);
3376 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != lclTotalNumEntries, std::logic_error,
3377 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3378 "after filling ptr_d, ptr_d(lclNumRows="
3379 << lclNumRows
3380 << ") = " << valToCheck << " != total number of entries "
3381 "on the calling process = "
3382 << lclTotalNumEntries
3383 << ".");
3384 }
3385 }
3386
3387 // Allocate the array of packed column indices.
3388 lclinds_1d_type ind_d =
3389 lclinds_1d_type("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3390
3391 // The unpacked row offsets and lclIndsUnpacked_wdv are currently
3392 // unpacked. Pack them, using the packed row offsets array ptr_d
3393 // that we created above.
3394 //
3395 // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3396 // CrsMatrix?), we need to keep around the unpacked row
3397 // offsets and column indices.
3398
3399 // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3400 // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3401 typedef pack_functor<
3402 typename local_graph_device_type::entries_type::non_const_type,
3403 typename local_inds_dualv_type::t_dev::const_type,
3404 row_map_type,
3405 typename local_graph_device_type::row_map_type>
3406 inds_packer_type;
3407 inds_packer_type f(ind_d,
3408 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3409 ptr_d, this->getRowPtrsUnpackedDevice());
3410 {
3411 typedef typename decltype(ind_d)::execution_space exec_space;
3412 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3413 Kokkos::parallel_for(range_type(0, lclNumRows), f);
3414 }
3415
3416 if (debug_) {
3417 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(ptr_d.extent(0) == 0, std::logic_error,
3418 "(\"Optimize Storage\"=true branch) After packing, "
3419 "ptr_d.extent(0)=0.");
3420 if (ptr_d.extent(0) != 0) {
3421 const size_t numOffsets = static_cast<size_t>(ptr_d.extent(0));
3422 const auto valToCheck =
3423 ::Tpetra::Details::getEntryOnHost(ptr_d, numOffsets - 1);
3424 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) != ind_d.extent(0),
3425 std::logic_error,
3426 "(\"Optimize Storage\"=true branch) "
3427 "After packing, ptr_d("
3428 << (numOffsets - 1) << ")="
3429 << valToCheck << " != ind_d.extent(0)="
3430 << ind_d.extent(0) << ".");
3431 }
3432 }
3433 // Build the local graph.
3434 if (requestOptimizedStorage)
3435 setRowPtrs(ptr_d_const);
3436 else
3437 setRowPtrsPacked(ptr_d_const);
3438 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3439 } else { // We don't have to pack, so just set the pointers.
3440 // Set both packed and unpacked rowptrs to this
3441 this->setRowPtrs(rowPtrsUnpacked_dev_);
3442 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3443
3444 if (debug_) {
3445 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3446 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3447 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsPacked_dev.extent(0) == 0, std::logic_error,
3448 "(\"Optimize Storage\"=false branch) "
3449 "rowPtrsPacked_dev_.extent(0) = 0.");
3450 if (rowPtrsPacked_dev.extent(0) != 0) {
3451 const size_t numOffsets =
3452 static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3453 const size_t valToCheck =
3454 rowPtrsPacked_host(numOffsets - 1);
3455 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != size_t(lclIndsPacked_wdv.extent(0)),
3456 std::logic_error,
3457 "(\"Optimize Storage\"=false branch) "
3458 "rowPtrsPacked_dev_("
3459 << (numOffsets - 1) << ")="
3460 << valToCheck
3461 << " != lclIndsPacked_wdv.extent(0)="
3462 << lclIndsPacked_wdv.extent(0) << ".");
3463 }
3464 }
3465 }
3466
3467 if (debug_) {
3468 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3469 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3470 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(rowPtrsPacked_dev.extent(0)) != lclNumRows + 1,
3471 std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " << rowPtrsPacked_dev.extent(0) << " != lclNumRows+1 = " << (lclNumRows + 1) << ".");
3472 if (rowPtrsPacked_dev.extent(0) != 0) {
3473 const size_t numOffsets = static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3474 const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3475 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(valToCheck) != lclIndsPacked_wdv.extent(0),
3476 std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets - 1) << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = " << lclIndsPacked_wdv.extent(0) << ".");
3477 }
3478 }
3479
3480 if (requestOptimizedStorage) {
3481 // With optimized storage, we don't need to store
3482 // the array of row entry counts.
3483
3484 // Free graph data structures that are only needed for
3485 // unpacked 1-D storage.
3486 k_numRowEntries_ = num_row_entries_type();
3487
3488 // Keep the new 1-D packed allocations.
3489 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3490
3491 storageStatus_ = Details::STORAGE_1D_PACKED;
3492 }
3493
3494 set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3495}
3496
// replaceColMap: stores newColMap in colMap_.
//
// The visible condition isLocallyIndexed() || isGloballyIndexed() is
// paired with std::runtime_error and the message below.
//
// NOTE(review): the embedded listing numbers skip at 3498 (presumably
// the return type and class qualifier) and 3508 (presumably the opener
// of the exception-test macro call that takes that condition); confirm
// against the original header.
3497template <class LocalOrdinal, class GlobalOrdinal, class Node>
3499 replaceColMap(const Teuchos::RCP<const map_type>& newColMap) {
3500 // NOTE: This safety check matches the code, but not the documentation of CrsGraph
3501 //
3502 // FIXME (mfh 18 Aug 2014) This will break if the calling process
3503 // has no entries, because in that case, currently it is neither
3504 // locally nor globally indexed. This will change once we get rid
3505 // of lazy allocation (so that the constructor allocates indices
3506 // and therefore commits to local vs. global).
3507 const char tfecfFuncName[] = "replaceColMap: ";
3509 isLocallyIndexed() || isGloballyIndexed(), std::runtime_error,
3510 "Requires matching maps and non-static graph.");
3511 colMap_ = newColMap;
3512}
3513
// Re-express the graph's stored column indices with respect to newColMap,
// then install newColMap (and an Import) on the graph.
//
// \param newColMap [in] Column Map to switch to; stored in colMap_ at the end.
// \param newImport [in] Import to store in importer_. If it is null and
//   domainMap_ is nonnull, importer_ becomes a new Import from domainMap_ to
//   newColMap when the two Maps differ (isSameAs is false), else null. If
//   both newImport and domainMap_ are null, importer_ is left unchanged.
// \param sortIndicesInEachRow [in] Presumably gates the resort of each row
//   after reindexing; the line that reads it is not shown here -- confirm.
//
// This method does not redistribute data. Success is agreed on by all
// processes through one all-reduce over two flags: whether the current column
// indices are valid, and whether local reindexing suffices.
3514template <class LocalOrdinal, class GlobalOrdinal, class Node>
3516 reindexColumns(const Teuchos::RCP<const map_type>& newColMap,
3517 const Teuchos::RCP<const import_type>& newImport,
3518 const bool sortIndicesInEachRow) {
3519 using Teuchos::RCP;
3520 using Teuchos::REDUCE_MIN;
3521 using Teuchos::reduceAll;
3522 typedef GlobalOrdinal GO;
3523 typedef LocalOrdinal LO;
3524 using col_inds_type_dev = typename local_inds_dualv_type::t_dev;
3525 const char tfecfFuncName[] = "reindexColumns: ";
3526
// NOTE(review): the arguments below presumably belong to an exception-test
// macro whose opening line is not shown; the condition is isFillComplete().
3528 isFillComplete(), std::runtime_error,
3529 "The graph is fill complete "
3530 "(isFillComplete() returns true). You must call resumeFill() before "
3531 "you may call this method.");
3532
3533 // mfh 19 Aug 2014: This method does NOT redistribute data; it
3534 // doesn't claim to do the work of an Import or Export. This
3535 // means that for all processes, the calling process MUST own all
3536 // column indices, in both the old column Map (if it exists) and
3537 // the new column Map. We check this via an all-reduce.
3538 //
3539 // Some processes may be globally indexed, others may be locally
3540 // indexed, and others (that have no graph entries) may be
3541 // neither. This method will NOT change the graph's current
3542 // state. If it's locally indexed, it will stay that way, and
3543 // vice versa. It would be easy to add an option to convert indices
3544 // from global to local, so as to save a global-to-local
3545 // conversion pass. However, we don't do this here. The intended
3546 // typical use case is that the graph already has a column Map and
3547 // is locally indexed, and this is the case for which we optimize.
3548
3549 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
3550
3551 // Attempt to convert indices to the new column Map's version of
3552 // local. This will fail if on the calling process, the graph has
3553 // indices that are not on that process in the new column Map.
3554 // After the local conversion attempt, we will do an all-reduce to
3555 // see if any processes failed.
3556
3557 // If this is false, then either the graph contains a column index
3558 // which is invalid in the CURRENT column Map, or the graph is
3559 // locally indexed but currently has no column Map. In either
3560 // case, there is no way to convert the current local indices into
3561 // global indices, so that we can convert them into the new column
3562 // Map's local indices. It's possible for this to be true on some
3563 // processes but not others, due to replaceColMap.
3564 bool allCurColIndsValid = true;
3565 // On the calling process, are all valid current column indices
3566 // also in the new column Map on the calling process? In other
3567 // words, does local reindexing suffice, or should the user have
3568 // done an Import or Export instead?
3569 bool localSuffices = true;
3570
3571 {
3572 // Final arrays for the local indices. We will allocate exactly
3573 // one of these ONLY if the graph is locally indexed on the
3574 // calling process, and ONLY if the graph has one or more entries
3575 // (is not empty) on the calling process. In that case, we
3576 // allocate the first (1-D storage) if the graph has a static
3577 // profile, else we allocate the second (2-D storage).
// NOTE(review): the declaration of newLclInds1D_dev (assigned below) is not
// shown here; presumably it is declared at this point -- confirm.
3579
3580 // If indices aren't allocated, that means the calling process
3581 // owns no entries in the graph. Thus, there is nothing to
3582 // convert, and it trivially succeeds locally.
3583 if (indicesAreAllocated()) {
3584 if (isLocallyIndexed()) {
3585 if (hasColMap()) { // locally indexed, and currently has a column Map
3586 const map_type& oldColMap = *(getColMap());
3587
3588 // Allocate storage for the new local indices.
3589 const size_t allocSize = this->getLocalAllocationSize();
3590 auto oldLclInds1D = lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly);
3591 newLclInds1D_dev = col_inds_type_dev("Tpetra::CrsGraph::lclIndsReindexed",
3592 allocSize);
3593 auto oldLclColMap = oldColMap.getLocalMap();
3594 auto newLclColMap = newColMap->getLocalMap();
3595
3596 const auto LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3597 const auto GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
3598
// Bit flags for the two failure modes; the reduction below combines
// per-entry results with Kokkos::LOr<int> into errorStatus.
3599 const int NOT_ALL_LOCAL_INDICES_ARE_VALID = 1;
3600 const int LOCAL_DOES_NOT_SUFFICE = 2;
3601 int errorStatus = 0;
3602 Kokkos::parallel_reduce(
3603 "Tpetra::CrsGraph::reindexColumns",
3604 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, allocSize),
3605 KOKKOS_LAMBDA(const LocalOrdinal k, int& result) {
// Per entry: old local index -> global index (old column Map) -> new local
// index (new column Map).
// NOTE(review): several statements of this lambda (the definition of
// oldLclCol and the body of each branch) are not shown here.
3607 if (oldLclCol == LO_INVALID) {
3609 } else {
3610 const GO gblCol = oldLclColMap.getGlobalElement(oldLclCol);
3611 if (gblCol == GO_INVALID) {
3613 } else {
3614 const LocalOrdinal newLclCol = newLclColMap.getLocalElement(gblCol);
3615 if (newLclCol == LO_INVALID) {
3617 } else {
3619 }
3620 }
3621 }
3622 },
3623 Kokkos::LOr<int>(errorStatus));
// NOTE(review): the lines that translate errorStatus into allCurColIndsValid
// and localSuffices are not shown here.
3626 } else { // locally indexed, but no column Map
3627 // This case is only possible if replaceColMap() was called
3628 // with a null argument on the calling process. It's
3629 // possible, but it means that this method can't possibly
3630 // succeed, since we have no way of knowing how to convert
3631 // the current local indices to global indices.
3632 allCurColIndsValid = false;
3633 }
3634 } else { // globally indexed
3635 // If the graph is globally indexed, we don't need to save
3636 // local indices, but we _do_ need to know whether the current
3637 // global indices are valid in the new column Map. We may
3638 // need to do a getRemoteIndexList call to find this out.
3639 //
3640 // In this case, it doesn't matter whether the graph currently
3641 // has a column Map. We don't need the old column Map to
3642 // convert from global indices to the _new_ column Map's local
3643 // indices. Furthermore, we can use the same code, whether
3644 // the graph is static or dynamic profile.
3645
3646 // Test whether the current global indices are in the new
3647 // column Map on the calling process.
3648 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3649 const RowInfo rowInfo = this->getRowInfo(lclRow);
3650 auto oldGblRowView = this->getGlobalIndsViewHost(rowInfo);
3651 for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3652 const GO gblCol = oldGblRowView(k);
3653 if (!newColMap->isNodeGlobalElement(gblCol)) {
3654 localSuffices = false;
3655 break; // Stop at the first invalid index
// NOTE(review): this break leaves only the inner per-entry loop; the outer
// loop over rows keeps scanning the remaining rows.
3656 }
3657 } // for each entry in the current row
3658 } // for each locally owned row
3659 } // locally or globally indexed
3660 } // whether indices are allocated
3661
3662 // Do an all-reduce to check both possible error conditions.
3663 int lclSuccess[2];
3664 lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3665 lclSuccess[1] = localSuffices ? 1 : 0;
3666 int gblSuccess[2];
3667 gblSuccess[0] = 0;
3668 gblSuccess[1] = 0;
// NOTE(review): the declaration of comm and the all-reduce call inside the
// if-body are not shown here. If comm is null, gblSuccess stays {0, 0}, which
// is the failure value tested below -- confirm that this is intended.
3670 getRowMap().is_null() ? Teuchos::null : getRowMap()->getComm();
3671 if (!comm.is_null()) {
3673 }
3674
3676 gblSuccess[0] == 0, std::runtime_error,
3677 "It is not possible to continue."
3678 " The most likely reason is that the graph is locally indexed, but the "
3679 "column Map is missing (null) on some processes, due to a previous call "
3680 "to replaceColMap().");
3681
3683 gblSuccess[1] == 0, std::runtime_error,
3684 "On some process, the graph "
3685 "contains column indices that are in the old column Map, but not in the "
3686 "new column Map (on that process). This method does NOT redistribute "
3687 "data; it does not claim to do the work of an Import or Export operation."
3688 " This means that for all processess, the calling process MUST own all "
3689 "column indices, in both the old column Map and the new column Map. In "
3690 "this case, you will need to do an Import or Export operation to "
3691 "redistribute data.");
3692
3693 // Commit the results.
3694 if (isLocallyIndexed()) {
3695 lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3696 }
3697 // end of scope for newLclInds1D_dev
3698 // sortAndMergeAllIndices needs host access
3699 }
3700
3701 if (isLocallyIndexed()) {
3702 // We've reindexed, so we don't know if the indices are sorted.
3703 //
3704 // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3705 // since we're already going through all the indices above. We
3706 // could also sort each row in place; that way, we would only
3707 // have to make one pass over the rows.
3708 indicesAreSorted_ = false;
// NOTE(review): the line opening the inner block that is closed after the
// sortAndMergeAllIndices call is not shown; presumably it tests
// sortIndicesInEachRow -- confirm.
3710 // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3711 // order to call this method.
3712 //
3713 // FIXME (mfh 17 Sep 2014) This violates the strong exception
3714 // guarantee. It would be better to sort the new index arrays
3715 // before committing them.
3716 const bool sorted = false; // need to resort
3717 const bool merged = true; // no need to merge, since no dups
3718 this->sortAndMergeAllIndices(sorted, merged);
3719 }
3720 }
3721 colMap_ = newColMap;
3722
3723 if (newImport.is_null()) {
3724 // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3725 // check whether the input Import is null on any process.
3726 //
3727 // If the domain Map hasn't been set yet, we can't compute a new
3728 // Import object. Leave it what it is; it should be null, but
3729 // it doesn't matter. If the domain Map _has_ been set, then
3730 // compute a new Import object if necessary.
3731 if (!domainMap_.is_null()) {
3732 if (!domainMap_->isSameAs(*newColMap)) {
3733 importer_ = Teuchos::rcp(new import_type(domainMap_, newColMap));
3734 } else {
3735 importer_ = Teuchos::null; // don't need an Import
3736 }
3737 }
3738 } else {
3739 // The caller gave us an Import object. Assume that it's valid.
3740 importer_ = newImport;
3741 }
3742}
3743
// Replace the graph's domain Map, building a matching Import if one is needed.
//
// \param newDomainMap [in] New domain Map; the precondition arguments below
//   treat a null value as std::invalid_argument.
//
// An Import from newDomainMap to colMap_ is created only when newDomainMap is
// neither the same pointer as colMap_ nor isSameAs(*colMap_); otherwise a null
// Import is passed on. The actual update is delegated to
// replaceDomainMapAndImporter().
//
// NOTE(review): the two precondition argument lists presumably belong to
// exception-test macros whose opening lines are not shown -- confirm.
3744template <class LocalOrdinal, class GlobalOrdinal, class Node>
3746 replaceDomainMap(const Teuchos::RCP<const map_type>& newDomainMap) {
3747 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
3749 colMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3750 "this method unless the graph already has a column Map.");
3752 newDomainMap.is_null(), std::invalid_argument,
3753 prefix << "The new domain Map must be nonnull.");
3754
3755 // Create a new importer, if needed
3756 Teuchos::RCP<const import_type> newImporter = Teuchos::null;
// The pointer comparison is tried first; isSameAs() is only evaluated when the
// two RCPs differ.
3757 if (newDomainMap != colMap_ && (!newDomainMap->isSameAs(*colMap_))) {
3758 newImporter = rcp(new import_type(newDomainMap, colMap_));
3759 }
3760 this->replaceDomainMapAndImporter(newDomainMap, newImporter);
3761}
3762
// Replace the graph's domain Map and Import object together.
//
// \param newDomainMap [in] New domain Map; stored in domainMap_.
// \param newImporter [in] New Import (may be null); stored in importer_ after
//   an rcp_const_cast to import_type.
//
// When debug_ is set, the pair is checked for consistency and
// std::invalid_argument is thrown on mismatch:
//   - null Import: colMap_ must be the same as newDomainMap;
//   - nonnull Import: colMap_ must be the same as the Import's target Map and
//     newDomainMap the same as the Import's source Map.
//
// NOTE(review): the first two precondition argument lists presumably belong
// to exception-test macros whose opening lines are not shown -- confirm.
3763template <class LocalOrdinal, class GlobalOrdinal, class Node>
3765 replaceDomainMapAndImporter(const Teuchos::RCP<const map_type>& newDomainMap,
3766 const Teuchos::RCP<const import_type>& newImporter) {
3767 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
3769 colMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3770 "this method unless the graph already has a column Map.");
3772 newDomainMap.is_null(), std::invalid_argument,
3773 prefix << "The new domain Map must be nonnull.");
3774
// The Map comparisons below run only in debug mode.
3775 if (debug_) {
3776 if (newImporter.is_null()) {
3777 // It's not a good idea to put expensive operations in a macro
3778 // clause, even if they are side effect - free, because macros
3779 // don't promise that they won't evaluate their arguments more
3780 // than once. It's polite for them to do so, but not required.
3781 const bool colSameAsDom = colMap_->isSameAs(*newDomainMap);
3782 TEUCHOS_TEST_FOR_EXCEPTION(!colSameAsDom, std::invalid_argument,
3783 "If the new Import is null, "
3784 "then the new domain Map must be the same as the current column Map.");
3785 } else {
3786 const bool colSameAsTgt =
3787 colMap_->isSameAs(*(newImporter->getTargetMap()));
3788 const bool newDomSameAsSrc =
3789 newDomainMap->isSameAs(*(newImporter->getSourceMap()));
3790 TEUCHOS_TEST_FOR_EXCEPTION(!colSameAsTgt || !newDomSameAsSrc, std::invalid_argument,
3791 "If the "
3792 "new Import is nonnull, then the current column Map must be the same "
3793 "as the new Import's target Map, and the new domain Map must be the "
3794 "same as the new Import's source Map.");
3795 }
3796 }
3797
// Commit: both members are replaced unconditionally once the checks pass.
3798 domainMap_ = newDomainMap;
3799 importer_ = Teuchos::rcp_const_cast<import_type>(newImporter);
3800}
3801
// Replace the graph's range Map, building a matching Export if one is needed.
//
// \param newRangeMap [in] New range Map; the precondition arguments below
//   treat a null value as std::invalid_argument.
//
// An Export from rowMap_ to newRangeMap is created only when newRangeMap is
// neither the same pointer as rowMap_ nor isSameAs(*rowMap_); otherwise a null
// Export is passed on. The actual update is delegated to
// replaceRangeMapAndExporter().
//
// NOTE(review): the two precondition argument lists presumably belong to
// exception-test macros whose opening lines are not shown -- confirm.
3802template <class LocalOrdinal, class GlobalOrdinal, class Node>
3804 replaceRangeMap(const Teuchos::RCP<const map_type>& newRangeMap) {
3805 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
3807 rowMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3808 "this method unless the graph already has a row Map.");
3810 newRangeMap.is_null(), std::invalid_argument,
3811 prefix << "The new range Map must be nonnull.");
3812
3813 // Create a new exporter, if needed
3814 Teuchos::RCP<const export_type> newExporter = Teuchos::null;
// The pointer comparison is tried first; isSameAs() is only evaluated when the
// two RCPs differ.
3815 if (newRangeMap != rowMap_ && (!newRangeMap->isSameAs(*rowMap_))) {
3816 newExporter = rcp(new export_type(rowMap_, newRangeMap));
3817 }
3818 this->replaceRangeMapAndExporter(newRangeMap, newExporter);
3819}
3820
// Replace the graph's range Map and Export object together.
//
// \param newRangeMap [in] New range Map; stored in rangeMap_.
// \param newExporter [in] New Export (may be null); stored in exporter_ after
//   an rcp_const_cast to export_type.
//
// When debug_ is set, the pair is checked for consistency and
// std::invalid_argument is thrown on mismatch:
//   - null Export: rowMap_ must be the same as newRangeMap;
//   - nonnull Export: rowMap_ must be the same as the Export's source Map and
//     newRangeMap the same as the Export's target Map.
//
// The two precondition messages name the objects that are actually tested
// (the row Map and the new range Map), matching replaceRangeMap().
//
// NOTE(review): the first two precondition argument lists presumably belong
// to exception-test macros whose opening lines are not shown -- confirm.
3821template <class LocalOrdinal, class GlobalOrdinal, class Node>
3823 replaceRangeMapAndExporter(const Teuchos::RCP<const map_type>& newRangeMap,
3824 const Teuchos::RCP<const export_type>& newExporter) {
3825 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
3827 rowMap_.is_null(), std::invalid_argument, prefix << "You may not call "
3828 "this method unless the graph already has a row Map.");
3830 newRangeMap.is_null(), std::invalid_argument,
3831 prefix << "The new range Map must be nonnull.");
3832
// The Map comparisons below run only in debug mode.
3833 if (debug_) {
3834 if (newExporter.is_null()) {
3835 // It's not a good idea to put expensive operations in a macro
3836 // clause, even if they are side effect - free, because macros
3837 // don't promise that they won't evaluate their arguments more
3838 // than once. It's polite for them to do so, but not required.
3839 const bool rowSameAsRange = rowMap_->isSameAs(*newRangeMap);
3840 TEUCHOS_TEST_FOR_EXCEPTION(!rowSameAsRange, std::invalid_argument,
3841 "If the new Export is null, "
3842 "then the new range Map must be the same as the current row Map.");
3843 } else {
3844 const bool newRangeSameAsTgt =
3845 newRangeMap->isSameAs(*(newExporter->getTargetMap()));
3846 const bool rowSameAsSrc =
3847 rowMap_->isSameAs(*(newExporter->getSourceMap()));
3848 TEUCHOS_TEST_FOR_EXCEPTION(!rowSameAsSrc || !newRangeSameAsTgt, std::invalid_argument,
3849 "If the "
3850 "new Export is nonnull, then the current row Map must be the same "
3851 "as the new Export's source Map, and the new range Map must be the "
3852 "same as the new Export's target Map.");
3853 }
3854 }
3855
// Commit: both members are replaced unconditionally once the checks pass.
3856 rangeMap_ = newRangeMap;
3857 exporter_ = Teuchos::rcp_const_cast<export_type>(newExporter);
3858}
3859
// Build the device-side local graph from the packed storage.
//
// The arguments shown are the device view of the packed local column indices
// (requested with Access::ReadWrite) and the packed device row pointers.
//
// NOTE(review): the opening of the return expression is not shown here; by
// analogy with getLocalGraphHost() it is presumably
// "return local_graph_device_type(" -- confirm.
3860template <class LocalOrdinal, class GlobalOrdinal, class Node>
3863 getLocalGraphDevice() const {
3865 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
3866 this->getRowPtrsPackedDevice());
3867}
3868
// Build the host-side local graph from the packed storage.
//
// \return A local_graph_host_type constructed from the host view of the
//   packed local column indices (requested with Access::ReadWrite) and the
//   packed host row pointers.
3869template <class LocalOrdinal, class GlobalOrdinal, class Node>
3872 getLocalGraphHost() const {
3873 return local_graph_host_type(
3874 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
3875 this->getRowPtrsPackedHost());
3876}
3877
// Compute the graph's global constants (global entry count and global maximum
// row length) from the per-process values.
//
// Always calls computeLocalConstants() first. The two all-reduces (a sum and
// a max) run only while haveGlobalConstants_ is false; the flag is set to
// true afterwards.
//
// NOTE(review): the signature line of this definition is not shown here; the
// name is taken from the profiling-region label below.
3878template <class LocalOrdinal, class GlobalOrdinal, class Node>
3881 using Teuchos::ArrayView;
3882 using Teuchos::outArg;
3883 using Teuchos::reduceAll;
3884 using ::Tpetra::Details::ProfilingRegion;
3885 typedef global_size_t GST;
3886
3887 ProfilingRegion regionCGC("Tpetra::CrsGraph::computeGlobalConstants");
3888
3889 this->computeLocalConstants();
3890
3891 // Compute global constants from local constants. Processes that
3892 // already have local constants still participate in the
3893 // all-reduces, using their previously computed values.
3894 if (!this->haveGlobalConstants_) {
3895 const Teuchos::Comm<int>& comm = *(this->getComm());
3896 // Promote all the nodeNum* and nodeMaxNum* quantities from
3897 // size_t to global_size_t, when doing the all-reduces for
3898 // globalNum* / globalMaxNum* results.
3899 //
3900 // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
3901 // this in two all-reduces (one for the sum and the other for
3902 // the max), or use a custom MPI_Op that combines the sum and
3903 // the max. The latter might even be slower than two
3904 // all-reduces on modern network hardware. It would also be a
3905 // good idea to use nonblocking all-reduces (MPI 3), so that we
3906 // don't have to wait around for the first one to finish before
3907 // starting the second one.
3908 GST lcl, gbl;
3909 lcl = static_cast<GST>(this->getLocalNumEntries());
3910
// First all-reduce: sum of the local entry counts.
3911 reduceAll<int, GST>(comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
3912 this->globalNumEntries_ = gbl;
3913
// Second all-reduce: max of the local maximum row lengths, written directly
// into globalMaxNumRowEntries_.
3914 const GST lclMaxNumRowEnt = static_cast<GST>(this->nodeMaxNumRowEntries_);
3915 reduceAll<int, GST>(comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
3916 outArg(this->globalMaxNumRowEntries_));
3917 this->haveGlobalConstants_ = true;
3918 }
3919}
3920
// Compute the per-process constant nodeMaxNumRowEntries_ from the packed row
// pointers.
//
// Returns immediately if haveLocalConstants_ is already true; otherwise sets
// nodeMaxNumRowEntries_ and then sets haveLocalConstants_ to true.
//
// NOTE(review): the signature line of this definition is not shown here; the
// name is taken from the profiling-region label below.
3921template <class LocalOrdinal, class GlobalOrdinal, class Node>
3924 using ::Tpetra::Details::ProfilingRegion;
3925
3926 ProfilingRegion regionCLC("Tpetra::CrsGraph::computeLocalConstants");
3927 if (this->haveLocalConstants_) {
3928 return;
3929 }
3930
3931 // Reset local properties
3932 this->nodeMaxNumRowEntries_ =
3933 Teuchos::OrdinalTraits<size_t>::invalid();
3934
3935 using LO = local_ordinal_type;
3936
3937 auto ptr = this->getRowPtrsPackedDevice();
// Row count is one less than the row-pointer length, or 0 when the
// row-pointer array is empty.
3938 const LO lclNumRows = ptr.extent(0) == 0 ? static_cast<LO>(0) : (static_cast<LO>(ptr.extent(0)) - static_cast<LO>(1));
3939
// Presumably maxDifference returns the largest gap between consecutive row
// pointers, i.e. the longest row -- confirm against its definition.
3940 const LO lclMaxNumRowEnt =
3941 ::Tpetra::Details::maxDifference("Tpetra::CrsGraph: nodeMaxNumRowEntries",
3942 ptr, lclNumRows);
3943 this->nodeMaxNumRowEntries_ = static_cast<size_t>(lclMaxNumRowEnt);
3944 this->haveLocalConstants_ = true;
3945}
3946
// Convert the graph's column indices from global to local, using the graph's
// column Map.
//
// \param verbose [in] If true, print progress messages to std::cerr.
//
// \return A pair. The first element is the number of column indices (counting
//   duplicates) that could not be converted on the calling process; it is
//   OrdinalTraits<size_t>::invalid() on the early-return path taken when the
//   unpacked row pointers have length 0. The second element is the
//   accumulated error message (empty when there were no errors).
//
// Throws std::logic_error if the graph has no column Map, or if hasColMap()
// is true but getColMap() returns null.
//
// On normal exit the graph is marked locally indexed (indicesAreLocal_ true,
// indicesAreGlobal_ false) and checkInternalState() is called.
3947template <class LocalOrdinal, class GlobalOrdinal, class Node>
3948std::pair<size_t, std::string>
3950 makeIndicesLocal(const bool verbose) {
3952 using std::endl;
3953 using Teuchos::arcp;
3954 using Teuchos::Array;
3955 typedef LocalOrdinal LO;
3956 typedef GlobalOrdinal GO;
3957 typedef device_type DT;
3958 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
3959 typedef typename num_row_entries_type::non_const_value_type num_ent_type;
3960 const char tfecfFuncName[] = "makeIndicesLocal: ";
3961 ProfilingRegion regionMakeIndicesLocal("Tpetra::CrsGraph::makeIndicesLocal");
3962
3963 std::unique_ptr<std::string> prefix;
3964 if (verbose) {
3965 prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
3966 std::ostringstream os;
3967 os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
3968 std::cerr << os.str();
3969 }
3970
3971 // These are somewhat global properties, so it's safe to have
3972 // exception checks for them, rather than returning an error code.
3973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::logic_error,
3974 "The graph does not have a "
3975 "column Map yet. This method should never be called in that case. "
3976 "Please report this bug to the Tpetra developers.");
3977 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->getColMap().is_null(), std::logic_error,
3978 "The graph claims "
3979 "that it has a column Map, because hasColMap() returns true. However, "
3980 "the result of getColMap() is null. This should never happen. Please "
3981 "report this bug to the Tpetra developers.");
3982
3983 // Return value 1: The number of column indices (counting
3984 // duplicates) that could not be converted to local indices,
3985 // because they were not in the column Map on the calling process.
3986 size_t lclNumErrs = 0;
3987 std::ostringstream errStrm; // for return value 2 (error string)
3988
3989 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
3990 const map_type& colMap = *(this->getColMap());
3991
// Conversion work is needed only if this process currently stores global
// indices and owns at least one row.
3992 if (this->isGloballyIndexed() && lclNumRows != 0) {
3993 // This is a host-accessible View.
3994 typename num_row_entries_type::const_type h_numRowEnt =
3995 this->k_numRowEntries_;
3996
3997 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
3998
3999 // Allocate space for local indices.
4000 if (rowPtrsUnpacked_host.extent(0) == 0) {
4001 errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
4002 "happen here. Please report this bug to the Tpetra developers."
4003 << endl;
4004 // Need to return early.
4005 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid(),
4006 errStrm.str());
4007 }
// NOTE(review): the definition of numEnt (used below as the allocation size)
// is not shown here; presumably it is read from the last unpacked row
// pointer -- confirm.
4009
4010 // mfh 17 Dec 2016: We don't need initial zero-fill of
4011 // lclIndsUnpacked_wdv, because we will fill it below anyway.
4012 // AllowPadding would only help for aligned access (e.g.,
4013 // for vectorization) if we also were to pad each row to the
4014 // same alignment, so we'll skip AllowPadding for now.
4015
4016 // using Kokkos::AllowPadding;
4017 using Kokkos::view_alloc;
4018 using Kokkos::WithoutInitializing;
4019
4020 // When giving the label as an argument to
4021 // Kokkos::view_alloc, the label must be a string and not a
4022 // char*, else the code won't compile. This is because
4023 // view_alloc also allows a raw pointer as its first
4024 // argument. See
4025 // https://github.com/kokkos/kokkos/issues/434. This is a
4026 // large allocation typically, so the overhead of creating
4027 // an std::string is minor.
4028 const std::string label("Tpetra::CrsGraph::lclInd");
4029 if (verbose) {
4030 std::ostringstream os;
4031 os << *prefix << "(Re)allocate lclInd_wdv: old="
4032 << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4033 std::cerr << os.str();
4034 }
4035
4036 local_inds_dualv_type lclInds_dualv =
4037 local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4038 numEnt);
4039 lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4040
4041 auto lclColMap = colMap.getLocalMap();
4042 // This is a "device mirror" of the host View h_numRowEnt.
4043 //
4044 // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4045 // Device instance is to use its default constructor. See the
4046 // following Kokkos issue:
4047 //
4048 // https://github.com/kokkos/kokkos/issues/442
4049 if (verbose) {
4050 std::ostringstream os;
4051 os << *prefix << "Allocate device mirror k_numRowEnt: "
4052 << h_numRowEnt.extent(0) << endl;
4053 std::cerr << os.str();
4054 }
4055 auto k_numRowEnt =
4056 Kokkos::create_mirror_view_and_copy(device_type(), h_numRowEnt);
4057
4058 using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal;
// NOTE(review): the line naming the function called here is not shown;
// presumably it is convertColumnIndicesFromGlobalToLocal -- confirm.
4059 lclNumErrs =
4061 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4062 gblInds_wdv.getDeviceView(Access::ReadOnly),
4063 this->getRowPtrsUnpackedDevice(),
4064 lclColMap,
4065 k_numRowEnt);
4066 if (lclNumErrs != 0) {
// Rank for the message; falls back to 0 when the Map or its communicator is
// null.
4067 const int myRank = [this]() {
4068 auto map = this->getMap();
4069 if (map.is_null()) {
4070 return 0;
4071 } else {
4072 auto comm = map->getComm();
4073 return comm.is_null() ? 0 : comm->getRank();
4074 }
4075 }();
4076 const bool pluralNumErrs = (lclNumErrs != static_cast<size_t>(1));
// Subject/verb agreement: "indices that do not" vs. "index that does not".
4077 errStrm << "(Process " << myRank << ") When converting column "
4078 "indices from global to local, we encountered "
4079 << lclNumErrs
4080 << " ind" << (pluralNumErrs ? "ices" : "ex")
4081 << " that do" << (pluralNumErrs ? "" : "es")
4082 << " not live in the column Map on this process." << endl;
4083 }
4084
4085 // We've converted column indices from global to local, so we
4086 // can deallocate the global column indices (which we know are
4087 // in 1-D storage, because the graph has static profile).
4088 if (verbose) {
4089 std::ostringstream os;
4090 os << *prefix << "Free gblInds_wdv: "
4091 << gblInds_wdv.extent(0) << endl;
4092 std::cerr << os.str();
4093 }
4094 gblInds_wdv = global_inds_wdv_type();
4095 } // globallyIndexed() && lclNumRows > 0
4096
// The state flags are flipped even when no conversion work was done above.
4097 this->indicesAreLocal_ = true;
4098 this->indicesAreGlobal_ = false;
4099 this->checkInternalState();
4100
4101 return std::make_pair(lclNumErrs, errStrm.str());
4102}
4103
// Build the graph's column Map by delegating to Details::makeColMap, and
// store the result in colMap_.
//
// \param remotePIDs [in/out] Passed through to Details::makeColMap.
//   Presumably filled with the owning process of each remote column index --
//   confirm against Details::makeColMap.
//
// In debug mode (debug_), the per-process error code from Details::makeColMap
// is checked across the communicator; if the check fails, all processes'
// error messages are gathered and a std::runtime_error is thrown. Outside
// debug mode the error code is discarded.
4104template <class LocalOrdinal, class GlobalOrdinal, class Node>
4106 makeColMap(Teuchos::Array<int>& remotePIDs) {
4108 using std::endl;
4109 const char tfecfFuncName[] = "makeColMap";
4110
4111 ProfilingRegion regionSortAndMerge("Tpetra::CrsGraph::makeColMap");
4112 std::unique_ptr<std::string> prefix;
4113 if (verbose_) {
4114 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4115 std::ostringstream os;
4116 os << *prefix << "Start" << endl;
4117 std::cerr << os.str();
4118 }
4119
4120 // this->colMap_ should be null at this point, but we accept the
4121 // future possibility that it might not be (esp. if we decide
4122 // later to support graph structure changes after first
4123 // fillComplete, which CrsGraph does not currently (as of 12 Feb
4124 // 2017) support).
4125 Teuchos::RCP<const map_type> colMap = this->colMap_;
4126 const bool sortEachProcsGids =
4127 this->sortGhostsAssociatedWithEachProcessor_;
4128
4129 // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4130 // per-process error code. If an error does occur on a process,
4131 // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4132 // notice that error. This is the caller's responsibility. For
4133 // now, we only propagate (to all processes) and report the error
4134 // in debug mode. In the future, we need to add the local/global
4135 // error handling scheme used in BlockCrsMatrix to this class.
4136 if (debug_) {
4137 using Teuchos::outArg;
4138 using Teuchos::REDUCE_MIN;
4139 using Teuchos::reduceAll;
4140
4141 std::ostringstream errStrm;
4142 const int lclErrCode =
4143 Details::makeColMap(colMap, remotePIDs,
4144 getDomainMap(), *this, sortEachProcsGids, &errStrm);
4145 auto comm = this->getComm();
4146 if (!comm.is_null()) {
4147 const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4148 int gblSuccess = 0; // output argument
// NOTE(review): the all-reduce that fills gblSuccess is not shown here;
// presumably a REDUCE_MIN over lclSuccess -- confirm.
4151 if (gblSuccess != 1) {
4152 std::ostringstream os;
4153 Details::gathervPrint(os, errStrm.str(), *comm);
4154 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
4155 ": An error happened on at "
4156 "least one process in the CrsGraph's communicator. "
4157 "Here are all processes' error messages:"
4158 << std::endl
4159 << os.str());
4160 }
4161 }
4162 } else {
// Non-debug path: same call, but no error stream and the return code is
// explicitly ignored.
4163 (void)Details::makeColMap(colMap, remotePIDs,
4164 getDomainMap(), *this, sortEachProcsGids, nullptr);
4165 }
4166 // See above. We want to admit the possibility of makeColMap
4167 // actually revising an existing column Map, even though that
4168 // doesn't currently (as of 10 May 2017) happen.
4169 this->colMap_ = colMap;
4170
4171 checkInternalState();
4172 if (verbose_) {
4173 std::ostringstream os;
4174 os << *prefix << "Done" << endl;
4175 std::cerr << os.str();
4176 }
4177}
4178
// Sort and/or merge the column indices of every local row, on host.
//
// \param sorted [in] Whether the indices are already sorted; passed through
//   to sortAndMergeRowIndices() for each row.
// \param merged [in] Whether the indices are already merged (no duplicates);
//   passed through to sortAndMergeRowIndices() for each row.
//
// Throws std::logic_error if the graph is globally indexed, or if merging is
// requested (!merged) while the graph's storage is already optimized.
//
// Work is done only when !sorted || !merged; in that case indicesAreSorted_
// and noRedundancies_ are both set to true afterwards. In verbose mode the
// per-row return values of sortAndMergeRowIndices() are summed and printed as
// totalNumDups.
4179template <class LocalOrdinal, class GlobalOrdinal, class Node>
4181 sortAndMergeAllIndices(const bool sorted, const bool merged) {
4182 using std::endl;
4183 using LO = LocalOrdinal;
4184 using host_execution_space =
4185 typename Kokkos::View<LO*, device_type>::host_mirror_type::
4186 execution_space;
4187 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4188 const char tfecfFuncName[] = "sortAndMergeAllIndices";
4189 Details::ProfilingRegion regionSortAndMerge("Tpetra::CrsGraph::sortAndMergeAllIndices");
4190
4191 std::unique_ptr<std::string> prefix;
4192 if (verbose_) {
4193 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4194 std::ostringstream os;
4195 os << *prefix << "Start: "
4196 << "sorted=" << (sorted ? "true" : "false")
4197 << ", merged=" << (merged ? "true" : "false") << endl;
4198 std::cerr << os.str();
4199 }
4200 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed(), std::logic_error,
4201 "This method may only be called after makeIndicesLocal.");
4202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!merged && this->isStorageOptimized(), std::logic_error,
4203 "The graph is already storage optimized, so we shouldn't be "
4204 "merging any indices. "
4205 "Please report this bug to the Tpetra developers.");
4206
4207 if (!sorted || !merged) {
4208 const LO lclNumRows(this->getLocalNumRows());
4209 auto range = range_type(0, lclNumRows);
4210
4211 if (verbose_) {
4212 size_t totalNumDups = 0;
// NOTE(review): unlike the non-verbose branch below, this branch does not
// call getRowPtrsUnpackedHost() before entering the parallel region --
// confirm whether that is intentional.
4213 // Sync and mark-modified the local indices before disabling WDV tracking
4214 lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
// NOTE(review): the statements that disable and re-enable WDV tracking around
// the parallel region (referred to by the comment above) are not shown here.
4216 Kokkos::parallel_reduce(
4217 range,
4218 [this, sorted, merged](const LO lclRow, size_t& numDups) {
4219 const RowInfo rowInfo = this->getRowInfo(lclRow);
4220 numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4221 },
4222 totalNumDups);
4224 std::ostringstream os;
4225 os << *prefix << "totalNumDups=" << totalNumDups << endl;
4226 std::cerr << os.str();
4227 } else {
4228 // make sure that host rowptrs have been created before we enter the parallel region
4229 (void)this->getRowPtrsUnpackedHost();
4230 // Sync and mark-modified the local indices before disabling WDV tracking
4231 lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4233 Kokkos::parallel_for(range,
4234 [this, sorted, merged](const LO lclRow) {
4235 const RowInfo rowInfo = this->getRowInfo(lclRow);
4236 this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4237 });
4239 }
4240 this->indicesAreSorted_ = true; // we just sorted every row
4241 this->noRedundancies_ = true; // we just merged every row
4242 }
4243
4244 if (verbose_) {
4245 std::ostringstream os;
4246 os << *prefix << "Done" << endl;
4247 std::cerr << os.str();
4248 }
4249}
4250
// Create the graph's Import and Export objects if they do not exist yet and
// are needed.
//
// \param remotePIDs [in/out] Passed to the Import constructor when
//   useRemotePIDs is true.
// \param useRemotePIDs [in] Selects the Import constructor overload that
//   takes remotePIDs.
//
// Throws std::logic_error if the graph has no column Map.
//
// An Import (domainMap_ -> colMap_) is built only when importer_ is null and
// the two Maps differ; an Export (rowMap_ -> rangeMap_) only when exporter_ is
// null and the two Maps differ. If the graph's parameter list has an "Import"
// or "Export" sublist, the sublist variable is passed to the corresponding
// constructor.
4251template <class LocalOrdinal, class GlobalOrdinal, class Node>
4253 makeImportExport(Teuchos::Array<int>& remotePIDs,
4254 const bool useRemotePIDs) {
4255 using Teuchos::ParameterList;
4256 using Teuchos::RCP;
4257 using Teuchos::rcp;
4258 using ::Tpetra::Details::ProfilingRegion;
4259 const char tfecfFuncName[] = "makeImportExport: ";
4260 ProfilingRegion regionMIE("Tpetra::CrsGraph::makeImportExport");
4261
4262 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::logic_error,
4263 "This method may not be called unless the graph has a column Map.");
4264 RCP<ParameterList> params = this->getNonconstParameterList(); // could be null
4265
4266 // Don't do any checks to see if we need to create the Import, if
4267 // it exists already.
4268 //
4269 // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4270 // change CrsGraph in the future to allow changing the column
4271 // Map after fillComplete. For now, the column Map is fixed
4272 // after the first fillComplete call.
4273 if (importer_.is_null()) {
4274 // Create the Import instance if necessary.
// The pointer comparison is tried first; isSameAs() is only evaluated when the
// two RCPs differ.
4275 if (domainMap_ != colMap_ && (!domainMap_->isSameAs(*colMap_))) {
4276 if (params.is_null() || !params->isSublist("Import")) {
4277 if (useRemotePIDs) {
4278 importer_ = rcp(new import_type(domainMap_, colMap_, remotePIDs));
4279 } else {
4280 importer_ = rcp(new import_type(domainMap_, colMap_));
4281 }
4282 } else {
// NOTE(review): the definition of importSublist and the declaration of newImp
// are not shown here; presumably importSublist is the "Import" sublist of
// params -- confirm.
4284 if (useRemotePIDs) {
4286 rcp(new import_type(domainMap_, colMap_, remotePIDs,
4287 importSublist));
4288 importer_ = newImp;
4289 } else {
4290 importer_ = rcp(new import_type(domainMap_, colMap_, importSublist));
4291 }
4292 }
4293 }
4294 }
4295
4296 // Don't do any checks to see if we need to create the Export, if
4297 // it exists already.
4298 if (exporter_.is_null()) {
4299 // Create the Export instance if necessary.
4300 if (rangeMap_ != rowMap_ && !rangeMap_->isSameAs(*rowMap_)) {
4301 if (params.is_null() || !params->isSublist("Export")) {
4302 exporter_ = rcp(new export_type(rowMap_, rangeMap_));
4303 } else {
// NOTE(review): the definition of exportSublist is not shown here; presumably
// it is the "Export" sublist of params -- confirm.
4305 exporter_ = rcp(new export_type(rowMap_, rangeMap_, exportSublist));
4306 }
4307 }
4308 }
4309}
4310
// Return a one-line, human-readable summary of the graph.
//
// \return The base-class description (dist_object_type::description())
//   followed by a braced status block. When the graph is fill complete the
//   block lists the global row count, global column count, and global number
//   of entries; otherwise it lists only the global row count.
4311template <class LocalOrdinal, class GlobalOrdinal, class Node>
4312std::string
4314 description() const {
4315 std::ostringstream oss;
4316 oss << dist_object_type::description();
4317 if (isFillComplete()) {
4318 oss << "{status = fill complete"
4319 << ", global rows = " << getGlobalNumRows()
4320 << ", global cols = " << getGlobalNumCols()
4321 << ", global num entries = " << getGlobalNumEntries()
4322 << "}";
4323 } else {
// Column and entry counts are only reported in the fill-complete branch above.
4324 oss << "{status = fill not complete"
4325 << ", global rows = " << getGlobalNumRows()
4326 << "}";
4327 }
4328 return oss.str();
4329}
4330
// Print this graph to `out` at the requested verbosity level.
//
// \param out       Stream to print to; output is indented one tab level
//                  (Teuchos::OSTab) for the duration of the call.
// \param verbLevel Requested verbosity. VERB_DEFAULT is treated as VERB_LOW.
//
// What is printed:
//   - VERB_NONE: nothing.
//   - Any other level: rank 0 prints description(), and, if the graph is fill
//     complete, the global maximum number of entries in a row.
//   - VERB_MEDIUM, VERB_HIGH, VERB_EXTREME: the row map and every non-null
//     column / domain / range map are described at the same level, then each
//     rank in turn prints its local entry count and local max row length.
//   - VERB_HIGH, VERB_EXTREME: each rank in turn prints one line per local
//     row (rank, global row index, number of entries).
//   - VERB_EXTREME: each row line is followed by that row's column indices,
//     printed as global indices.
//
// The per-rank loops call comm->barrier(), so at VERB_MEDIUM and above this
// has to be called by every process in the communicator.
//
// NOTE(review): the embedded listing numbers jump from 4331 to 4333, so one
// line of the definition (presumably the return type and class qualifier) is
// absent from this listing -- confirm against the real file.
4331template <class LocalOrdinal, class GlobalOrdinal, class Node>
4333 describe(Teuchos::FancyOStream& out,
4334 const Teuchos::EVerbosityLevel verbLevel) const {
4335 using std::endl;
4336 using std::setw;
4337 using Teuchos::ArrayView;
4338 using Teuchos::Comm;
4339 using Teuchos::RCP;
4340 using Teuchos::VERB_DEFAULT;
4341 using Teuchos::VERB_EXTREME;
4342 using Teuchos::VERB_HIGH;
4343 using Teuchos::VERB_LOW;
4344 using Teuchos::VERB_MEDIUM;
4345 using Teuchos::VERB_NONE;
4346
4347 Teuchos::EVerbosityLevel vl = verbLevel;
4348 if (vl == VERB_DEFAULT) vl = VERB_LOW;
4349 RCP<const Comm<int>> comm = this->getComm();
4350 const int myImageID = comm->getRank(),
4351 numImages = comm->getSize();
// Column width for the per-row table: one column per power of ten reached
// below the global row count, but never narrower than 11 (the length of the
// widest header used below, "Num Entries"), plus 2 characters of padding.
4352 size_t width = 1;
4353 for (size_t dec = 10; dec < getGlobalNumRows(); dec *= 10) {
4354 ++width;
4355 }
4356 width = std::max<size_t>(width, static_cast<size_t>(11)) + 2;
4357 Teuchos::OSTab tab(out);
4358 // none: print nothing
4359 // low: print O(1) info from node 0
4360 // medium: print O(P) info, num entries per node
4361 // high: print O(N) info, num entries per row
4362 // extreme: print O(NNZ) info: print graph indices
4363 //
4364 // for medium and higher, print constituent objects at specified verbLevel
4365 if (vl != VERB_NONE) {
4366 if (myImageID == 0) out << this->description() << std::endl;
4367 // O(1) globals, minus what was already printed by description()
4368 if (isFillComplete() && myImageID == 0) {
4369 out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4370 }
4371 // constituent objects
4372 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4373 if (myImageID == 0) out << "\nRow map: " << std::endl;
// NOTE(review): unlike the column/domain/range maps below, rowMap_ is
// dereferenced without a null check.
4374 rowMap_->describe(out, vl);
4375 if (colMap_ != Teuchos::null) {
4376 if (myImageID == 0) out << "\nColumn map: " << std::endl;
4377 colMap_->describe(out, vl);
4378 }
4379 if (domainMap_ != Teuchos::null) {
4380 if (myImageID == 0) out << "\nDomain map: " << std::endl;
4381 domainMap_->describe(out, vl);
4382 }
4383 if (rangeMap_ != Teuchos::null) {
4384 if (myImageID == 0) out << "\nRange map: " << std::endl;
4385 rangeMap_->describe(out, vl);
4386 }
4387 }
4388 // O(P) data
4389 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
// Ranks take turns: on iteration imageCtr only that rank prints, and all
// ranks meet at the barriers before the next rank's turn.
4390 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4391 if (myImageID == imageCtr) {
4392 out << "Node ID = " << imageCtr << std::endl
4393 << "Node number of entries = " << this->getLocalNumEntries() << std::endl
4394 << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4395 if (!indicesAreAllocated()) {
4396 out << "Indices are not allocated." << std::endl;
4397 }
4398 }
// NOTE(review): three back-to-back barriers; presumably extra slack so that
// the printing rank's output is flushed before the next rank prints --
// confirm whether one would suffice.
4399 comm->barrier();
4400 comm->barrier();
4401 comm->barrier();
4402 }
4403 }
4404 // O(N) and O(NNZ) data
4405 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4406 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4407 if (myImageID == imageCtr) {
// Table header for this rank's rows.
4408 out << std::setw(width) << "Node ID"
4409 << std::setw(width) << "Global Row"
4410 << std::setw(width) << "Num Entries";
4411 if (vl == VERB_EXTREME) {
4412 out << " Entries";
4413 }
4414 out << std::endl;
4415 const LocalOrdinal lclNumRows =
4416 static_cast<LocalOrdinal>(this->getLocalNumRows());
4417 for (LocalOrdinal r = 0; r < lclNumRows; ++r) {
4418 const RowInfo rowinfo = this->getRowInfo(r);
4419 GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4420 out << std::setw(width) << myImageID
4421 << std::setw(width) << gid
4422 << std::setw(width) << rowinfo.numEntries;
4423 if (vl == VERB_EXTREME) {
4424 out << " ";
// Row r occupies entries [offset1D, offset1D + numEntries) of the index view.
4425 if (isGloballyIndexed()) {
4426 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4427 for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4428 GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4429 out << colgid << " ";
4430 }
4431 } else if (isLocallyIndexed()) {
4432 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4433 for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4434 LocalOrdinal collid = rowview[j + rowinfo.offset1D];
// Local column indices are converted through the column map, so the output
// shows global indices in both branches.
4435 out << colMap_->getGlobalElement(collid) << " ";
4436 }
4437 }
4438 }
4439 out << std::endl;
4440 }
4441 }
4442 comm->barrier();
4443 comm->barrier();
4444 comm->barrier();
4445 }
4446 }
4447 }
4448}
4449
// Size-compatibility hook for a transfer whose source is the given object.
//
// The argument is unused and the function always returns true: no check of
// any kind is performed here, including no check of the source's dynamic
// type.
//
// NOTE(review): the embedded listing numbers jump from 4450 to 4452, so one
// line of the definition (presumably the return type and class qualifier) is
// absent from this listing -- confirm against the real file.
4450template <class LocalOrdinal, class GlobalOrdinal, class Node>
4452 checkSizes(const SrcDistObject& /* source */) {
4453 // It's not clear what kind of compatibility checks on sizes can
4454 // be performed here. Epetra_CrsGraph doesn't check any sizes for
4455 // compatibility.
4456 return true;
4457}
4458
// Insert into this graph the rows of the source that are local to this
// process: first the rows the source and target row maps share, then the rows
// that are local on both sides but sit at different local indices.
//
// Visible parameters:
//   numSameIDs      Number of leading local rows copied one-for-one; row i of
//                   the source row map is inserted under the same global row
//                   index.
//   permuteToLIDs   Local row indices in this graph's row map (see the
//                   tgtRowMap lookups below).
//   permuteFromLIDs Matching local row indices in the source's row map.
//   CombineMode     Ignored.
//
// Before any row is inserted, computeCrsPadding / applyCrsPadding are called
// on this graph. Rows are then read either by copy (getGlobalRowCopy) or by
// view (getGlobalRowView), depending on whether the source is fill complete
// and whether it is a CrsGraph; either way the column indices are handed to
// insertGlobalIndices as global indices.
//
// NOTE(review): this listing has gaps. The embedded numbers skip 4460-4461,
// 4464, 4466, 4471, 4483 and 4488, so parts of the signature (including the
// `source` parameter), the this_CRS_type alias, the head of the size-check
// macro and the srcRowGraph declarator are not visible here -- confirm against
// the real file before editing the code.
4459template <class LocalOrdinal, class GlobalOrdinal, class Node>
4462 const size_t numSameIDs,
4463 const Kokkos::DualView<const local_ordinal_type*,
4465 const Kokkos::DualView<const local_ordinal_type*,
4467 const CombineMode /*CM*/) {
4468 using std::endl;
4469 using LO = local_ordinal_type;
4470 using GO = global_ordinal_type;
4472 const char tfecfFuncName[] = "copyAndPermute: ";
4473 const bool verbose = verbose_;
4474
4475 std::unique_ptr<std::string> prefix;
4476 if (verbose) {
4477 prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4478 std::ostringstream os;
4479 os << *prefix << endl;
4480 std::cerr << os.str();
4481 }
4482
// Tail of the check that the two permutation lists have equal length; the
// condition itself is on a line missing from this listing.
4484 std::runtime_error, "permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) << " != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) << ".");
4485
4486 // We know from checkSizes that the source object is a
4487 // row_graph_type, so we don't need to check again.
// NOTE(review): checkSizes as defined in this file returns true without
// inspecting the source, so the reference dynamic_cast below is the only type
// check; a reference dynamic_cast throws std::bad_cast on mismatch.
4489 dynamic_cast<const row_graph_type&>(source);
4490
4491 if (verbose) {
4492 std::ostringstream os;
4493 os << *prefix << "Compute padding" << endl;
4494 std::cerr << os.str();
4495 }
4496 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4497 permuteToLIDs, permuteFromLIDs, verbose);
4498 applyCrsPadding(*padding, verbose);
4499
4500 // If the source object is actually a CrsGraph, we can use view
4501 // mode instead of copy mode to access the entries in each row,
4502 // if the graph is not fill complete.
4503 const this_CRS_type* srcCrsGraph =
4504 dynamic_cast<const this_CRS_type*>(&source);
4505
4506 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4507 const map_type& tgtRowMap = *(getRowMap());
4508 const bool src_filled = srcRowGraph.isFillComplete();
// Scratch buffer reused by every copy-mode row read below.
4509 nonconst_global_inds_host_view_type row_copy;
4510 LO myid = 0;
4511
4512 //
4513 // "Copy" part of "copy and permute."
4514 //
4515 if (src_filled || srcCrsGraph == nullptr) {
4516 if (verbose) {
4517 std::ostringstream os;
4518 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4519 std::cerr << os.str();
4520 }
4521 // If the source graph is fill complete, we can't use view mode,
4522 // because the data might be stored in a different format not
4523 // compatible with the expectations of view mode. Also, if the
4524 // source graph is not a CrsGraph, we can't use view mode,
4525 // because RowGraph only provides copy mode access to the data.
4526 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4527 const GO gid = srcRowMap.getGlobalElement(myid);
4528 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow(gid);
4529 Kokkos::resize(row_copy, row_length);
// The count written back by getGlobalRowCopy is not used afterwards;
// row_length is what gets passed on to insertGlobalIndices.
4530 size_t check_row_length = 0;
4531 srcRowGraph.getGlobalRowCopy(gid, row_copy, check_row_length);
4532 this->insertGlobalIndices(gid, row_length, row_copy.data());
4533 }
4534 } else {
4535 if (verbose) {
4536 std::ostringstream os;
4537 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4538 std::cerr << os.str();
4539 }
4540 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4541 const GO gid = srcRowMap.getGlobalElement(myid);
4542 global_inds_host_view_type row;
4543 srcCrsGraph->getGlobalRowView(gid, row);
4544 this->insertGlobalIndices(gid, row.extent(0), row.data());
4545 }
4546 }
4547
4548 //
4549 // "Permute" part of "copy and permute."
4550 //
4551 auto permuteToLIDs_h = permuteToLIDs.view_host();
4552 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4553
// Same copy-vs-view choice as above, but the source row (srcgid) is inserted
// under a possibly different target row (mygid).
4554 if (src_filled || srcCrsGraph == nullptr) {
4555 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
4556 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
4557 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
4558 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow(srcgid);
4559 Kokkos::resize(row_copy, row_length);
4560 size_t check_row_length = 0;
4561 srcRowGraph.getGlobalRowCopy(srcgid, row_copy, check_row_length);
4562 this->insertGlobalIndices(mygid, row_length, row_copy.data());
4563 }
4564 } else {
4565 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
4566 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
4567 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
4568 global_inds_host_view_type row;
4569 srcCrsGraph->getGlobalRowView(srcgid, row);
4570 this->insertGlobalIndices(mygid, row.extent(0), row.data());
4571 }
4572 }
4573
4574 if (verbose) {
4575 std::ostringstream os;
4576 os << *prefix << "Done" << endl;
4577 std::cerr << os.str();
4578 }
4579}
4580
// Apply a previously computed padding description to this graph's index
// storage by calling padCrsArrays on the row pointers and column indices.
//
// \param padding Padding description handed to padCrsArrays.
// \param verbose Whether to print progress messages to std::cerr.
//
// Steps, as written below:
//   1. Allocate indices as GlobalIndices if nothing is allocated yet.
//   2. Copy the unpacked row pointers into a mutable view (row_ptrs_beg) and
//      build a matching "row end" view (row_ptrs_end).
//   3. Call padCrsArrays on the global or local index view, depending on
//      isGloballyIndexed().
//   4. If k_numRowEntries_ was non-empty, recompute it from the new begin/end
//      offsets; then install row_ptrs_beg via setRowPtrsUnpacked.
//
// NOTE(review): the embedded listing numbers skip 4582 and 4585-4586, so the
// return type / class qualifier and two lines at the top of the body
// (presumably the declarations that bring ProfilingRegion and padCrsArrays
// into scope) are absent from this listing -- confirm against the real file.
4581template <class LocalOrdinal, class GlobalOrdinal, class Node>
4583 applyCrsPadding(const padding_type& padding,
4584 const bool verbose) {
4587 using std::endl;
4588 using LO = local_ordinal_type;
4589 using row_ptrs_type =
4590 typename local_graph_device_type::row_map_type::non_const_type;
4591 using range_policy =
4592 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4593 const char tfecfFuncName[] = "applyCrsPadding";
4594 ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4595
4596 std::unique_ptr<std::string> prefix;
4597 if (verbose) {
4598 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4599 std::ostringstream os;
4600 os << *prefix << "padding: ";
4601 padding.print(os);
4602 os << endl;
4603 std::cerr << os.str();
4604 }
// The rank is only looked up when verbose; otherwise -1 is passed on to
// padCrsArrays. -1 is also used when the map or its communicator is null.
4605 const int myRank = !verbose ? -1 : [&]() {
4606 auto map = this->getMap();
4607 if (map.is_null()) {
4608 return -1;
4609 }
4610 auto comm = map->getComm();
4611 if (comm.is_null()) {
4612 return -1;
4613 }
4614 return comm->getRank();
4615 }();
4616
4617 // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4618 // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4619 // size needs to increase. That should be the job of
4620 // padCrsArrays.
4621
4622 // Assume global indexing if we don't have any indices yet
4623 if (!indicesAreAllocated()) {
4624 if (verbose) {
4625 std::ostringstream os;
4626 os << *prefix << "Call allocateIndices" << endl;
4627 std::cerr << os.str();
4628 }
4629 allocateIndices(GlobalIndices, verbose);
4630 }
4631 TEUCHOS_ASSERT(indicesAreAllocated());
4632
4633 // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4634 // would use it directly.
4635
4636 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4637 if (verbose) {
4638 std::ostringstream os;
4639 os << *prefix << "Allocate row_ptrs_beg: "
4640 << rowPtrsUnpacked_dev.extent(0) << endl;
4641 std::cerr << os.str();
4642 }
4643 using Kokkos::view_alloc;
4644 using Kokkos::WithoutInitializing;
4645 row_ptrs_type row_ptrs_beg(
4646 view_alloc("row_ptrs_beg", WithoutInitializing),
4647 rowPtrsUnpacked_dev.extent(0));
4648 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4649 Kokkos::deep_copy(execution_space(), row_ptrs_beg, rowPtrsUnpacked_dev);
4650
// N is the number of rows: one less than the row-pointer length, or 0 when
// the row-pointer array is empty.
4651 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) : size_t(row_ptrs_beg.extent(0) - 1);
4652 if (verbose) {
4653 std::ostringstream os;
4654 os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4655 std::cerr << os.str();
4656 }
4657 row_ptrs_type row_ptrs_end(
4658 view_alloc("row_ptrs_end", WithoutInitializing), N);
4659 row_ptrs_type num_row_entries;
4660
// A non-empty k_numRowEntries_ selects the "unpacked storage" path below and
// means the counts must be written back after padding.
4661 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4662
4663 execution_space().fence(); // we need above deep_copy to be done
4664
4665 if (refill_num_row_entries) { // Case 1: Unpacked storage
4666 // We can't assume correct *this capture until C++17, and it's
4667 // likely more efficient just to capture what we need anyway.
4668 num_row_entries =
4669 row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4670 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
// NOTE(review): range_policy is declared with IndexType<LO> while the
// lambdas in this function take a size_t index -- confirm this is intended.
4671 Kokkos::parallel_for(
4672 "Fill end row pointers", range_policy(0, N),
4673 KOKKOS_LAMBDA(const size_t i) {
4674 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4675 });
4676 } else {
4677 // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
4678 // storage, we don't need row_ptr_end to be separate allocation;
4679 // could just have it alias row_ptr_beg+1.
// No per-row counts: each row ends where the next one begins.
4680 Kokkos::parallel_for(
4681 "Fill end row pointers", range_policy(0, N),
4682 KOKKOS_LAMBDA(const size_t i) {
4683 row_ptrs_end(i) = row_ptrs_beg(i + 1);
4684 });
4685 }
4686
4687 if (isGloballyIndexed()) {
4688 padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
4689 padding, myRank, verbose);
4690 } else {
4691 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4692 padding, myRank, verbose);
4693 }
4694
4695 if (refill_num_row_entries) {
// Recompute the per-row counts from the begin/end offsets as they stand
// after padCrsArrays, and store them back in k_numRowEntries_.
4696 Kokkos::parallel_for(
4697 "Fill num entries", range_policy(0, N),
4698 KOKKOS_LAMBDA(const size_t i) {
4699 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4700 });
4701 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
4702 }
4703 if (verbose) {
4704 std::ostringstream os;
4705 os << *prefix << "Reassign k_rowPtrs_; old size: "
4706 << rowPtrsUnpacked_dev.extent(0) << ", new size: "
4707 << row_ptrs_beg.extent(0) << endl;
4708 std::cerr << os.str();
// NOTE(review): this size check sits inside the verbose block, so it only
// runs when verbose output is enabled.
4709 TEUCHOS_ASSERT(rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0));
4710 }
4711
4712 setRowPtrsUnpacked(row_ptrs_beg);
4713}
4714
4715template <class LocalOrdinal, class GlobalOrdinal, class Node>
4716std::unique_ptr<
4717 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4718CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4719 computeCrsPadding(
4720 const RowGraph<LocalOrdinal, GlobalOrdinal, Node>& source,
4721 const size_t numSameIDs,
4722 const Kokkos::DualView<const local_ordinal_type*,
4723 buffer_device_type>& permuteToLIDs,
4724 const Kokkos::DualView<const local_ordinal_type*,
4725 buffer_device_type>& permuteFromLIDs,
4726 const bool verbose) const {
4727 using LO = local_ordinal_type;
4728 using std::endl;
4729
4730 std::unique_ptr<std::string> prefix;
4731 if (verbose) {
4732 prefix = this->createPrefix("CrsGraph",
4733 "computeCrsPadding(same & permute)");
4734 std::ostringstream os;
4735 os << *prefix << "{numSameIDs: " << numSameIDs
4736 << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
4737 << endl;
4738 std::cerr << os.str();
4739 }
4740
4741 const int myRank = [&]() {
4742 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4743 return comm.is_null() ? -1 : comm->getRank();
4744 }();
4745 std::unique_ptr<padding_type> padding(
4746 new padding_type(myRank, numSameIDs,
4747 permuteFromLIDs.extent(0)));
4748
4749 computeCrsPaddingForSameIDs(*padding, source,
4750 static_cast<LO>(numSameIDs));
4751 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
4752 permuteFromLIDs);
4753 return padding;
4754}
4755
4756template <class LocalOrdinal, class GlobalOrdinal, class Node>
4757void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4758 computeCrsPaddingForSameIDs(
4759 padding_type& padding,
4760 const RowGraph<local_ordinal_type, global_ordinal_type,
4761 node_type>& source,
4762 const local_ordinal_type numSameIDs) const {
4763 using LO = local_ordinal_type;
4764 using GO = global_ordinal_type;
4765 using Details::Impl::getRowGraphGlobalRow;
4766 using std::endl;
4767 const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
4768
4769 std::unique_ptr<std::string> prefix;
4770 const bool verbose = verbose_;
4771 if (verbose) {
4772 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4773 std::ostringstream os;
4774 os << *prefix << "numSameIDs: " << numSameIDs << endl;
4775 std::cerr << os.str();
4776 }
4777
4778 if (numSameIDs == 0) {
4779 return;
4780 }
4781
4782 const map_type& srcRowMap = *(source.getRowMap());
4783 const map_type& tgtRowMap = *rowMap_;
4784 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4785 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
4786 const bool src_is_unique =
4787 srcCrs == nullptr ? false : srcCrs->isMerged();
4788 const bool tgt_is_unique = this->isMerged();
4789
4790 std::vector<GO> srcGblColIndsScratch;
4791 std::vector<GO> tgtGblColIndsScratch;
4792
4793 execute_sync_host_uvm_access(); // protect host UVM access
4794 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
4795 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
4796 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
4797 auto srcGblColInds = getRowGraphGlobalRow(
4798 srcGblColIndsScratch, source, srcGblRowInd);
4799 auto tgtGblColInds = getRowGraphGlobalRow(
4800 tgtGblColIndsScratch, *this, tgtGblRowInd);
4801 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
4802 tgtGblColInds.size(), tgt_is_unique,
4803 srcGblColInds.getRawPtr(),
4804 srcGblColInds.size(), src_is_unique);
4805 }
4806 if (verbose) {
4807 std::ostringstream os;
4808 os << *prefix << "Done" << endl;
4809 std::cerr << os.str();
4810 }
4811}
4812
4813template <class LocalOrdinal, class GlobalOrdinal, class Node>
4814void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4815 computeCrsPaddingForPermutedIDs(
4816 padding_type& padding,
4817 const RowGraph<local_ordinal_type, global_ordinal_type,
4818 node_type>& source,
4819 const Kokkos::DualView<const local_ordinal_type*,
4820 buffer_device_type>& permuteToLIDs,
4821 const Kokkos::DualView<const local_ordinal_type*,
4822 buffer_device_type>& permuteFromLIDs) const {
4823 using LO = local_ordinal_type;
4824 using GO = global_ordinal_type;
4825 using Details::Impl::getRowGraphGlobalRow;
4826 using std::endl;
4827 const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
4828
4829 std::unique_ptr<std::string> prefix;
4830 const bool verbose = verbose_;
4831 if (verbose) {
4832 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4833 std::ostringstream os;
4834 os << *prefix << "permuteToLIDs.extent(0): "
4835 << permuteToLIDs.extent(0)
4836 << ", permuteFromLIDs.extent(0): "
4837 << permuteFromLIDs.extent(0) << endl;
4838 std::cerr << os.str();
4839 }
4840
4841 if (permuteToLIDs.extent(0) == 0) {
4842 return;
4843 }
4844
4845 const map_type& srcRowMap = *(source.getRowMap());
4846 const map_type& tgtRowMap = *rowMap_;
4847 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4848 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
4849 const bool src_is_unique =
4850 srcCrs == nullptr ? false : srcCrs->isMerged();
4851 const bool tgt_is_unique = this->isMerged();
4852
4853 TEUCHOS_ASSERT(!permuteToLIDs.need_sync_host());
4854 auto permuteToLIDs_h = permuteToLIDs.view_host();
4855 TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_host());
4856 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4857
4858 std::vector<GO> srcGblColIndsScratch;
4859 std::vector<GO> tgtGblColIndsScratch;
4860 const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
4861
4862 execute_sync_host_uvm_access(); // protect host UVM access
4863 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
4864 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
4865 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
4866 auto srcGblColInds = getRowGraphGlobalRow(
4867 srcGblColIndsScratch, source, srcGblRowInd);
4868 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
4869 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
4870 auto tgtGblColInds = getRowGraphGlobalRow(
4871 tgtGblColIndsScratch, *this, tgtGblRowInd);
4872 padding.update_permute(whichPermute, tgtLclRowInd,
4873 tgtGblColInds.getRawPtr(),
4874 tgtGblColInds.size(), tgt_is_unique,
4875 srcGblColInds.getRawPtr(),
4876 srcGblColInds.size(), src_is_unique);
4877 }
4878
4879 if (verbose) {
4880 std::ostringstream os;
4881 os << *prefix << "Done" << endl;
4882 std::cerr << os.str();
4883 }
4884}
4885
4886template <class LocalOrdinal, class GlobalOrdinal, class Node>
4887std::unique_ptr<
4888 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4889CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4890 computeCrsPaddingForImports(
4891 const Kokkos::DualView<const local_ordinal_type*,
4892 buffer_device_type>& importLIDs,
4893 Kokkos::DualView<packet_type*, buffer_device_type> imports,
4894 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
4895 const bool verbose) const {
4896 using Details::Impl::getRowGraphGlobalRow;
4897 using std::endl;
4898 using LO = local_ordinal_type;
4899 using GO = global_ordinal_type;
4900 const char tfecfFuncName[] = "computeCrsPaddingForImports";
4901
4902 std::unique_ptr<std::string> prefix;
4903 if (verbose) {
4904 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4905 std::ostringstream os;
4906 os << *prefix << "importLIDs.extent(0): "
4907 << importLIDs.extent(0)
4908 << ", imports.extent(0): "
4909 << imports.extent(0)
4910 << ", numPacketsPerLID.extent(0): "
4911 << numPacketsPerLID.extent(0) << endl;
4912 std::cerr << os.str();
4913 }
4914
4915 const LO numImports = static_cast<LO>(importLIDs.extent(0));
4916 const int myRank = [&]() {
4917 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4918 return comm.is_null() ? -1 : comm->getRank();
4919 }();
4920 std::unique_ptr<padding_type> padding(
4921 new padding_type(myRank, numImports));
4922
4923 if (imports.need_sync_host()) {
4924 imports.sync_host();
4925 }
4926 auto imports_h = imports.view_host();
4927 if (numPacketsPerLID.need_sync_host()) {
4928 numPacketsPerLID.sync_host();
4929 }
4930 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
4931
4932 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
4933 auto importLIDs_h = importLIDs.view_host();
4934
4935 const map_type& tgtRowMap = *rowMap_;
4936 // Always merge source column indices, since isMerged() is
4937 // per-process state, and we don't know its value on other
4938 // processes that sent us data.
4939 constexpr bool src_is_unique = false;
4940 const bool tgt_is_unique = isMerged();
4941
4942 std::vector<GO> tgtGblColIndsScratch;
4943 size_t offset = 0;
4944 execute_sync_host_uvm_access(); // protect host UVM access
4945 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
4946 // CrsGraph packs just global column indices, while CrsMatrix
4947 // packs bytes (first the number of entries in the row, then the
4948 // global column indices, then other stuff like the matrix
4949 // values in that row).
4950 const LO origSrcNumEnt =
4951 static_cast<LO>(numPacketsPerLID_h[whichImport]);
4952 GO* const srcGblColInds = imports_h.data() + offset;
4953
4954 const LO tgtLclRowInd = importLIDs_h[whichImport];
4955 const GO tgtGblRowInd =
4956 tgtRowMap.getGlobalElement(tgtLclRowInd);
4957 auto tgtGblColInds = getRowGraphGlobalRow(
4958 tgtGblColIndsScratch, *this, tgtGblRowInd);
4959 const size_t origTgtNumEnt(tgtGblColInds.size());
4960
4961 padding->update_import(whichImport, tgtLclRowInd,
4962 tgtGblColInds.getRawPtr(),
4963 origTgtNumEnt, tgt_is_unique,
4964 srcGblColInds,
4965 origSrcNumEnt, src_is_unique);
4966 offset += origSrcNumEnt;
4967 }
4968
4969 if (verbose) {
4970 std::ostringstream os;
4971 os << *prefix << "Done" << endl;
4972 std::cerr << os.str();
4973 }
4974 return padding;
4975}
4976
4977template <class LocalOrdinal, class GlobalOrdinal, class Node>
4978std::unique_ptr<
4979 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4980CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4981 computePaddingForCrsMatrixUnpack(
4982 const Kokkos::DualView<const local_ordinal_type*,
4983 buffer_device_type>& importLIDs,
4984 Kokkos::DualView<char*, buffer_device_type> imports,
4985 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
4986 const bool verbose) const {
4987 using Details::PackTraits;
4988 using Details::Impl::getRowGraphGlobalRow;
4989 using std::endl;
4990 using LO = local_ordinal_type;
4991 using GO = global_ordinal_type;
4992 const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
4993
4994 std::unique_ptr<std::string> prefix;
4995 if (verbose) {
4996 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4997 std::ostringstream os;
4998 os << *prefix << "importLIDs.extent(0): "
4999 << importLIDs.extent(0)
5000 << ", imports.extent(0): "
5001 << imports.extent(0)
5002 << ", numPacketsPerLID.extent(0): "
5003 << numPacketsPerLID.extent(0) << endl;
5004 std::cerr << os.str();
5005 }
5006 const bool extraVerbose =
5007 verbose && Details::Behavior::verbose("CrsPadding");
5008
5009 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5010 TEUCHOS_ASSERT(LO(numPacketsPerLID.extent(0)) >= numImports);
5011 const int myRank = [&]() {
5012 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
5013 return comm.is_null() ? -1 : comm->getRank();
5014 }();
5015 std::unique_ptr<padding_type> padding(
5016 new padding_type(myRank, numImports));
5017
5018 if (imports.need_sync_host()) {
5019 imports.sync_host();
5020 }
5021 auto imports_h = imports.view_host();
5022 if (numPacketsPerLID.need_sync_host()) {
5023 numPacketsPerLID.sync_host();
5024 }
5025 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5026
5027 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5028 auto importLIDs_h = importLIDs.view_host();
5029
5030 const map_type& tgtRowMap = *rowMap_;
5031 // Always merge source column indices, since isMerged() is
5032 // per-process state, and we don't know its value on other
5033 // processes that sent us data.
5034 constexpr bool src_is_unique = false;
5035 const bool tgt_is_unique = isMerged();
5036
5037 std::vector<GO> srcGblColIndsScratch;
5038 std::vector<GO> tgtGblColIndsScratch;
5039 size_t offset = 0;
5040 execute_sync_host_uvm_access(); // protect host UVM access
5041 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5042 // CrsGraph packs just global column indices, while CrsMatrix
5043 // packs bytes (first the number of entries in the row, then the
5044 // global column indices, then other stuff like the matrix
5045 // values in that row).
5046 const size_t numBytes = numPacketsPerLID_h[whichImport];
5047 if (extraVerbose) {
5048 std::ostringstream os;
5049 os << *prefix << "whichImport=" << whichImport
5050 << ", numImports=" << numImports
5051 << ", numBytes=" << numBytes << endl;
5052 std::cerr << os.str();
5053 }
5054 if (numBytes == 0) {
5055 continue; // special case: no entries to unpack for this row
5056 }
5057 LO origSrcNumEnt = 0;
5058 const size_t numEntBeg = offset;
5059 const size_t numEntLen =
5060 PackTraits<LO>::packValueCount(origSrcNumEnt);
5061 TEUCHOS_ASSERT(numBytes >= numEntLen);
5062 TEUCHOS_ASSERT(imports_h.extent(0) >= numEntBeg + numEntLen);
5063 PackTraits<LO>::unpackValue(origSrcNumEnt,
5064 imports_h.data() + numEntBeg);
5065 if (extraVerbose) {
5066 std::ostringstream os;
5067 os << *prefix << "whichImport=" << whichImport
5068 << ", numImports=" << numImports
5069 << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5070 std::cerr << os.str();
5071 }
5072 TEUCHOS_ASSERT(origSrcNumEnt >= LO(0));
5073 TEUCHOS_ASSERT(numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)));
5074 const size_t gidsBeg = numEntBeg + numEntLen;
5075 if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5076 srcGblColIndsScratch.resize(origSrcNumEnt);
5077 }
5078 GO* const srcGblColInds = srcGblColIndsScratch.data();
5079 PackTraits<GO>::unpackArray(srcGblColInds,
5080 imports_h.data() + gidsBeg,
5081 origSrcNumEnt);
5082 const LO tgtLclRowInd = importLIDs_h[whichImport];
5083 const GO tgtGblRowInd =
5084 tgtRowMap.getGlobalElement(tgtLclRowInd);
5085 auto tgtGblColInds = getRowGraphGlobalRow(
5086 tgtGblColIndsScratch, *this, tgtGblRowInd);
5087 const size_t origNumTgtEnt(tgtGblColInds.size());
5088
5089 if (extraVerbose) {
5090 std::ostringstream os;
5091 os << *prefix << "whichImport=" << whichImport
5092 << ", numImports=" << numImports
5093 << ": Call padding->update_import" << endl;
5094 std::cerr << os.str();
5095 }
5096 padding->update_import(whichImport, tgtLclRowInd,
5097 tgtGblColInds.getRawPtr(),
5098 origNumTgtEnt, tgt_is_unique,
5099 srcGblColInds,
5100 origSrcNumEnt, src_is_unique);
5101 offset += numBytes;
5102 }
5103
5104 if (verbose) {
5105 std::ostringstream os;
5106 os << *prefix << "Done" << endl;
5107 std::cerr << os.str();
5108 }
5109 return padding;
5110}
5111
// Pack the rows of `source` listed in exportLIDs into `exports`, and record
// the number of packets for each row in numPacketsPerLID.
//
// \param source             Source of the transfer; must be a RowGraph with
//                           the same template parameters, otherwise
//                           std::invalid_argument is thrown.
// \param exportLIDs         Local row indices to pack.
// \param exports            [in/out] Packed output buffer; reallocated on the
//                           RowGraph-only path when its length differs from
//                           the packed length.
// \param numPacketsPerLID   [out] One packet count per entry of exportLIDs.
// \param constantNumPackets [out] Passed through to the packing routine.
//
// Throws std::runtime_error if exportLIDs and numPacketsPerLID differ in
// length, or if this graph (the target) is fill complete.
//
// Three packing paths, chosen below:
//   1. source is a RowGraph but not a CrsGraph: RowGraph::pack via Teuchos
//      arrays, followed by a deep copy into `exports` on host;
//   2. packCrsGraphNew, when the column-map / packed-row-pointer condition
//      holds;
//   3. otherwise the source's packFillActiveNew.
//
// NOTE(review): the embedded listing numbers skip 5123 and 5215, so two lines
// of the body (presumably the declarations that bring ProfilingRegion and
// packCrsGraphNew into scope) are absent from this listing -- confirm against
// the real file.
5112template <class LocalOrdinal, class GlobalOrdinal, class Node>
5113void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5114 packAndPrepare(const SrcDistObject& source,
5115 const Kokkos::DualView<const local_ordinal_type*,
5116 buffer_device_type>& exportLIDs,
5117 Kokkos::DualView<packet_type*,
5118 buffer_device_type>& exports,
5119 Kokkos::DualView<size_t*,
5120 buffer_device_type>
5121 numPacketsPerLID,
5122 size_t& constantNumPackets) {
5124 using GO = global_ordinal_type;
5125 using std::endl;
5126 using crs_graph_type =
5127 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5128 const char tfecfFuncName[] = "packAndPrepare: ";
5129 ProfilingRegion region_papn("Tpetra::CrsGraph::packAndPrepare");
5130
5131 const bool verbose = verbose_;
5132 std::unique_ptr<std::string> prefix;
5133 if (verbose) {
5134 prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5135 std::ostringstream os;
5136 os << *prefix << "Start" << endl;
5137 std::cerr << os.str();
5138 }
5139
5140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(exportLIDs.extent(0) != numPacketsPerLID.extent(0),
5141 std::runtime_error,
5142 "exportLIDs.extent(0) = " << exportLIDs.extent(0)
5143 << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
5144 << ".");
5145 const row_graph_type* srcRowGraphPtr =
5146 dynamic_cast<const row_graph_type*>(&source);
5147 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(srcRowGraphPtr == nullptr, std::invalid_argument,
5148 "Source of an Export "
5149 "or Import operation to a CrsGraph must be a RowGraph with the same "
5150 "template parameters.");
5151 // We don't check whether src_graph has had fillComplete called,
5152 // because it doesn't matter whether the *source* graph has been
5153 // fillComplete'd. The target graph can not be fillComplete'd yet.
5154 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete(), std::runtime_error,
5155 "The target graph of an Import or Export must not be fill complete.");
5156
// Null when the source is some other RowGraph implementation; that case is
// handled by the first branch below.
5157 const crs_graph_type* srcCrsGraphPtr =
5158 dynamic_cast<const crs_graph_type*>(&source);
5159
5160 if (srcCrsGraphPtr == nullptr) {
5161 using Teuchos::ArrayView;
5162 using LO = local_ordinal_type;
5163
5164 if (verbose) {
5165 std::ostringstream os;
5166 os << *prefix << "Source is a RowGraph but not a CrsGraph"
5167 << endl;
5168 std::cerr << os.str();
5169 }
5170 // RowGraph::pack serves the "old" DistObject interface. It
5171 // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5172 // entails deep-copying the exports buffer on output. RowGraph
5173 // is a convenience interface when not a CrsGraph, so we accept
5174 // the performance hit.
5175 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5176 auto exportLIDs_h = exportLIDs.view_host();
5177 ArrayView<const LO> exportLIDs_av(exportLIDs_h.data(),
5178 exportLIDs_h.extent(0));
5179 Teuchos::Array<GO> exports_a;
5180
// The counts are about to be overwritten on host, so any pending sync state
// is dropped and the host copy is marked as modified.
5181 numPacketsPerLID.clear_sync_state();
5182 numPacketsPerLID.modify_host();
5183 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5184 ArrayView<size_t> numPacketsPerLID_av(numPacketsPerLID_h.data(),
5185 numPacketsPerLID_h.extent(0));
5186 srcRowGraphPtr->pack(exportLIDs_av, exports_a, numPacketsPerLID_av,
5187 constantNumPackets);
5188 const size_t newSize = static_cast<size_t>(exports_a.size());
5189 if (static_cast<size_t>(exports.extent(0)) != newSize) {
5190 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5191 exports = exports_dv_type("exports", newSize);
5192 }
// Unmanaged host view over exports_a's storage, used as the deep_copy source.
5193 Kokkos::View<const packet_type*, Kokkos::HostSpace,
5194 Kokkos::MemoryUnmanaged>
5195 exports_a_h(exports_a.getRawPtr(), newSize);
5196 exports.clear_sync_state();
5197 exports.modify_host();
5198 // DEEP_COPY REVIEW - NOT TESTED
5199 Kokkos::deep_copy(exports.view_host(), exports_a_h);
5200 }
5201 // packCrsGraphNew requires k_rowPtrsPacked_ to be set
// NOTE(review): the condition below inspects *this (the target graph), while
// the graph handed to packCrsGraphNew is *srcCrsGraphPtr -- confirm that
// checking the target here is intended.
5202 else if (!getColMap().is_null() &&
5203 (this->getRowPtrsPackedDevice().extent(0) != 0 ||
5204 getRowMap()->getLocalNumElements() == 0)) {
5205 if (verbose) {
5206 std::ostringstream os;
5207 os << *prefix << "packCrsGraphNew path" << endl;
5208 std::cerr << os.str();
5209 }
5210 using export_pids_type =
5211 Kokkos::DualView<const int*, buffer_device_type>;
5212 export_pids_type exportPIDs; // not filling it; needed for syntax
5213 using LO = local_ordinal_type;
5214 using NT = node_type;
5216 packCrsGraphNew<LO, GO, NT>(*srcCrsGraphPtr, exportLIDs, exportPIDs,
5217 exports, numPacketsPerLID,
5218 constantNumPackets, false);
5219 } else {
5220 srcCrsGraphPtr->packFillActiveNew(exportLIDs, exports, numPacketsPerLID,
5221 constantNumPackets);
5222 }
5223
5224 if (verbose) {
5225 std::ostringstream os;
5226 os << *prefix << "Done" << endl;
5227 std::cerr << os.str();
5228 }
5229}
5230
// Legacy pack entry point that works on Teuchos::ArrayView /
// Teuchos::Array buffers.
//
// Chooses between two packing paths. If the graph has a column Map and
// either the packed row-offsets view is nonempty or the row Map has no
// local elements, the first branch is taken; otherwise the call is
// forwarded to packFillActive() with the same arguments.
//
// NOTE(review): this listing skips original lines 5232, 5240-5242 and
// 5245 (the embedded numbering jumps), so the qualified-name line, the
// statements of the first branch, and the tail of the packFillActive()
// call are not visible here. The comment on the condition suggests the
// first branch calls packCrsGraph -- confirm against the full source.
5231template <class LocalOrdinal, class GlobalOrdinal, class Node>
5233 pack(const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5234 Teuchos::Array<GlobalOrdinal>& exports,
5235 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5236 size_t& constantNumPackets) const {
5237 auto col_map = this->getColMap();
5238 // packCrsGraph requires k_rowPtrsPacked to be set
 // An empty row Map also qualifies: with zero local rows the packed
 // row-offsets view may legitimately have extent 0.
5239 if (!col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() == 0)) {
5243 } else {
 // Fallback: pack row by row through the host row views.
5244 this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5246 }
5247}
5248
// Pack the column indices of the requested rows into a flat buffer of
// global indices (Teuchos::Array interface).
//
// Parameters:
//  exportLIDs         [in]  local row indices to pack.
//  exports            [out] resized to the total number of entries and
//                           filled with global column indices, rows laid
//                           out back to back in exportLIDs order.
//  numPacketsPerLID   [out] numPacketsPerLID[i] receives the number of
//                           entries in row exportLIDs[i]. Must have the
//                           same size as exportLIDs.
//  constantNumPackets [out] always set to 0 (row lengths may differ).
//
// Errors (raised through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//  std::runtime_error if exportLIDs and numPacketsPerLID differ in size.
//  std::logic_error   if the graph is locally indexed without a column
//                     Map, if any export LID is not in the row Map, if
//                     supportsRowViews() is false, or if the packing
//                     pass recorded an invalid row or a buffer overrun.
//
// Works in two passes over the export LIDs: a parallel_reduce that
// counts entries, then a parallel_scan that computes each row's offset
// and writes its indices. Locally indexed graphs convert each local
// column index to global through the column Map while packing.
//
// NOTE(review): original listing line 5250 (presumably the
// "void CrsGraph<...>::" qualifier) is not visible in this listing.
5249template <class LocalOrdinal, class GlobalOrdinal, class Node>
5251 packFillActive(const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5252 Teuchos::Array<GlobalOrdinal>& exports,
5253 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5254 size_t& constantNumPackets) const {
5255 using std::endl;
5256 using LO = LocalOrdinal;
5257 using GO = GlobalOrdinal;
5258 using host_execution_space =
5259 typename Kokkos::View<size_t*, device_type>::
5260 host_mirror_type::execution_space;
5261 const char tfecfFuncName[] = "packFillActive: ";
5262 const bool verbose = verbose_;
5263
5264 const auto numExportLIDs = exportLIDs.size();
5265 std::unique_ptr<std::string> prefix;
5266 if (verbose) {
 // Label verbose output with this method's own name, matching
 // tfecfFuncName above.
5267 prefix = this->createPrefix("CrsGraph", "packFillActive");
5268 std::ostringstream os;
5269 os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5270 std::cerr << os.str();
5271 }
5272 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.size(), std::runtime_error,
5273 "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5274 " = "
5275 << numPacketsPerLID.size() << ".");
5276
5277 const map_type& rowMap = *(this->getRowMap());
5278 const map_type* const colMapPtr = this->colMap_.getRawPtr();
5279 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr == nullptr, std::logic_error,
5280 "This graph claims to be locally indexed, but its column Map is nullptr. "
5281 "This should never happen. Please report this bug to the Tpetra "
5282 "developers.");
5283
5284 // We may pack different amounts of data for different rows.
5285 constantNumPackets = 0;
5286
5287 // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5288 // it might be now, but we might as well be safe).
5289 size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr();
5290 const LO* const exportLIDs_raw = exportLIDs.getRawPtr();
5291
5292 // Count the total number of packets (column indices, in the case
5293 // of a CrsGraph) to pack. While doing so, set
5294 // numPacketsPerLID[i] to the number of entries owned by the
5295 // calling process in (local) row exportLIDs[i] of the graph, that
5296 // the caller wants us to send out.
5297 Kokkos::RangePolicy<host_execution_space, LO> inputRange(0, numExportLIDs);
5298 size_t totalNumPackets = 0;
5299 size_t errCount = 0;
5300 // lambdas turn what they capture const, so we can't
5301 // atomic_add(&errCount,1). Instead, we need a View to modify.
5302 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5303 host_device_type;
 // Unmanaged rank-0 View wrapping the local errCount; both passes
 // below report errors through it.
5304 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5305 constexpr size_t ONE = 1;
5306
5307 execute_sync_host_uvm_access(); // protect host UVM access
 // Pass 1: per-row entry counts plus their sum.
5308 Kokkos::parallel_reduce(
5309 "Tpetra::CrsGraph::pack: totalNumPackets",
5310 inputRange,
5311 [=, *this](const LO& i, size_t& curTotalNumPackets) {
5312 const GO gblRow = rowMap.getGlobalElement(exportLIDs_raw[i]);
5313 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5314 Kokkos::atomic_add(&errCountView(), ONE);
5315 numPacketsPerLID_raw[i] = 0;
5316 } else {
5317 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5318 numPacketsPerLID_raw[i] = numEnt;
5319 curTotalNumPackets += numEnt;
5320 }
5321 },
5322 totalNumPackets);
5323
5324 if (verbose) {
5325 std::ostringstream os;
5326 os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5327 std::cerr << os.str();
5328 }
5329 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5330 "totalNumPackets count encountered "
5331 "one or more errors! errCount = "
5332 << errCount
5333 << ", totalNumPackets = " << totalNumPackets << ".");
5334 errCount = 0;
5335
5336 // Allocate space for all the column indices to pack.
5337 exports.resize(totalNumPackets);
5338
5339 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5340 "this->supportsRowViews() returns false; this should never happen. "
5341 "Please report this bug to the Tpetra developers.");
5342
5343 // Loop again over the rows to export, and pack rows of indices
5344 // into the output buffer.
5345
5346 if (verbose) {
5347 std::ostringstream os;
5348 os << *prefix << "Pack into exports" << endl;
5349 std::cerr << os.str();
5350 }
5351
5352 // Teuchos::ArrayView may not be thread safe, or may not be
5353 // efficiently thread safe. Better to use the raw pointer.
5354 GO* const exports_raw = exports.getRawPtr();
5355 errCount = 0;
 // Pass 2: exportsOffset is the running start position of row i in
 // the exports buffer. The offset is advanced on every invocation,
 // but the buffer is written only when 'final' is true.
5356 Kokkos::parallel_scan("Tpetra::CrsGraph::pack: pack from views",
5357 inputRange, [=, &prefix, *this](const LO i, size_t& exportsOffset, const bool final) {
5358 const size_t curOffset = exportsOffset;
5359 const GO gblRow = rowMap.getGlobalElement(exportLIDs_raw[i]);
5360 const RowInfo rowInfo =
5361 this->getRowInfoFromGlobalRowIndex(gblRow);
5362
5363 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5364 if (rowInfo.localRow == TDO::invalid()) {
5365 if (verbose) {
5366 std::ostringstream os;
5367 os << *prefix << ": INVALID rowInfo: i=" << i
5368 << ", lclRow=" << exportLIDs_raw[i] << endl;
5369 std::cerr << os.str();
5370 }
5371 Kokkos::atomic_add(&errCountView(), ONE);
5372 } else if (curOffset + rowInfo.numEntries > totalNumPackets) {
 // Row would run past the end of the buffer sized in pass 1.
5373 if (verbose) {
5374 std::ostringstream os;
5375 os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5376 << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5377 "(= "
5378 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5379 << ") > totalNumPackets (= " << totalNumPackets << ")."
5380 << endl;
5381 std::cerr << os.str();
5382 }
5383 Kokkos::atomic_add(&errCountView(), ONE);
5384 } else {
5385 const LO numEnt = static_cast<LO>(rowInfo.numEntries);
5386 if (this->isLocallyIndexed()) {
5387 auto lclColInds = getLocalIndsViewHost(rowInfo);
5388 if (final) {
5389 for (LO k = 0; k < numEnt; ++k) {
5390 const LO lclColInd = lclColInds(k);
5391 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5392 // Pack it, even if it's wrong. Let the receiving
5393 // process deal with it. Otherwise, we'll miss out
5394 // on any correct data.
5395 exports_raw[curOffset + k] = gblColInd;
5396 } // for each entry in the row
5397 } // final pass?
5398 exportsOffset = curOffset + numEnt;
5399 } else if (this->isGloballyIndexed()) {
5400 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5401 if (final) {
5402 for (LO k = 0; k < numEnt; ++k) {
5403 const GO gblColInd = gblColInds(k);
5404 // Pack it, even if it's wrong. Let the receiving
5405 // process deal with it. Otherwise, we'll miss out
5406 // on any correct data.
5407 exports_raw[curOffset + k] = gblColInd;
5408 } // for each entry in the row
5409 } // final pass?
5410 exportsOffset = curOffset + numEnt;
5411 }
5412 // If neither globally nor locally indexed, then the graph
5413 // has no entries in this row (or indeed, in any row on this
5414 // process) to pack.
5415 }
5416 });
5417
5418 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5419 "Packing encountered "
5420 "one or more errors! errCount = "
5421 << errCount
5422 << ", totalNumPackets = " << totalNumPackets << ".");
5423
5424 if (verbose) {
5425 std::ostringstream os;
5426 os << *prefix << "Done" << endl;
5427 std::cerr << os.str();
5428 }
5429}
5430
// Pack the column indices of the requested rows into a flat buffer of
// global indices (Kokkos::DualView interface). All work is done on the
// host views of the arguments.
//
// Parameters:
//  exportLIDs         [in]  local row indices to pack; must not need a
//                           host sync (asserted).
//  exports            [out] replaced by a new allocation only if its
//                           current extent is smaller than the total
//                           number of entries; marked modified on host
//                           and filled with global column indices, rows
//                           laid out back to back in exportLIDs order.
//  numPacketsPerLID   [out] host entry i receives the number of entries
//                           in row exportLIDs[i]; marked modified on
//                           host. Must have the same extent as
//                           exportLIDs.
//  constantNumPackets [out] always set to 0 (row lengths may differ).
//
// Errors (raised through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//  std::runtime_error if exportLIDs and numPacketsPerLID differ in
//                     extent.
//  std::logic_error   if the graph is locally indexed without a column
//                     Map, if any export LID is not in the row Map, or
//                     if supportsRowViews() is false.
// Errors found while packing are only counted: the check after the scan
// is commented out, so errCount is reported in verbose output but does
// not raise.
//
// NOTE(review): original listing lines 5569 and 5644 (immediately
// before and after the parallel_scan) are not visible in this listing.
5431template <class LocalOrdinal, class GlobalOrdinal, class Node>
5432void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5433 packFillActiveNew(const Kokkos::DualView<const local_ordinal_type*,
5434 buffer_device_type>& exportLIDs,
5435 Kokkos::DualView<packet_type*,
5436 buffer_device_type>& exports,
5437 Kokkos::DualView<size_t*,
5438 buffer_device_type>
5439 numPacketsPerLID,
5440 size_t& constantNumPackets) const {
5441 using std::endl;
5442 using LO = local_ordinal_type;
5443 using GO = global_ordinal_type;
5444 using host_execution_space = typename Kokkos::View<size_t*,
5445 device_type>::host_mirror_type::execution_space;
5446 using host_device_type =
5447 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5448 using exports_dv_type =
5449 Kokkos::DualView<packet_type*, buffer_device_type>;
5450 const char tfecfFuncName[] = "packFillActiveNew: ";
5451 const bool verbose = verbose_;
5452
5453 const auto numExportLIDs = exportLIDs.extent(0);
5454 std::unique_ptr<std::string> prefix;
5455 if (verbose) {
5456 prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5457 std::ostringstream os;
5458 os << *prefix << "numExportLIDs: " << numExportLIDs
5459 << ", numPacketsPerLID.extent(0): "
5460 << numPacketsPerLID.extent(0) << endl;
5461 std::cerr << os.str();
5462 }
5463 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.extent(0), std::runtime_error,
5464 "exportLIDs.extent(0) = " << numExportLIDs
5465 << " != numPacketsPerLID.extent(0) = "
5466 << numPacketsPerLID.extent(0) << ".");
 // The host copy of exportLIDs is read directly below, so it must
 // already be up to date.
5467 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5468 auto exportLIDs_h = exportLIDs.view_host();
5469
5470 const map_type& rowMap = *(this->getRowMap());
5471 const map_type* const colMapPtr = this->colMap_.getRawPtr();
5472 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr == nullptr, std::logic_error,
5473 "This graph claims to be locally indexed, but its column Map is nullptr. "
5474 "This should never happen. Please report this bug to the Tpetra "
5475 "developers.");
5476
5477 // We may pack different amounts of data for different rows.
5478 constantNumPackets = 0;
5479
 // The host view is fully overwritten below, so discard any previous
 // sync state and mark the host side as the modified one.
5480 numPacketsPerLID.clear_sync_state();
5481 numPacketsPerLID.modify_host();
5482 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5483
5484 // Count the total number of packets (column indices, in the case
5485 // of a CrsGraph) to pack. While doing so, set
5486 // numPacketsPerLID[i] to the number of entries owned by the
5487 // calling process in (local) row exportLIDs[i] of the graph, that
5488 // the caller wants us to send out.
5489 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5490 range_type inputRange(0, numExportLIDs);
5491 size_t totalNumPackets = 0;
5492 size_t errCount = 0;
5493 // lambdas turn what they capture const, so we can't
5494 // atomic_add(&errCount,1). Instead, we need a View to modify.
5495 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5496 constexpr size_t ONE = 1;
5497
5498 if (verbose) {
5499 std::ostringstream os;
5500 os << *prefix << "Compute totalNumPackets" << endl;
5501 std::cerr << os.str();
5502 }
5503
5504 execute_sync_host_uvm_access(); // protect host UVM access
5505 totalNumPackets = 0;
 // Pass 1 (plain serial loop here): per-row entry counts and their sum.
5506 for (size_t i = 0; i < numExportLIDs; ++i) {
5507 const LO lclRow = exportLIDs_h[i];
5508 const GO gblRow = rowMap.getGlobalElement(lclRow);
5509 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5510 if (verbose) {
5511 std::ostringstream os;
5512 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5513 << " not in row Map on this process" << endl;
5514 std::cerr << os.str();
5515 }
5516 Kokkos::atomic_add(&errCountView(), ONE);
5517 numPacketsPerLID_h(i) = 0;
5518 } else {
5519 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5520 numPacketsPerLID_h(i) = numEnt;
5521 totalNumPackets += numEnt;
5522 }
5523 }
5524
5525 if (verbose) {
5526 std::ostringstream os;
5527 os << *prefix << "totalNumPackets: " << totalNumPackets
5528 << ", errCount: " << errCount << endl;
5529 std::cerr << os.str();
5530 }
5531 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5532 "totalNumPackets count encountered "
5533 "one or more errors! totalNumPackets: "
5534 << totalNumPackets
5535 << ", errCount: " << errCount << ".");
5536
5537 // Allocate space for all the column indices to pack.
 // Only grows: an existing buffer that is already large enough is
 // reused, so exports.extent(0) may exceed totalNumPackets afterwards.
5538 if (size_t(exports.extent(0)) < totalNumPackets) {
5539 // FIXME (mfh 09 Apr 2019) Create without initializing.
5540 exports = exports_dv_type("exports", totalNumPackets);
5541 }
5542
5543 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5544 "this->supportsRowViews() returns false; this should never happen. "
5545 "Please report this bug to the Tpetra developers.");
5546
5547 // Loop again over the rows to export, and pack rows of indices
5548 // into the output buffer.
5549
5550 if (verbose) {
5551 std::ostringstream os;
5552 os << *prefix << "Pack into exports buffer" << endl;
5553 std::cerr << os.str();
5554 }
5555
5556 exports.clear_sync_state();
5557 exports.modify_host();
5558 auto exports_h = exports.view_host();
5559
5560 errCount = 0;
5561
5562 // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5563 // (if locally indexed) or gblInds_wdv (if globally indexed).
 // The returned views are discarded; the calls are made for their
 // effect on the wrapped DualViews (presumably a host sync -- confirm).
5564 if (isLocallyIndexed())
5565 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5566 else if (isGloballyIndexed())
5567 gblInds_wdv.getHostView(Access::ReadOnly);
5568
 // Pass 2: exportsOffset is the running start position of row i in
 // the exports buffer. The offset is advanced on every invocation,
 // but the buffer is written only when 'final' is true. Error paths
 // return without advancing the offset.
5570 Kokkos::parallel_scan("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5571 inputRange, [=, &prefix, *this](const LO i, size_t& exportsOffset, const bool final) {
5572 const size_t curOffset = exportsOffset;
5573 const LO lclRow = exportLIDs_h(i);
5574 const GO gblRow = rowMap.getGlobalElement(lclRow);
5575 if (gblRow == Details::OrdinalTraits<GO>::invalid()) {
5576 if (verbose) {
5577 std::ostringstream os;
5578 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5579 << " not in row Map on this process" << endl;
5580 std::cerr << os.str();
5581 }
5582 Kokkos::atomic_add(&errCountView(), ONE);
5583 return;
5584 }
5585
5586 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(gblRow);
5587 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid()) {
5588 if (verbose) {
5589 std::ostringstream os;
5590 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5591 << ", gblRow=" << gblRow << ": invalid rowInfo"
5592 << endl;
5593 std::cerr << os.str();
5594 }
5595 Kokkos::atomic_add(&errCountView(), ONE);
5596 return;
5597 }
5598
 // Row would run past the total counted in pass 1.
5599 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5600 if (verbose) {
5601 std::ostringstream os;
5602 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5603 << ", gblRow=" << gblRow << ", curOffset (= "
5604 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5605 << ") > totalNumPackets (= " << totalNumPackets
5606 << ")." << endl;
5607 std::cerr << os.str();
5608 }
5609 Kokkos::atomic_add(&errCountView(), ONE);
5610 return;
5611 }
5612
5613 const LO numEnt = static_cast<LO>(rowInfo.numEntries);
5614 if (this->isLocallyIndexed()) {
5615 auto lclColInds = getLocalIndsViewHost(rowInfo);
5616 if (final) {
5617 for (LO k = 0; k < numEnt; ++k) {
5618 const LO lclColInd = lclColInds(k);
5619 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5620 // Pack it, even if it's wrong. Let the receiving
5621 // process deal with it. Otherwise, we'll miss out
5622 // on any correct data.
5623 exports_h(curOffset + k) = gblColInd;
5624 } // for each entry in the row
5625 } // final pass?
5626 exportsOffset = curOffset + numEnt;
5627 } else if (this->isGloballyIndexed()) {
5628 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5629 if (final) {
5630 for (LO k = 0; k < numEnt; ++k) {
5631 const GO gblColInd = gblColInds(k);
5632 // Pack it, even if it's wrong. Let the receiving
5633 // process deal with it. Otherwise, we'll miss out
5634 // on any correct data.
5635 exports_h(curOffset + k) = gblColInd;
5636 } // for each entry in the row
5637 } // final pass?
5638 exportsOffset = curOffset + numEnt;
5639 }
5640 // If neither globally nor locally indexed, then the graph
5641 // has no entries in this row (or indeed, in any row on this
5642 // process) to pack.
5643 });
5645
 // The post-scan error check is disabled; errCount is only printed in
 // verbose mode below.
5646 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5647 // (errCount != 0, std::logic_error, "Packing encountered "
5648 // "one or more errors! errCount = " << errCount
5649 // << ", totalNumPackets = " << totalNumPackets << ".");
5650
5651 if (verbose) {
5652 std::ostringstream os;
5653 os << *prefix << "errCount=" << errCount << "; Done" << endl;
5654 std::cerr << os.str();
5655 }
5656}
5657
// Unpack received column indices and insert them into the rows of this
// (target) graph.
//
// Steps visible here:
//  1. Compute the padding needed for the incoming entries and apply it.
//  2. Reject the call (std::runtime_error) if the graph is fill
//     complete.
//  3. Bring numPacketsPerLID and imports up to date on host if needed;
//     importLIDs must already be host-valid (asserted).
//  4. For each import LID, take the next numPacketsPerLID_h[i] global
//     column indices from the imports buffer and insert them: through
//     insertGlobalIndicesFiltered() if the graph is not locally indexed,
//     otherwise after converting each index to local with the column
//     Map, through insertLocalIndices().
// An import LID that is not in the row Map raises std::logic_error.
// Any std::exception thrown inside the insert loop is caught and
// re-raised as std::runtime_error with the original message appended.
//
// The constantNumPackets and combineMode arguments are unused (their
// names are commented out); see the discussion of combine modes below.
//
// NOTE(review): this listing skips several original lines (the
// embedded numbering jumps): parts of the signature (5659, 5661, 5663,
// 5666, 5667, 5670), the first line of the extent check (5714), the
// body of the max-size loop (5742), and line 5796, which presumably
// advances importsOffset -- confirm against the full source.
5658template <class LocalOrdinal, class GlobalOrdinal, class Node>
5660 unpackAndCombine(const Kokkos::DualView<const local_ordinal_type*,
5662 Kokkos::DualView<packet_type*,
5664 imports,
5665 Kokkos::DualView<size_t*,
5668 const size_t /* constantNumPackets */,
5669 const CombineMode /* combineMode */) {
5671 using std::endl;
5672 using LO = local_ordinal_type;
5673 using GO = global_ordinal_type;
5674 const char tfecfFuncName[] = "unpackAndCombine";
5675
5676 ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
5677 const bool verbose = verbose_;
5678
5679 std::unique_ptr<std::string> prefix;
5680 if (verbose) {
5681 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5682 std::ostringstream os;
5683 os << *prefix << "Start" << endl;
5684 std::cerr << os.str();
5685 }
 // Make room for the incoming entries before inserting anything.
5686 {
5687 auto padding = computeCrsPaddingForImports(
5688 importLIDs, imports, numPacketsPerLID, verbose);
5689 applyCrsPadding(*padding, verbose);
5690 if (verbose) {
5691 std::ostringstream os;
5692 os << *prefix << "Done computing & applying padding" << endl;
5693 std::cerr << os.str();
5694 }
5695 }
5696
5697 // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
5698 // reasonable meaning, whether or not the matrix is fill complete.
5699 // It's just more work to implement.
5700
5701 // We are not checking the value of the CombineMode input
5702 // argument. For CrsGraph, we only support import/export
5703 // operations if fillComplete has not yet been called. Any
5704 // incoming column-indices are inserted into the target graph. In
5705 // this context, CombineMode values of ADD vs INSERT are
5706 // equivalent. What is the meaning of REPLACE for CrsGraph? If a
5707 // duplicate column-index is inserted, it will be compressed out
5708 // when fillComplete is called.
5709 //
5710 // Note: I think REPLACE means that an existing row is replaced by
5711 // the imported row, i.e., the existing indices are cleared. CGB,
5712 // 6/17/2010
5713
5715 std::runtime_error, ": importLIDs.extent(0) = " << importLIDs.extent(0) << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0) << ".");
5716 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillComplete(), std::runtime_error,
5717 ": Import or Export operations are not allowed on a target "
5718 "CrsGraph that is fillComplete.");
5719
5720 const size_t numImportLIDs(importLIDs.extent(0));
 // The loops below read the host views, so sync them first if needed.
5721 if (numPacketsPerLID.need_sync_host()) {
5722 numPacketsPerLID.sync_host();
5723 }
5724 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5725 if (imports.need_sync_host()) {
5726 imports.sync_host();
5727 }
5728 auto imports_h = imports.view_host();
5729 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5730 auto importLIDs_h = importLIDs.view_host();
5731
5732 // If we're inserting in local indices, let's pre-allocate
 // lclColInds is scratch space reused for every row in the insert
 // loop; it is sized once to maxNumInserts.
5733 Teuchos::Array<LO> lclColInds;
5734 if (isLocallyIndexed()) {
5735 if (verbose) {
5736 std::ostringstream os;
5737 os << *prefix << "Preallocate local indices scratch" << endl;
5738 std::cerr << os.str();
5739 }
5740 size_t maxNumInserts = 0;
5741 for (size_t i = 0; i < numImportLIDs; ++i) {
5743 }
5744 if (verbose) {
5745 std::ostringstream os;
5746 os << *prefix << "Local indices scratch size: "
5747 << maxNumInserts << endl;
5748 std::cerr << os.str();
5749 }
5750 lclColInds.resize(maxNumInserts);
5751 } else {
5752 if (verbose) {
5753 std::ostringstream os;
5754 os << *prefix;
5755 if (isGloballyIndexed()) {
5756 os << "Graph is globally indexed";
5757 } else {
5758 os << "Graph is neither locally nor globally indexed";
5759 }
5760 os << endl;
5761 std::cerr << os.str();
5762 }
5763 }
5764
5765 TEUCHOS_ASSERT(!rowMap_.is_null());
5766 const map_type& rowMap = *rowMap_;
5767
5768 try {
 // importsOffset is the start of row i's indices within imports_h.
5769 size_t importsOffset = 0;
5770 for (size_t i = 0; i < numImportLIDs; ++i) {
5771 if (verbose) {
5772 std::ostringstream os;
5773 os << *prefix << "i=" << i << ", numImportLIDs="
5774 << numImportLIDs << endl;
5775 std::cerr << os.str();
5776 }
5777 // We can only unpack into owned rows, since we only have
5778 // local row indices.
5779 const LO lclRow = importLIDs_h[i];
5780 const GO gblRow = rowMap.getGlobalElement(lclRow);
5781 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
5782 std::logic_error, "importLIDs[i=" << i << "]=" << lclRow << " is not in the row Map on the calling "
5783 "process.");
 // Note: the size_t packet count is converted to LO here.
5784 const LO numEnt = numPacketsPerLID_h[i];
 // Empty rows get a null pointer rather than a pointer into the buffer.
5785 const GO* const gblColInds = (numEnt == 0) ? nullptr : imports_h.data() + importsOffset;
5786 if (!isLocallyIndexed()) {
5787 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
5788 } else {
5789 // FIXME (mfh 09 Feb 2020) Now would be a good time to do
5790 // column Map filtering.
5791 for (LO j = 0; j < numEnt; j++) {
5792 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
5793 }
5794 insertLocalIndices(lclRow, numEnt, lclColInds.data());
5795 }
5797 }
5798 } catch (std::exception& e) {
 // Re-raise with context so the caller can tell where the failure
 // came from.
5799 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
5800 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
5801 "exception: "
5802 << endl
5803 << e.what());
5804 }
5805
5806 if (verbose) {
5807 std::ostringstream os;
5808 os << *prefix << "Done" << endl;
5809 std::cerr << os.str();
5810 }
5811}
5812
// Replace this graph's row Map with newMap and rebuild the dependent
// Maps and communication objects on newMap's communicator.
//
// newMap may be null; in that case the new communicator is null and no
// Export or Import is rebuilt.
//
// The domain and range Maps are replaced by newMap when they are the
// same object as the current row Map; otherwise they, and the column
// Map, are obtained through replaceCommWithSubset(newComm). All new
// values are built in locals first and assigned to the members only at
// the end, so the object is not modified if an earlier step throws.
//
// NOTE(review): this listing skips several original lines (the
// embedded numbering jumps): the qualified-name line (5814), the
// declarations of the local rowMap/domainMap/rangeMap/colMap/exporter/
// importer/newComm variables (5824-5826, 5829), and the statements that
// construct the Export (5872, 5874-5875) and the Import (5883,
// 5885-5886). Confirm those against the full source.
5813template <class LocalOrdinal, class GlobalOrdinal, class Node>
5815 removeEmptyProcessesInPlace(const Teuchos::RCP<const map_type>& newMap) {
5816 using Teuchos::Comm;
5817 using Teuchos::null;
5818 using Teuchos::ParameterList;
5819 using Teuchos::RCP;
5820
5821 // We'll set all the state "transactionally," so that this method
5822 // satisfies the strong exception guarantee. This object's state
5823 // won't be modified until the end of this method.
5827
5828 rowMap = newMap;
5830 (newMap.is_null()) ? null : newMap->getComm();
5831
5832 if (!domainMap_.is_null()) {
5833 if (domainMap_.getRawPtr() == rowMap_.getRawPtr()) {
5834 // Common case: original domain and row Maps are identical.
5835 // In that case, we need only replace the original domain Map
5836 // with the new Map. This ensures that the new domain and row
5837 // Maps _stay_ identical.
5838 domainMap = newMap;
5839 } else {
5840 domainMap = domainMap_->replaceCommWithSubset(newComm);
5841 }
5842 }
5843 if (!rangeMap_.is_null()) {
5844 if (rangeMap_.getRawPtr() == rowMap_.getRawPtr()) {
5845 // Common case: original range and row Maps are identical. In
5846 // that case, we need only replace the original range Map with
5847 // the new Map. This ensures that the new range and row Maps
5848 // _stay_ identical.
5849 rangeMap = newMap;
5850 } else {
5851 rangeMap = rangeMap_->replaceCommWithSubset(newComm);
5852 }
5853 }
5854 if (!colMap_.is_null()) {
5855 colMap = colMap_->replaceCommWithSubset(newComm);
5856 }
5857
5858 // (Re)create the Export and / or Import if necessary.
5859 if (!newComm.is_null()) {
5860 RCP<ParameterList> params = this->getNonconstParameterList(); // could be null
5861 //
5862 // The operations below are collective on the new communicator.
5863 //
5864 // (Re)create the Export object if necessary. If I haven't
5865 // called fillComplete yet, I don't have a rangeMap, so I must
5866 // first check if the _original_ rangeMap is not null. Ditto
5867 // for the Import object and the domain Map.
 // Pointer inequality is tested before isSameAs() so that the Map
 // comparison is skipped when both are the same object.
5868 if (!rangeMap_.is_null() &&
5869 rangeMap != rowMap &&
5870 !rangeMap->isSameAs(*rowMap)) {
 // Two construction variants, chosen by whether the parameter
 // list has an "Export" sublist.
5871 if (params.is_null() || !params->isSublist("Export")) {
5873 } else {
5876 }
5877 }
5878 // (Re)create the Import object if necessary.
5879 if (!domainMap_.is_null() &&
5880 domainMap != colMap &&
5881 !domainMap->isSameAs(*colMap)) {
 // Two construction variants, chosen by whether the parameter
 // list has an "Import" sublist.
5882 if (params.is_null() || !params->isSublist("Import")) {
5884 } else {
5887 }
5888 }
5889 } // if newComm is not null
5890
5891 // Defer side effects until the end. If no destructors throw
5892 // exceptions (they shouldn't anyway), then this method satisfies
5893 // the strong exception guarantee.
5894 exporter_ = exporter;
5895 importer_ = importer;
5896 rowMap_ = rowMap;
5897 // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
5898 // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
5899 // the same object. We might want to get rid of this redundant
5900 // pointer sometime, but for now, we'll leave it alone and just
5901 // set map_ to the same object.
5902 this->map_ = rowMap;
5903 domainMap_ = domainMap;
5904 rangeMap_ = rangeMap;
5905 colMap_ = colMap;
5906}
5907
// Compute, for every local row, the offset of the diagonal entry
// relative to the start of that row, and store it in 'offsets'.
//
// Parameters:
//  offsets [out] device view with at least getLocalNumRows() entries.
//                Rows without a usable diagonal entry receive
//                OrdinalTraits<size_t>::invalid().
//
// Errors (raised through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//  std::runtime_error    if the graph has no column Map, or -- in debug
//                        mode only -- if any process detected a problem
//                        (see the all-reduce at the end).
//  std::invalid_argument if offsets has fewer entries than local rows.
//
// A fill-complete graph delegates to Details::getGraphDiagOffsets on
// the local graph. Otherwise the offsets are computed row by row in a
// host mirror of 'offsets' and copied back with deep_copy.
//
// NOTE(review): this listing skips several original lines (the
// embedded numbering jumps): the qualified-name line (5909), the
// declaration of wrongOffsets (5943), line 5975, the remaining
// arguments of findRelOffset and whatever follows it (5985-5987), the
// definition of localSuccess (6046), lclResults[0] (6049), and the tail
// of the reduceAll call (6064). Confirm those against the full source.
5908template <class LocalOrdinal, class GlobalOrdinal, class Node>
5910 getLocalDiagOffsets(const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const {
5911 using std::endl;
5912 using LO = LocalOrdinal;
5913 using GO = GlobalOrdinal;
5914 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
5915 const bool verbose = verbose_;
5916
5917 std::unique_ptr<std::string> prefix;
5918 if (verbose) {
5919 prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
5920 std::ostringstream os;
5921 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
5922 << endl;
5923 std::cerr << os.str();
5924 }
5925
5926 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error, "The graph must have a column Map.");
5927 const LO lclNumRows = static_cast<LO>(this->getLocalNumRows());
5928 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<LO>(offsets.extent(0)) < lclNumRows,
5929 std::invalid_argument, "offsets.extent(0) = " << offsets.extent(0) << " < getLocalNumRows() = " << lclNumRows << ".");
5930
5931 const map_type& rowMap = *(this->getRowMap());
5932 const map_type& colMap = *(this->getColMap());
5933
5934 // We only use these in debug mode, but since debug mode is a
5935 // run-time option, they need to exist here. That's why we create
5936 // the vector with explicit size zero, to avoid overhead if debug
5937 // mode is off.
 // Each flag starts true and is cleared when the corresponding
 // problem is seen; they are combined across processes at the end.
5938 bool allRowMapDiagEntriesInColMap = true;
5939 bool allDiagEntriesFound = true;
5940 bool allOffsetsCorrect = true;
5941 bool noOtherWeirdness = true;
5942 using wrong_offsets_type = std::vector<std::pair<LO, size_t>>;
5944
5945 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
5946 // the subset of Map functionality that we need below.
5947 auto lclRowMap = rowMap.getLocalMap();
5948 auto lclColMap = colMap.getLocalMap();
5949
5950 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
5951 // setup, at least on the host. For CUDA, we have to use LocalMap
5952 // (that comes from each of the two Maps).
5953
5954 const bool sorted = this->isSorted();
5955 if (isFillComplete()) {
5956 auto lclGraph = this->getLocalGraphDevice();
5957 ::Tpetra::Details::getGraphDiagOffsets(offsets, lclRowMap, lclColMap,
5958 lclGraph.row_map,
5959 lclGraph.entries, sorted);
5960 } else {
5961 // NOTE (mfh 22 Feb 2017): We have to run this code on host,
5962 // since the graph is not fill complete. The previous version
5963 // of this code assumed UVM; this version does not.
5964 auto offsets_h = Kokkos::create_mirror_view(offsets);
5965
5966 for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
5967 // Find the diagonal entry. Since the row Map and column Map
5968 // may differ, we have to compare global row and column
5969 // indices, not local.
5970 const GO gblRowInd = lclRowMap.getGlobalElement(lclRowInd);
5971 const GO gblColInd = gblRowInd;
5972 const LO lclColInd = lclColMap.getLocalElement(gblColInd);
5973
 // The diagonal's global index is not in the column Map on this
 // process, so this row cannot have a diagonal entry.
5974 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
5976 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid();
5977 } else {
5978 const RowInfo rowInfo = this->getRowInfo(lclRowInd);
5979 if (static_cast<LO>(rowInfo.localRow) == lclRowInd &&
5980 rowInfo.numEntries > 0) {
5981 auto colInds = this->getLocalIndsViewHost(rowInfo);
5982 const size_t hint = 0; // not needed for this algorithm
5983 const size_t offset =
5984 KokkosSparse::findRelOffset(colInds, rowInfo.numEntries,
5987
5988 if (debug_) {
5989 // Now that we have what we think is an offset, make sure
5990 // that it really does point to the diagonal entry. Offsets
5991 // are _relative_ to each row, not absolute (for the whole
5992 // (local) graph).
5993 typename local_inds_dualv_type::t_host::const_type lclColInds;
5994 try {
5995 lclColInds = this->getLocalIndsViewHost(rowInfo);
5996 } catch (...) {
 // Record the failure instead of propagating it; it is
 // reported collectively in the debug block at the end.
5997 noOtherWeirdness = false;
5998 }
5999 // Don't continue with error checking if the above failed.
6000 if (noOtherWeirdness) {
6001 const size_t numEnt = lclColInds.extent(0);
6002 if (offset >= numEnt) {
6003 // Offsets are relative to each row, so this means that
6004 // the offset is out of bounds.
6005 allOffsetsCorrect = false;
6006 wrongOffsets.push_back(std::make_pair(lclRowInd, offset));
6007 } else {
 // In bounds: check that the entry at that offset really is
 // the diagonal, comparing global indices.
6008 const LO actualLclColInd = lclColInds(offset);
6009 const GO actualGblColInd = lclColMap.getGlobalElement(actualLclColInd);
6010 if (actualGblColInd != gblColInd) {
6011 allOffsetsCorrect = false;
6012 wrongOffsets.push_back(std::make_pair(lclRowInd, offset));
6013 }
6014 }
6015 }
6016 } // debug_
6017 } else { // either row is empty, or something went wrong w/ getRowInfo()
6018 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid();
6019 allDiagEntriesFound = false;
6020 }
6021 } // whether lclColInd is a valid local column index
6022 } // for each local row
6023 // DEEP_COPY REVIEW - NOT TESTED
6024 Kokkos::deep_copy(offsets, offsets_h);
6025 } // whether the graph is fill complete
6026
 // wrongOffsets is only filled inside the debug_ checks above, so
 // this prints only when both verbose and debug mode are on.
6027 if (verbose && wrongOffsets.size() != 0) {
6028 std::ostringstream os;
6029 os << *prefix << "Wrong offsets: [";
6030 for (size_t k = 0; k < wrongOffsets.size(); ++k) {
6031 os << "(" << wrongOffsets[k].first << ","
6032 << wrongOffsets[k].second << ")";
6033 if (k + 1 < wrongOffsets.size()) {
6034 os << ", ";
6035 }
6036 }
6037 os << "]" << endl;
6038 std::cerr << os.str();
6039 }
6040
6041 if (debug_) {
6042 using std::endl;
6043 using Teuchos::reduceAll;
6044 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
6045 const bool localSuccess =
 // Slots 1-3 hold the local flags as 0/1; slot 4 identifies a
 // failing process. A REDUCE_MIN over them yields 0 in a flag slot if
 // any process failed that check.
6047 const int numResults = 5;
6048 int lclResults[5];
6050 lclResults[1] = allDiagEntriesFound ? 1 : 0;
6051 lclResults[2] = allOffsetsCorrect ? 1 : 0;
6052 lclResults[3] = noOtherWeirdness ? 1 : 0;
6053 // min-all-reduce will compute least rank of all the processes
6054 // that didn't succeed.
6055 lclResults[4] = !localSuccess ? comm->getRank() : comm->getSize();
6056
6057 int gblResults[5];
6058 gblResults[0] = 0;
6059 gblResults[1] = 0;
6060 gblResults[2] = 0;
6061 gblResults[3] = 0;
6062 gblResults[4] = 0;
6063 reduceAll<int, int>(*comm, Teuchos::REDUCE_MIN,
6065
6066 if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1 || gblResults[3] != 1) {
6067 std::ostringstream os; // build error message
6068 os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6069 "possibly among others): "
6070 << endl;
6071 if (gblResults[0] == 0) {
6072 os << " - The column Map does not contain at least one diagonal entry "
6073 "of the graph."
6074 << endl;
6075 }
6076 if (gblResults[1] == 0) {
6077 os << " - On one or more processes, some row does not contain a "
6078 "diagonal entry."
6079 << endl;
6080 }
6081 if (gblResults[2] == 0) {
6082 os << " - On one or more processes, some offsets are incorrect."
6083 << endl;
6084 }
6085 if (gblResults[3] == 0) {
6086 os << " - One or more processes had some other error."
6087 << endl;
6088 }
6089 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6090 }
6091 } // debug_
6092}
6093
// Hand back, through `offsets`, the graph's off-rank offsets View
// (k_offRankOffsets_), computing and caching it first when no usable
// cached copy exists.
//
// \param offsets [out] Overwritten with k_offRankOffsets_ on success.
//
// Error behavior visible in this block:
//   - std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if
//     the graph has no column Map.
//   - TEUCHOS_ASSERT fires if the graph is not sorted.
//   - std::logic_error if the graph is not fill complete.
//
// NOTE(review): this listing has no line between the template header
// and the method name (listing line 6095).  Presumably that line holds
// the return type and the class qualifier; the bare `return;` below
// suggests `void` -- confirm against the real source.
6094template <class LocalOrdinal, class GlobalOrdinal, class Node>
6096 getLocalOffRankOffsets(offset_device_view_type& offsets) const {
6097 using std::endl;
6098 const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6099 const bool verbose = verbose_;
6100
// In verbose mode, report the extent of the caller's View on std::cerr.
// The message is assembled in a local stream and written with a single
// insertion.
6101 std::unique_ptr<std::string> prefix;
6102 if (verbose) {
6103 prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6104 std::ostringstream os;
6105 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6106 << endl;
6107 std::cerr << os.str();
6108 }
6109
6110 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!hasColMap(), std::runtime_error, "The graph must have a column Map.");
6111 // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6112
6113 const size_t lclNumRows = this->getLocalNumRows();
6114
// Fast path: reuse the cached View when it is flagged valid and its
// length still equals lclNumRows + 1.
6115 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows + 1) {
6116 offsets = k_offRankOffsets_;
6117 return;
6118 }
// Mark the cache invalid until the recomputation below succeeds.
6119 haveLocalOffRankOffsets_ = false;
6120
6121 const map_type& colMap = *(this->getColMap());
6122 const map_type& domMap = *(this->getDomainMap());
6123
6124 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6125 // the subset of Map functionality that we need below.
6126 auto lclColMap = colMap.getLocalMap();
6127 auto lclDomMap = domMap.getLocalMap();
6128
6129 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6130 // setup, at least on the host. For CUDA, we have to use LocalMap
6131 // (that comes from each of the two Maps).
6132
// The sortedness assertion runs before the fill-complete check, so it
// is evaluated even for graphs that then take the logic_error branch.
6133 TEUCHOS_ASSERT(this->isSorted());
6134 if (isFillComplete()) {
// Allocate lclNumRows + 1 entries without initializing them.
// NOTE(review): this relies on getGraphOffRankOffsets writing every
// entry -- confirm against that helper.
6135 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows + 1);
6136 auto lclGraph = this->getLocalGraphDevice();
// NOTE(review): listing line 6138 (between the two argument lines
// below) is absent.  Presumably it passes lclColMap and lclDomMap,
// which are otherwise unused here -- confirm against the real source.
6137 ::Tpetra::Details::getGraphOffRankOffsets(k_offRankOffsets_,
6139 lclGraph);
6140 offsets = k_offRankOffsets_;
6141 haveLocalOffRankOffsets_ = true;
6142 } else {
6143 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, "Can't get off-rank offsets for non-fill-complete graph");
6144 }
6145}
6146
6147namespace { // (anonymous)
6148
6149// mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6150// below). The point is to avoid the deep copy between the input
6151// Teuchos::ArrayRCP and the internally used Kokkos::View. We
6152// can't use UVM to avoid the deep copy with CUDA, because the
6153// ArrayRCP is a host pointer, while the input to the graph's
6154// getLocalDiagOffsets method is a device pointer. Assigning a
6155// host pointer to a device pointer is incorrect unless the host
6156// pointer points to host pinned memory. The goal is to get rid
6157// of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6158// copy for backwards compatibility.
6159//
6160// We have to use template magic because
6161// "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6162// if device_type::memory_space is not Kokkos::HostSpace (as is
6163// the case with CUDA).
6164
6165template <class DeviceType,
6166 const bool memSpaceIsHostSpace =
6167 std::is_same<typename DeviceType::memory_space,
6168 Kokkos::HostSpace>::value>
6169struct HelpGetLocalDiagOffsets {};
6170
6171template <class DeviceType>
6172struct HelpGetLocalDiagOffsets<DeviceType, true> {
6173 typedef DeviceType device_type;
6174 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6175 Kokkos::MemoryUnmanaged>
6176 device_offsets_type;
6177 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6178 Kokkos::MemoryUnmanaged>
6179 host_offsets_type;
6180
6181 static device_offsets_type
6182 getDeviceOffsets(const host_offsets_type& hostOffsets) {
6183 // Host and device are the same; no need to allocate a
6184 // temporary device View.
6185 return hostOffsets;
6186 }
6187
6188 static void
6189 copyBackIfNeeded(const host_offsets_type& /* hostOffsets */,
6190 const device_offsets_type& /* deviceOffsets */) { /* copy back not needed; host and device are the same */
6191 }
6192};
6193
6194template <class DeviceType>
6195struct HelpGetLocalDiagOffsets<DeviceType, false> {
6196 typedef DeviceType device_type;
6197 // We have to do a deep copy, since host memory space != device
6198 // memory space. Thus, the device View is managed (we need to
6199 // allocate a temporary device View).
6200 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6201 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6202 Kokkos::MemoryUnmanaged>
6203 host_offsets_type;
6204
6205 static device_offsets_type
6206 getDeviceOffsets(const host_offsets_type& hostOffsets) {
6207 // Host memory space != device memory space, so we must
6208 // allocate a temporary device View for the graph.
6209 return device_offsets_type("offsets", hostOffsets.extent(0));
6210 }
6211
6212 static void
6213 copyBackIfNeeded(const host_offsets_type& hostOffsets,
6214 const device_offsets_type& deviceOffsets) {
6215 // DEEP_COPY REVIEW - NOT TESTED
6216 Kokkos::deep_copy(hostOffsets, deviceOffsets);
6217 }
6218};
6219} // namespace
6220
// Host-array overload of getLocalDiagOffsets: fills a Teuchos::ArrayRCP
// by delegating to the overload that takes a device View.
//
// \param offsets [in/out] Resized to the local number of rows if its
//   size differs, then filled through an unmanaged host View over its
//   raw pointer.
//
// Raises std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC)
// if the graph has no column Map.  Because the array may be resized
// before the delegated call, the method can have side effects even if
// that call fails.
//
// NOTE(review): this listing has no line between the template header
// and the method name (listing line 6222).  Presumably that line holds
// the return type and the class qualifier -- confirm against the real
// source.
6221template <class LocalOrdinal, class GlobalOrdinal, class Node>
6223 getLocalDiagOffsets(Teuchos::ArrayRCP<size_t>& offsets) const {
6224 typedef LocalOrdinal LO;
6225 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->hasColMap(), std::runtime_error,
6227 "The graph does not yet have a column Map.");
6228 const LO myNumRows = static_cast<LO>(this->getLocalNumRows());
6229 if (static_cast<LO>(offsets.size()) != myNumRows) {
6230 // NOTE (mfh 21 Jan 2016) This means that the method does not
6231 // satisfy the strong exception guarantee (no side effects
6232 // unless successful).
6233 offsets.resize(myNumRows);
6234 }
6235
6236 // mfh 21 Jan 2016: This method unfortunately takes a
6237 // Teuchos::ArrayRCP, which is host memory. The graph wants a
6238 // device pointer. We can't access host memory from the device;
6239 // that's the wrong direction for UVM. (It's the right direction
6240 // for inefficient host pinned memory, but we don't want to use
6241 // that here.) Thus, if device memory space != host memory space,
6242 // we allocate and use a temporary device View to get the offsets.
6243 // If the two spaces are equal, the template magic makes the deep
6244 // copy go away.
// NOTE(review): listing line 6245 is absent here.  Presumably it
// declares `helper_type` as the HelpGetLocalDiagOffsets helper for this
// graph's device type -- confirm against the real source.
6246 typedef typename helper_type::host_offsets_type host_offsets_type;
6247 // Unmanaged host View that views the output array.
6248 host_offsets_type hostOffsets(offsets.getRawPtr(), myNumRows);
6249 // Allocate temp device View if host != device, else reuse host array.
6250 auto deviceOffsets = helper_type::getDeviceOffsets(hostOffsets);
6251 // NOT recursion; this calls the overload that takes a device View.
6252 this->getLocalDiagOffsets(deviceOffsets);
// Copies device results into the caller's array when a temporary was
// used; a no-op when host and device memory spaces coincide.
6253 helper_type::copyBackIfNeeded(hostOffsets, deviceOffsets);
6254}
6255
// Unconditionally returns true.
//
// NOTE(review): presumably this tells callers that the graph can hand
// out views of its rows; that meaning is inferred from the name only --
// confirm against the interface documentation.
//
// NOTE(review): this listing has no line between the template header
// and the method name (listing line 6257).  Presumably that line holds
// the return type and the class qualifier -- confirm against the real
// source.
6256template <class LocalOrdinal, class GlobalOrdinal, class Node>
6258 supportsRowViews() const {
6259 return true;
6260}
6261
6262template <class LocalOrdinal, class GlobalOrdinal, class Node>
6265 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6266 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>>& domainTransfer,
6267 const Teuchos::RCP<const map_type>& domainMap,
6268 const Teuchos::RCP<const map_type>& rangeMap,
6269 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6270 using Teuchos::ArrayRCP;
6271 using Teuchos::ArrayView;
6272 using Teuchos::Comm;
6273 using Teuchos::ParameterList;
6274 using Teuchos::rcp;
6275 using Teuchos::RCP;
6280#ifdef HAVE_TPETRA_MMM_TIMINGS
6281 using std::string;
6282 using Teuchos::TimeMonitor;
6283#endif
6284
6285 using LO = LocalOrdinal;
6286 using GO = GlobalOrdinal;
6287 using NT = node_type;
6290
6291 const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6292
6293#ifdef HAVE_TPETRA_MMM_TIMINGS
6294 string label;
6295 if (!params.is_null()) label = params->get("Timer Label", label);
6296 string prefix2 = string("Tpetra ") + label + std::string(": CrsGraph TAFC ");
6298 rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("Pack-1"))));
6299#endif
6300
6301 // Make sure that the input argument rowTransfer is either an
6302 // Import or an Export. Import and Export are the only two
6303 // subclasses of Transfer that we defined, but users might
6304 // (unwisely, for now at least) decide to implement their own
6305 // subclasses. Exclude this possibility.
6306 const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6307 const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6309 xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6310 prefix << "The 'rowTransfer' input argument must be either an Import or "
6311 "an Export, and its template parameters must match the corresponding "
6312 "template parameters of the CrsGraph.");
6313
6314 // Make sure that the input argument domainTransfer is either an
6315 // Import or an Export. Import and Export are the only two
6316 // subclasses of Transfer that we defined, but users might
6317 // (unwisely, for now at least) decide to implement their own
6318 // subclasses. Exclude this possibility.
6319 Teuchos::RCP<const import_type> xferDomainAsImport =
6320 Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6321 Teuchos::RCP<const export_type> xferDomainAsExport =
6322 Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6323
6324 if (!domainTransfer.is_null()) {
6326 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6327 prefix << "The 'domainTransfer' input argument must be either an "
6328 "Import or an Export, and its template parameters must match the "
6329 "corresponding template parameters of the CrsGraph.");
6330
6332 (xferAsImport != nullptr || !xferDomainAsImport.is_null()) &&
6333 ((xferAsImport != nullptr && xferDomainAsImport.is_null()) ||
6334 (xferAsImport == nullptr && !xferDomainAsImport.is_null())),
6335 std::invalid_argument,
6336 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6337 "must be of the same type (either Import or Export).");
6338
6340 (xferAsExport != nullptr || !xferDomainAsExport.is_null()) &&
6341 ((xferAsExport != nullptr && xferDomainAsExport.is_null()) ||
6342 (xferAsExport == nullptr && !xferDomainAsExport.is_null())),
6343 std::invalid_argument,
6344 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6345 "must be of the same type (either Import or Export).");
6346
6347 } // domainTransfer != null
6348
6349 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6350 // if the source Map is not distributed but the target Map is?
6351 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6352
6353 //
6354 // Get the caller's parameters
6355 //
6356
6357 bool reverseMode = false; // Are we in reverse mode?
6358 bool restrictComm = false; // Do we need to restrict the communicator?
6359 RCP<ParameterList> graphparams; // parameters for the destination graph
6360 if (!params.is_null()) {
6361 reverseMode = params->get("Reverse Mode", reverseMode);
6362 restrictComm = params->get("Restrict Communicator", restrictComm);
6363 graphparams = sublist(params, "CrsGraph");
6364 }
6365
6366 // Get the new domain and range Maps. We need some of them for error
6367 // checking, now that we have the reverseMode parameter.
6368 RCP<const map_type> MyRowMap = reverseMode ? rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6369 RCP<const map_type> MyColMap; // create this below
6370 RCP<const map_type> MyDomainMap = !domainMap.is_null() ? domainMap : getDomainMap();
6371 RCP<const map_type> MyRangeMap = !rangeMap.is_null() ? rangeMap : getRangeMap();
6372 RCP<const map_type> BaseRowMap = MyRowMap;
6373 RCP<const map_type> BaseDomainMap = MyDomainMap;
6374
6375 // If the user gave us a nonnull destGraph, then check whether it's
6376 // "pristine." That means that it has no entries.
6377 //
6378 // FIXME (mfh 15 May 2014) If this is not true on all processes,
6379 // then this exception test may hang. It would be better to
6380 // forward an error flag to the next communication phase.
6381 if (!destGraph.is_null()) {
6382 // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6383 // whether a graph or matrix has no entries on the calling
6384 // process, is that it is neither locally nor globally indexed.
6385 // This may change eventually with the Kokkos refactor version
6386 // of Tpetra, so it would be better just to check the quantity
6387 // of interest directly. Note that with the Kokkos refactor
6388 // version of Tpetra, asking for the total number of entries in
6389 // a graph or matrix that is not fill complete might require
6390 // computation (kernel launch), since it is not thread scalable
6391 // to update a count every time an entry is inserted.
6392 const bool NewFlag =
6393 !destGraph->isLocallyIndexed() && !destGraph->isGloballyIndexed();
6394 TEUCHOS_TEST_FOR_EXCEPTION(!NewFlag, std::invalid_argument,
6395 prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6396 "if its graph is empty (neither locally nor globally indexed).");
6397
6398 // FIXME (mfh 15 May 2014) At some point, we want to change
6399 // graphs and matrices so that their DistObject Map
6400 // (this->getMap()) may differ from their row Map. This will
6401 // make redistribution for 2-D distributions more efficient. I
6402 // hesitate to change this check, because I'm not sure how much
6403 // the code here depends on getMap() and getRowMap() being the
6404 // same.
6405 TEUCHOS_TEST_FOR_EXCEPTION(
6406 !destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6407 prefix << "The (row) Map of the input argument 'destGraph' is not the "
6408 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6409
6410 TEUCHOS_TEST_FOR_EXCEPTION(
6411 !destGraph->checkSizes(*this), std::invalid_argument,
6412 prefix << "You provided a nonnull destination graph, but checkSizes() "
6413 "indicates that it is not a legal legal target for redistribution from "
6414 "the source graph (*this). This may mean that they do not have the "
6415 "same dimensions.");
6416 }
6417
6418 // If forward mode (the default), then *this's (row) Map must be
6419 // the same as the source Map of the Transfer. If reverse mode,
6420 // then *this's (row) Map must be the same as the target Map of
6421 // the Transfer.
6422 //
6423 // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6424 // and matrices so that their DistObject Map (this->getMap()) may
6425 // differ from their row Map. This will make redistribution for
6426 // 2-D distributions more efficient. I hesitate to change this
6427 // check, because I'm not sure how much the code here depends on
6428 // getMap() and getRowMap() being the same.
6429 TEUCHOS_TEST_FOR_EXCEPTION(
6430 !(reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6431 std::invalid_argument, prefix << "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6432
6433 TEUCHOS_TEST_FOR_EXCEPTION(
6434 !(!reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6435 std::invalid_argument, prefix << "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6436
6437 // checks for domainTransfer
6438 TEUCHOS_TEST_FOR_EXCEPTION(
6439 !xferDomainAsImport.is_null() && !xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6440 std::invalid_argument,
6441 prefix << "The target map of the 'domainTransfer' input argument must be "
6442 "the same as the rebalanced domain map 'domainMap'");
6443
6444 TEUCHOS_TEST_FOR_EXCEPTION(
6445 !xferDomainAsExport.is_null() && !xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6446 std::invalid_argument,
6447 prefix << "The source map of the 'domainTransfer' input argument must be "
6448 "the same as the rebalanced domain map 'domainMap'");
6449
6450 // The basic algorithm here is:
6451 //
6452 // 1. Call the moral equivalent of "Distor.do" to handle the import.
6453 // 2. Copy all the Imported and Copy/Permuted data into the raw
6454 // CrsGraph pointers, still using GIDs.
6455 // 3. Call an optimized version of MakeColMap that avoids the
6456 // Directory lookups (since the importer knows who owns all the
6457 // GIDs) AND reindexes to LIDs.
6458 // 4. Call expertStaticFillComplete()
6459
6460 // Get information from the Importer
6461 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6462 ArrayView<const LO> ExportLIDs = reverseMode ? rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6463 ArrayView<const LO> RemoteLIDs = reverseMode ? rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6464 ArrayView<const LO> PermuteToLIDs = reverseMode ? rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6465 ArrayView<const LO> PermuteFromLIDs = reverseMode ? rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6466 Distributor& Distor = rowTransfer.getDistributor();
6467
6468 // Owning PIDs
6469 Teuchos::Array<int> SourcePids;
6470 Teuchos::Array<int> TargetPids;
6471 int MyPID = getComm()->getRank();
6472
6473 // Temp variables for sub-communicators
6474 RCP<const map_type> ReducedRowMap, ReducedColMap,
6475 ReducedDomainMap, ReducedRangeMap;
6476 RCP<const Comm<int>> ReducedComm;
6477
6478 // If the user gave us a null destGraph, then construct the new
6479 // destination graph. We will replace its column Map later.
6480 if (destGraph.is_null()) {
6481 destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6482 }
6483
6484 /***************************************************/
6485 /***** 1) First communicator restriction phase ****/
6486 /***************************************************/
6487 if (restrictComm) {
6488 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6489 ReducedComm = ReducedRowMap.is_null() ? Teuchos::null : ReducedRowMap->getComm();
6490 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6491
6492 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ? ReducedRowMap : MyDomainMap->replaceCommWithSubset(ReducedComm);
6493 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ? ReducedRowMap : MyRangeMap->replaceCommWithSubset(ReducedComm);
6494
6495 // Reset the "my" maps
6496 MyRowMap = ReducedRowMap;
6497 MyDomainMap = ReducedDomainMap;
6498 MyRangeMap = ReducedRangeMap;
6499
6500 // Update my PID, if we've restricted the communicator
6501 if (!ReducedComm.is_null()) {
6502 MyPID = ReducedComm->getRank();
6503 } else {
6504 MyPID = -2; // For debugging
6505 }
6506 } else {
6507 ReducedComm = MyRowMap->getComm();
6508 }
6509
6510 /***************************************************/
6511 /***** 2) From Tpetra::DistObject::doTransfer() ****/
6512 /***************************************************/
6513#ifdef HAVE_TPETRA_MMM_TIMINGS
6514 MM = Teuchos::null;
6515 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("ImportSetup"))));
6516#endif
6517 // Get the owning PIDs
6518 RCP<const import_type> MyImporter = getImporter();
6519
6520 // check whether domain maps of source graph and base domain map is the same
6521 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6522
6523 if (!restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6524 // Same domain map as source graph
6525 //
6526 // NOTE: This won't work for restrictComm (because the Import
6527 // doesn't know the restricted PIDs), though writing an
6528 // optimized version for that case would be easy (Import an
6529 // IntVector of the new PIDs). Might want to add this later.
6530 Import_Util::getPids(*MyImporter, SourcePids, false);
6531 } else if (restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6532 // Same domain map as source graph (restricted communicator)
6533 // We need one import from the domain to the column map
6534 ivector_type SourceDomain_pids(getDomainMap(), true);
6535 ivector_type SourceCol_pids(getColMap());
6536 // SourceDomain_pids contains the restricted pids
6537 SourceDomain_pids.putScalar(MyPID);
6538
6539 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6540 SourcePids.resize(getColMap()->getLocalNumElements());
6541 SourceCol_pids.get1dCopy(SourcePids());
6542 } else if (MyImporter.is_null() && bSameDomainMap) {
6543 // Graph has no off-process entries
6544 SourcePids.resize(getColMap()->getLocalNumElements());
6545 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6546 } else if (!MyImporter.is_null() &&
6547 !domainTransfer.is_null()) {
6548 // general implementation for rectangular matrices with
6549 // domain map different than SourceGraph domain map.
6550 // User has to provide a DomainTransfer object. We need
6551 // two communications (import/export)
6552
6553 // TargetDomain_pids lives on the rebalanced new domain map
6554 ivector_type TargetDomain_pids(domainMap);
6555 TargetDomain_pids.putScalar(MyPID);
6556
6557 // SourceDomain_pids lives on the non-rebalanced old domain map
6558 ivector_type SourceDomain_pids(getDomainMap());
6559
6560 // SourceCol_pids lives on the non-rebalanced old column map
6561 ivector_type SourceCol_pids(getColMap());
6562
6563 if (!reverseMode && !xferDomainAsImport.is_null()) {
6564 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6565 } else if (reverseMode && !xferDomainAsExport.is_null()) {
6566 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6567 } else if (!reverseMode && !xferDomainAsExport.is_null()) {
6568 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6569 } else if (reverseMode && !xferDomainAsImport.is_null()) {
6570 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6571 } else {
6572 TEUCHOS_TEST_FOR_EXCEPTION(
6573 true, std::logic_error,
6574 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6575 }
6576 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6577 SourcePids.resize(getColMap()->getLocalNumElements());
6578 SourceCol_pids.get1dCopy(SourcePids());
6579 } else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6580 getDomainMap()->isSameAs(*getRowMap())) {
6581 // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6582 ivector_type TargetRow_pids(domainMap);
6583 ivector_type SourceRow_pids(getRowMap());
6584 ivector_type SourceCol_pids(getColMap());
6585
6586 TargetRow_pids.putScalar(MyPID);
6587 if (!reverseMode && xferAsImport != nullptr) {
6588 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
6589 } else if (reverseMode && xferAsExport != nullptr) {
6590 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
6591 } else if (!reverseMode && xferAsExport != nullptr) {
6592 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
6593 } else if (reverseMode && xferAsImport != nullptr) {
6594 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
6595 } else {
6596 TEUCHOS_TEST_FOR_EXCEPTION(
6597 true, std::logic_error,
6598 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6599 }
6600 SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
6601 SourcePids.resize(getColMap()->getLocalNumElements());
6602 SourceCol_pids.get1dCopy(SourcePids());
6603 } else {
6604 TEUCHOS_TEST_FOR_EXCEPTION(
6605 true, std::invalid_argument,
6606 prefix << "This method only allows either domainMap == getDomainMap(), "
6607 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
6608 }
6609
6610 // Tpetra-specific stuff
6611 size_t constantNumPackets = destGraph->constantNumberOfPackets();
6612 if (constantNumPackets == 0) {
6613 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6614 RemoteLIDs.size());
6615 } else {
6616 // There are a constant number of packets per element. We
6617 // already know (from the number of "remote" (incoming)
6618 // elements) how many incoming elements we expect, so we can
6619 // resize the buffer accordingly.
6620 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6621 destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
6622 }
6623
6624 {
6625 // packAndPrepare* methods modify numExportPacketsPerLID_.
6626 destGraph->numExportPacketsPerLID_.modify_host();
6627 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6628 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6629
6630 // Pack & Prepare w/ owning PIDs
6631 packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
6632 numExportPacketsPerLID, ExportLIDs,
6633 SourcePids, constantNumPackets);
6634 }
6635
6636 // Do the exchange of remote data.
6637#ifdef HAVE_TPETRA_MMM_TIMINGS
6638 MM = Teuchos::null;
6639 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("Transfer"))));
6640#endif
6641
6642 if (communication_needed) {
6643 if (reverseMode) {
6644 if (constantNumPackets == 0) { // variable number of packets per LID
6645 // Make sure that host has the latest version, since we're
6646 // using the version on host. If host has the latest
6647 // version, syncing to host does nothing.
6648 destGraph->numExportPacketsPerLID_.sync_host();
6649 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6650 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6651 destGraph->numImportPacketsPerLID_.sync_host();
6652 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
6653 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6654
6655 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6656 destGraph->numImportPacketsPerLID_.view_host());
6657 size_t totalImportPackets = 0;
6658 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6659 totalImportPackets += numImportPacketsPerLID[i];
6660 }
6661
6662 // Reallocation MUST go before setting the modified flag,
6663 // because it may clear out the flags.
6664 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
6665 destGraph->imports_.modify_host();
6666 auto hostImports = destGraph->imports_.view_host();
6667 // This is a legacy host pack/unpack path, so use the host
6668 // version of exports_.
6669 destGraph->exports_.sync_host();
6670 auto hostExports = destGraph->exports_.view_host();
6671 Distor.doReversePostsAndWaits(hostExports,
6672 numExportPacketsPerLID,
6673 hostImports,
6674 numImportPacketsPerLID);
6675 } else { // constant number of packets per LI
6676 destGraph->imports_.modify_host();
6677 auto hostImports = destGraph->imports_.view_host();
6678 // This is a legacy host pack/unpack path, so use the host
6679 // version of exports_.
6680 destGraph->exports_.sync_host();
6681 auto hostExports = destGraph->exports_.view_host();
6682 Distor.doReversePostsAndWaits(hostExports,
6683 constantNumPackets,
6684 hostImports);
6685 }
6686 } else { // forward mode (the default)
6687 if (constantNumPackets == 0) { // variable number of packets per LID
6688 // Make sure that host has the latest version, since we're
6689 // using the version on host. If host has the latest
6690 // version, syncing to host does nothing.
6691 destGraph->numExportPacketsPerLID_.sync_host();
6692 destGraph->numImportPacketsPerLID_.sync_host();
6693 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6694 destGraph->numImportPacketsPerLID_.view_host());
6695
6696 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6697 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6698 size_t totalImportPackets = 0;
6699 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
6700 totalImportPackets += numImportPacketsPerLID[i];
6701 }
6702
6703 // Reallocation MUST go before setting the modified flag,
6704 // because it may clear out the flags.
6705 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
6706 destGraph->imports_.modify_host();
6707 auto hostImports = destGraph->imports_.view_host();
6708 // This is a legacy host pack/unpack path, so use the host
6709 // version of exports_.
6710 destGraph->exports_.sync_host();
6711 auto hostExports = destGraph->exports_.view_host();
6712 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6713 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6714 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
6715 } else { // constant number of packets per LID
6716 destGraph->imports_.modify_host();
6717 auto hostImports = destGraph->imports_.view_host();
6718 // This is a legacy host pack/unpack path, so use the host
6719 // version of exports_.
6720 destGraph->exports_.sync_host();
6721 auto hostExports = destGraph->exports_.view_host();
6722 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
6723 }
6724 }
6725 }
6726
6727 /*********************************************************************/
6728 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
6729 /*********************************************************************/
6730
6731#ifdef HAVE_TPETRA_MMM_TIMINGS
6732 MM = Teuchos::null;
6733 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("Unpack-1"))));
6734#endif
6735
6736 // Backwards compatibility measure. We'll use this again below.
6737 destGraph->numImportPacketsPerLID_.sync_host();
6738 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6739 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
6740 destGraph->imports_.sync_host();
6741 Teuchos::ArrayView<const packet_type> hostImports =
6742 getArrayViewFromDualView(destGraph->imports_);
6743 size_t mynnz =
6744 unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
6745 numImportPacketsPerLID,
6746 constantNumPackets, INSERT,
6747 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
6748 size_t N = BaseRowMap->getLocalNumElements();
6749
6750 // Allocations
6751 ArrayRCP<size_t> CSR_rowptr(N + 1);
6752 ArrayRCP<GO> CSR_colind_GID;
6753 ArrayRCP<LO> CSR_colind_LID;
6754 CSR_colind_GID.resize(mynnz);
6755
6756 // If LO and GO are the same, we can reuse memory when
6757 // converting the column indices from global to local indices.
6758 if (typeid(LO) == typeid(GO)) {
6759 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
6760 } else {
6761 CSR_colind_LID.resize(mynnz);
6762 }
6763
6764 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
6765 // unpackAndCombine method on a "CrsArrays" object? This passing
6766 // in a huge list of arrays is icky. Can't we have a bit of an
6767 // abstraction? Implementing a concrete DistObject subclass only
6768 // takes five methods.
6769 unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
6770 numImportPacketsPerLID, constantNumPackets,
6771 INSERT, NumSameIDs, PermuteToLIDs,
6772 PermuteFromLIDs, N, mynnz, MyPID,
6773 CSR_rowptr(), CSR_colind_GID(),
6774 SourcePids(), TargetPids);
6775
6776 /**************************************************************/
6777 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
6778 /**************************************************************/
6779#ifdef HAVE_TPETRA_MMM_TIMINGS
6780 MM = Teuchos::null;
6781 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("Unpack-2"))));
6782#endif
6783 // Call an optimized version of makeColMap that avoids the
6784 // Directory lookups (since the Import object knows who owns all
6785 // the GIDs).
6786 Teuchos::Array<int> RemotePids;
6787 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
6788 CSR_colind_LID(),
6789 CSR_colind_GID(),
6790 BaseDomainMap,
6791 TargetPids, RemotePids,
6792 MyColMap);
6793
6794 /*******************************************************/
6795 /**** 4) Second communicator restriction phase ****/
6796 /*******************************************************/
6797 if (restrictComm) {
6798 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
6799 MyColMap = ReducedColMap; // Reset the "my" maps
6800 }
6801
6802 // Replace the col map
6803 destGraph->replaceColMap(MyColMap);
6804
6805 // Short circuit if the processor is no longer in the communicator
6806 //
6807 // NOTE: Epetra modifies all "removed" processes so they
6808 // have a dummy (serial) Map that doesn't touch the original
6809 // communicator. Duplicating that here might be a good idea.
6810 if (ReducedComm.is_null()) {
6811 return;
6812 }
6813
6814 /***************************************************/
6815 /**** 5) Sort ****/
6816 /***************************************************/
6817 if ((!reverseMode && xferAsImport != nullptr) ||
6818 (reverseMode && xferAsExport != nullptr)) {
6819 Import_Util::sortCrsEntries(CSR_rowptr(),
6820 CSR_colind_LID());
6821 } else if ((!reverseMode && xferAsExport != nullptr) ||
6822 (reverseMode && xferAsImport != nullptr)) {
6823 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
6824 CSR_colind_LID());
6825 if (CSR_rowptr[N] != mynnz) {
6826 CSR_colind_LID.resize(CSR_rowptr[N]);
6827 }
6828 } else {
6829 TEUCHOS_TEST_FOR_EXCEPTION(
6830 true, std::logic_error,
6831 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6832 }
6833 /***************************************************/
6834 /**** 6) Reset the colmap and the arrays ****/
6835 /***************************************************/
6836
6837 // Call constructor for the new graph (restricted as needed)
6838 //
6839 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
6840
6841 /***************************************************/
6842 /**** 7) Build Importer & Call ESFC ****/
6843 /***************************************************/
6844 // Pre-build the importer using the existing PIDs
6845 Teuchos::ParameterList esfc_params;
6846#ifdef HAVE_TPETRA_MMM_TIMINGS
6847 MM = Teuchos::null;
6848 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("CreateImporter"))));
6849#endif
6850 RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
6851#ifdef HAVE_TPETRA_MMM_TIMINGS
6852 MM = Teuchos::null;
6853 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2 + string("ESFC"))));
6854
6855 esfc_params.set("Timer Label", prefix + std::string("TAFC"));
6856#endif
6857 if (!params.is_null())
6858 esfc_params.set("compute global constants", params->get("compute global constants", true));
6859
6860 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
6861 MyImport, Teuchos::null, rcp(&esfc_params, false));
6862}
6863
// importAndFillComplete, single-importer overload.
// Thin wrapper: forwards destGraph, importer, domainMap, rangeMap and params
// unchanged to transferAndFillComplete, with Teuchos::null as the third
// argument (the position where the two-importer overload passes its
// domainImporter).
6864template <class LocalOrdinal, class GlobalOrdinal, class Node>
6867 const import_type& importer,
6868 const Teuchos::RCP<const map_type>& domainMap,
6869 const Teuchos::RCP<const map_type>& rangeMap,
6870 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6871 transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
6872}
6873
// importAndFillComplete, row-importer + domain-importer overload.
// Thin wrapper: forwards to transferAndFillComplete with rowImporter as the
// second argument and domainImporter, wrapped by Teuchos::rcpFromRef, as the
// third argument.
// NOTE(review): rcpFromRef presumably yields a non-owning RCP, so
// domainImporter must stay alive for the duration of the call -- confirm.
6874template <class LocalOrdinal, class GlobalOrdinal, class Node>
6877 const import_type& rowImporter,
6879 const Teuchos::RCP<const map_type>& domainMap,
6880 const Teuchos::RCP<const map_type>& rangeMap,
6881 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6882 transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
6883}
6884
// exportAndFillComplete, single-exporter overload.
// Thin wrapper: forwards destGraph, exporter, domainMap, rangeMap and params
// unchanged to transferAndFillComplete, with Teuchos::null as the third
// argument (the position where the two-exporter overload passes its
// domainExporter).
6885template <class LocalOrdinal, class GlobalOrdinal, class Node>
6888 const export_type& exporter,
6889 const Teuchos::RCP<const map_type>& domainMap,
6890 const Teuchos::RCP<const map_type>& rangeMap,
6891 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6892 transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
6893}
6894
// exportAndFillComplete, row-exporter + domain-exporter overload.
// Thin wrapper: forwards to transferAndFillComplete with rowExporter as the
// second argument and domainExporter, wrapped by Teuchos::rcpFromRef, as the
// third argument.
// NOTE(review): rcpFromRef presumably yields a non-owning RCP, so
// domainExporter must stay alive for the duration of the call -- confirm.
6895template <class LocalOrdinal, class GlobalOrdinal, class Node>
6898 const export_type& rowExporter,
6900 const Teuchos::RCP<const map_type>& domainMap,
6901 const Teuchos::RCP<const map_type>& rangeMap,
6902 const Teuchos::RCP<Teuchos::ParameterList>& params) const {
6903 transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
6904}
6905
// swap: exchanges the state of *this with `graph`, one data member at a
// time, using std::swap. Members exchanged below: the UVM host-sync flag,
// the four maps, importer/exporter, cached entry counts, row-pointer and
// column-index storage, storage status, the boolean state flags, the per-row
// allocation / entry-count views, and nonlocals_.
// Each statement touches a different member, so the statements are
// independent of one another.
6906template <class LocalOrdinal, class GlobalOrdinal, class Node>
6909 std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
6910
// Row, column, range and domain maps.
6911 std::swap(graph.rowMap_, this->rowMap_);
6912 std::swap(graph.colMap_, this->colMap_);
6913 std::swap(graph.rangeMap_, this->rangeMap_);
6914 std::swap(graph.domainMap_, this->domainMap_);
6915
// Communication objects.
6916 std::swap(graph.importer_, this->importer_);
6917 std::swap(graph.exporter_, this->exporter_);
6918
// Cached local and global counts.
6919 std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
6920
6921 std::swap(graph.globalNumEntries_, this->globalNumEntries_);
6922 std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
6923
6924 std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
6925
// Packed and unpacked row pointers (device and host copies), plus the flag
// recording whether the two sets match.
6926 std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
6927 std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
6928
6929 std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
6930 std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
6931 std::swap(graph.packedUnpackedRowPtrsMatch_, this->packedUnpackedRowPtrsMatch_);
6932
6933 std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
6934
// Column-index storage: unpacked local, global, and packed local.
6935 std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
6936 std::swap(graph.gblInds_wdv, this->gblInds_wdv);
6937 std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
6938
6939 std::swap(graph.storageStatus_, this->storageStatus_);
6940
// Boolean state flags.
6941 std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
6942 std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
6943 std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
6944 std::swap(graph.fillComplete_, this->fillComplete_);
6945 std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
6946 std::swap(graph.noRedundancies_, this->noRedundancies_);
6947 std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
6948 std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
6949 std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
6950
6951 std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
6952
6953 std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
6954 std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
6955 std::swap(graph.nonlocals_, this->nonlocals_); // std::map
6956}
6957
// isIdenticalTo: member-by-member comparison of *this against `graph`.
// `output` starts as true and every check below can only switch it to false,
// so the function returns true only when all checks pass. The checks cover
// the four maps (via isSameAs), cached entry counts, storage status and
// state flags, nonlocals_, the per-row allocation / entry-count views, the
// unpacked row pointers, local and global index storage, and the host local
// graph. importer_ and exporter_ are deliberately not compared (see the
// comment before the return statement).
// NOTE(review): rowMap_, colMap_, rangeMap_ and domainMap_ of both graphs
// are dereferenced without a null check -- confirm callers only pass graphs
// whose maps are all set.
6958template <class LocalOrdinal, class GlobalOrdinal, class Node>
// Helper: true when m1 and m2 have the same size, every key of m1 is present
// in m2, and for each key the two value vectors hold equal elements after
// sorting. The vectors are copied before sorting, so the maps themselves are
// not modified and element order within a vector does not matter.
6961 auto compare_nonlocals = [&](const nonlocals_type& m1, const nonlocals_type& m2) {
6962 bool output = true;
6963 output = m1.size() == m2.size() ? output : false;
6964 for (auto& it_m : m1) {
6965 size_t key = it_m.first;
6966 output = m2.find(key) != m2.end() ? output : false;
6967 if (output) {
6968 auto v1 = m1.find(key)->second;
6969 auto v2 = m2.find(key)->second;
6970 std::sort(v1.begin(), v1.end());
6971 std::sort(v2.begin(), v2.end());
6972
6973 output = v1.size() == v2.size() ? output : false;
6974 for (size_t i = 0; output && i < v1.size(); i++) {
6975 output = v1[i] == v2[i] ? output : false;
6976 }
6977 }
6978 }
6979 return output;
6980 };
6981
6982 bool output = true;
6983
// Maps are compared with isSameAs rather than by pointer.
6984 output = this->rowMap_->isSameAs(*(graph.rowMap_)) ? output : false;
6985 output = this->colMap_->isSameAs(*(graph.colMap_)) ? output : false;
6986 output = this->rangeMap_->isSameAs(*(graph.rangeMap_)) ? output : false;
6987 output = this->domainMap_->isSameAs(*(graph.domainMap_)) ? output : false;
6988
6989 output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
6990
6991 output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
6992 output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
6993
6994 output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
6995
6996 output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
6997
6998 output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
6999 output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7000 output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7001 output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7002 output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7003 output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7004 output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7005 output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7006 output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7007 output = this->sortGhostsAssociatedWithEachProcessor_ == graph.sortGhostsAssociatedWithEachProcessor_ ? output : false;
7008
7009 // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7010 // (value vectors are compared after sorting; see compare_nonlocals above)
7011 output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7012
7013 // Compare k_numAllocPerRow_ isa Kokkos::View::host_mirror_type
7014 // - since this is a host_mirror_type type, it should be in host memory already
7015 output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7016 if (output && this->k_numAllocPerRow_.extent(0) > 0) {
7017 for (size_t i = 0; output && i < this->k_numAllocPerRow_.extent(0); i++)
7018 output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7019 }
7020
7021 // Compare k_numRowEntries_ isa Kokkos::View::host_mirror_type
7022 // - since this is a host_mirror_type type, it should be in host memory already
7023 output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7024 if (output && this->k_numRowEntries_.extent(0) > 0) {
7025 for (size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7026 output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7027 }
7028
7029 // Compare the unpacked row pointers, read through getRowPtrsUnpackedHost()
7030 {
7031 auto rowPtrsThis = this->getRowPtrsUnpackedHost();
7032 auto rowPtrsGraph = graph.getRowPtrsUnpackedHost();
7033 output = rowPtrsThis.extent(0) == rowPtrsGraph.extent(0) ? output : false;
7034 for (size_t i = 0; output && i < rowPtrsThis.extent(0); i++)
7035 output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7036 }
7037
7038 // Compare lclIndsUnpacked_wdv entry by entry through read-only host views
7039 output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7040 if (output && this->lclIndsUnpacked_wdv.extent(0) > 0) {
7041 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7042 auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7043 for (size_t i = 0; output && i < indThis.extent(0); i++)
7044 output = indThis(i) == indGraph(i) ? output : false;
7045 }
7046
7047 // Compare gblInds_wdv entry by entry through read-only host views
7048 output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7049 if (output && this->gblInds_wdv.extent(0) > 0) {
7050 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7051 auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7052 for (size_t i = 0; output && i < indtThis.extent(0); i++)
7053 output = indtThis(i) == indtGraph(i) ? output : false;
7054 }
7055
7056 // Check lclGraph_ isa
7057 // KokkosSparse::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7058 // KokkosSparse::StaticCrsGraph has 3 data members in it:
7059 // Kokkos::View<size_type*, ...> row_map
7060 // (local_graph_device_type::row_map_type)
7061 // Kokkos::View<data_type*, ...> entries
7062 // (local_graph_device_type::entries_type)
7063 // Kokkos::View<size_type*, ...> row_block_offsets
7064 // (local_graph_device_type::row_block_type)
7065 // There is currently no KokkosSparse::StaticCrsGraph comparison function
7066 // that's built-in, so we will just compare
7067 // the three data items here. This can be replaced if Kokkos ever
7068 // puts in its own comparison routine.
7069 local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7070 local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7071
7072 output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7073 ? output
7074 : false;
7075 if (output && thisLclGraph.row_map.extent(0) > 0) {
7078 for (size_t i = 0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7080 ? output
7081 : false;
7082 }
7083
7084 output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7085 ? output
7086 : false;
7087 if (output && thisLclGraph.entries.extent(0) > 0) {
7090 for (size_t i = 0; output && i < lclGraph_entries_host_this.extent(0); i++)
7092 ? output
7093 : false;
7094 }
7095
7096 output =
7097 thisLclGraph.row_block_offsets.extent(0) ==
7098 graphLclGraph.row_block_offsets.extent(0)
7099 ? output
7100 : false;
7101 if (output && thisLclGraph.row_block_offsets.extent(0) > 0) {
7102 auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7103 auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7104 for (size_t i = 0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7106 ? output
7107 : false;
7108 }
7109
7110 // For Importer and Exporter, we don't need to explicitly check them since
7111 // they will be consistent with the maps.
7112 // Note: importer_ isa Teuchos::RCP<const import_type>
7113 // exporter_ isa Teuchos::RCP<const export_type>
7114
7115 return output;
7116}
7117
7118} // namespace Tpetra
7119
7120//
7121// Explicit instantiation macros
7122//
7123// Must be expanded from within the Tpetra namespace!
7124//
7125
// Expands to a `template <>` declaration of importAndFillCompleteCrsGraph
// for CrsGraph<LO, GO, NODE>, in the form that takes a single Import
// (`importer`) plus domainMap, rangeMap and params, and returns
// Teuchos::RCP<CrsGraph<LO, GO, NODE>>. The Import and Map template
// arguments are spelled through the graph's own local_ordinal_type,
// global_ordinal_type and node_type typedefs.
7126#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7127 template <> \
7128 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7129 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7130 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7131 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7132 CrsGraph<LO, GO, NODE>::node_type>& importer, \
7133 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7134 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7135 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7136 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7137 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7138 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7139 const Teuchos::RCP<Teuchos::ParameterList>& params);
7140
// Expands to a `template <>` declaration of importAndFillCompleteCrsGraph
// for CrsGraph<LO, GO, NODE>, in the form that takes two Import objects
// (`rowImporter` and `domainImporter`) plus domainMap, rangeMap and params,
// and returns Teuchos::RCP<CrsGraph<LO, GO, NODE>>.
7141#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7142 template <> \
7143 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7144 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7145 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7146 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7147 CrsGraph<LO, GO, NODE>::node_type>& rowImporter, \
7148 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7149 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7150 CrsGraph<LO, GO, NODE>::node_type>& domainImporter, \
7151 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7152 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7153 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7154 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7155 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7156 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7157 const Teuchos::RCP<Teuchos::ParameterList>& params);
7158
// Expands to a `template <>` declaration of exportAndFillCompleteCrsGraph
// for CrsGraph<LO, GO, NODE>, in the form that takes a single Export
// (`exporter`) plus domainMap, rangeMap and params, and returns
// Teuchos::RCP<CrsGraph<LO, GO, NODE>>.
7159#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7160 template <> \
7161 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7162 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7163 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7164 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7165 CrsGraph<LO, GO, NODE>::node_type>& exporter, \
7166 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7167 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7168 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7169 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7170 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7171 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7172 const Teuchos::RCP<Teuchos::ParameterList>& params);
7173
// Expands to a `template <>` declaration of exportAndFillCompleteCrsGraph
// for CrsGraph<LO, GO, NODE>, in the form that takes two Export objects
// (`rowExporter` and `domainExporter`) plus domainMap, rangeMap and params,
// and returns Teuchos::RCP<CrsGraph<LO, GO, NODE>>.
7174#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7175 template <> \
7176 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7177 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7178 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7179 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7180 CrsGraph<LO, GO, NODE>::node_type>& rowExporter, \
7181 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7182 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7183 CrsGraph<LO, GO, NODE>::node_type>& domainExporter, \
7184 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7185 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7186 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7187 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7188 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7189 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7190 const Teuchos::RCP<Teuchos::ParameterList>& params);
7191
// Umbrella macro for one (LO, GO, NODE) combination: explicitly instantiates
// the class CrsGraph<LO, GO, NODE>, then expands the four
// import/export-and-fill-complete macros defined earlier in this file
// (single-object forms first, then the two-object "_TWO" forms).
7192#define TPETRA_CRSGRAPH_INSTANT(LO, GO, NODE) \
7193 template class CrsGraph<LO, GO, NODE>; \
7194 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7195 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7196 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7197 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE)
7198
7199#endif // TPETRA_CRSGRAPH_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indices.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph is identical to the given graph (maps, state flags, and stored indices all match).
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. If OptimizedStorage...
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
bool hasColMap() const override
Whether the graph has a column Map.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
std::string description() const override
Return a one-line human-readable description of this object.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
typename Node::device_type device_type
This class' Kokkos device type.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
KokkosSparse::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indices.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. Built during fillCo...
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
Kokkos::View< const size_t *, device_type >::host_mirror_type k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Node node_type
This class' Kokkos Node type.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a read-write, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRow).
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void setLocallyModified()
Report that a local modification was made to the graph's structure.
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
Teuchos::RCP< const map_type > domainMap
Domain map for original matrix.
Teuchos::RCP< const map_type > rowMap
Desired row map for "imported" version of the matrix.
Teuchos::RCP< const map_type > origRowMap
Original row map of matrix.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Sets up and executes a communication plan for a Tpetra DistObject.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
A parallel distribution of indices over processes.
An abstract interface for graphs accessed by rows.
Abstract base class for objects that can be the source of an Import or Export operation.
Implementation details of Tpetra.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interface.
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
CombineMode
Rule for combining data in an Import or Export.
@ INSERT
Insert new values that don't currently exist.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.