2334 std::ostringstream
os;
2335 os <<
"You attempted to insert entries in owned row " <<
gblRow
2336 <<
", at the following column indices: [";
2344 <<
"Of those, the following indices are not in "
2345 "the column Map on this process: [";
2346 for (
size_t k = 0; k < badColInds.size(); ++k) {
2347 os << badColInds[k];
2348 if (k +
size_t(1) < badColInds.size()) {
2353 <<
"Since the matrix has a column Map already, "
2354 "it is invalid to insert entries at those locations.";
2355 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::invalid_argument, os.str());
2359 this->insertGlobalIndicesImpl(lclRow, inputGblColInds, numInputInds);
2361 this->insertGlobalIndicesIntoNonownedRows(gblRow, inputGblColInds,
2366template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2374template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2381 const char tfecfFuncName[] =
"insertGlobalIndicesFiltered: ";
2384 "Graph indices are local; use insertLocalIndices().");
2390 "You are not allowed to call this method if fill is not active. "
2391 "If fillComplete has been called, you must first call resumeFill "
2392 "before you may insert indices.");
2393 if (!indicesAreAllocated()) {
2394 allocateIndices(GlobalIndices, verbose_);
2399 if (!colMap_.is_null()) {
2410 if (
lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2433template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2450template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2455 !isFillActive(), std::runtime_error,
"requires that fill is active.");
2457 isStorageOptimized(), std::runtime_error,
2458 "cannot remove indices after optimizeStorage() has been called.");
2460 isGloballyIndexed(), std::runtime_error,
"graph indices are global.");
2462 !rowMap_->isNodeLocalElement(
lrow), std::runtime_error,
2463 "Local row " <<
lrow <<
" is not in the row Map on the calling process.");
2464 if (!indicesAreAllocated()) {
2465 allocateIndices(LocalIndices, verbose_);
2468 if (k_numRowEntries_.extent(0) != 0) {
2469 this->k_numRowEntries_(
lrow) = 0;
2474 !indicesAreAllocated() ||
2475 !isLocallyIndexed(),
2477 "Violated stated post-conditions. Please contact Tpetra team.");
2481template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2484 const typename local_graph_device_type::entries_type::non_const_type&
columnIndices) {
2486 ProfilingRegion
region(
"Tpetra::CrsGraph::setAllIndices");
2489 !hasColMap() || getColMap().
is_null(), std::runtime_error,
2490 "The graph must have a column Map before you may call this method.");
2497 std::runtime_error,
"Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2501 std::runtime_error,
"rowPointers.size() = " <<
rowPtrLen <<
" != this->getLocalNumRows()+1 = " << (
numLocalRows + 1) <<
".");
2506 using exec_space =
typename local_graph_device_type::execution_space;
2509 Kokkos::parallel_reduce(
2510 Kokkos::RangePolicy<exec_space>(0,
columnIndices.extent(0)),
2517 auto comm = this->getComm();
2524 message = std::string(
"ERROR, rank ") + std::to_string(comm->getRank()) +
", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2527 throw std::invalid_argument(
"CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2531 if (debug_ && this->isSorted()) {
2534 using exec_space =
typename local_graph_device_type::execution_space;
2535 using size_type =
typename local_graph_device_type::size_type;
2536 Kokkos::parallel_reduce(
2550 auto comm = this->getComm();
2551 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX,
notSorted,
2557 message = std::string(
"ERROR, rank ") + std::to_string(comm->getRank()) +
", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2560 throw std::invalid_argument(
"CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2564 indicesAreAllocated_ =
true;
2565 indicesAreLocal_ =
true;
2566 indicesAreSorted_ =
true;
2567 noRedundancies_ =
true;
2569 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2572 set_need_sync_host_uvm_access();
2576 storageStatus_ = Details::STORAGE_1D_PACKED;
2581 numAllocForAllRows_ = 0;
2582 k_numAllocPerRow_ =
decltype(k_numAllocPerRow_)();
2584 checkInternalState();
2587template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2592 typedef typename local_graph_device_type::row_map_type
row_map_type;
2593 typedef typename row_map_type::array_layout
layout_type;
2596 Kokkos::MemoryUnmanaged>
2601 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2606 if constexpr (
same) {
2614 std::is_same<
typename row_map_type::memory_space,
2615 Kokkos::HostSpace>::value;
2637 Kokkos::View<LocalOrdinal*, layout_type, device_type>
k_ind =
2638 Kokkos::Compat::getKokkosViewDeepCopy<device_type>(
columnIndices());
2642template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2646 using Teuchos::Comm;
2647 using Teuchos::outArg;
2650 using Teuchos::REDUCE_MAX;
2651 using Teuchos::REDUCE_MIN;
2652 using Teuchos::reduceAll;
2656 using size_type =
typename Teuchos::Array<GO>::size_type;
2662 std::unique_ptr<std::string>
prefix;
2664 prefix = this->createPrefix(
"CrsGraph",
"globalAssemble");
2665 std::ostringstream
os;
2667 std::cerr <<
os.str();
2672 "Fill must be active before "
2673 "you may call this method.");
2688 std::ostringstream
os;
2690 std::cerr <<
os.str();
2693 }
else if (verbose_) {
2694 std::ostringstream
os;
2695 os << *
prefix <<
"At least 1 process has nonlocal rows"
2697 std::cerr <<
os.str();
2716 for (
auto mapIter = this->nonlocals_.begin();
2717 mapIter != this->nonlocals_.end();
2747 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
2752 std::ostringstream
os;
2753 os << *
prefix <<
"nonlocalRowMap->getIndexBase()="
2755 std::cerr <<
os.str();
2767 for (
auto mapIter = this->nonlocals_.begin();
2768 mapIter != this->nonlocals_.end();
2777 std::ostringstream
os;
2779 std::cerr <<
os.str();
2795 std::ostringstream
os;
2797 std::cerr <<
os.str();
2804 std::ostringstream
os;
2805 os << *
prefix <<
"Original row Map is NOT 1-to-1" <<
endl;
2806 std::cerr <<
os.str();
2823 std::ostringstream
os;
2825 std::cerr <<
os.str();
2836 std::ostringstream
os;
2838 std::cerr <<
os.str();
2850 checkInternalState();
2852 std::ostringstream
os;
2854 std::cerr <<
os.str();
2858template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2861 clearGlobalConstants();
2862 if (
params != Teuchos::null) this->setParameterList(
params);
2864 indicesAreSorted_ =
true;
2865 noRedundancies_ =
true;
2866 fillComplete_ =
false;
2869template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2882 Teuchos::RCP<const map_type>
domMap = this->getDomainMap();
2884 domMap = this->getRowMap();
2886 Teuchos::RCP<const map_type>
ranMap = this->getRangeMap();
2888 ranMap = this->getRowMap();
2893template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2895 fillComplete(
const Teuchos::RCP<const map_type>& domainMap,
2896 const Teuchos::RCP<const map_type>&
rangeMap,
2897 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
2901 const bool verbose = verbose_;
2905 std::unique_ptr<std::string>
prefix;
2907 prefix = this->createPrefix(
"CrsGraph",
"fillComplete");
2908 std::ostringstream
os;
2910 std::cerr <<
os.str();
2914 "Graph fill state must be active (isFillActive() "
2915 "must be true) before calling fillComplete().");
2917 const int numProcs = getComm()->getSize();
2926 if (
params->isParameter(
"sort column map ghost gids")) {
2927 sortGhostsAssociatedWithEachProcessor_ =
2928 params->get<
bool>(
"sort column map ghost gids",
2929 sortGhostsAssociatedWithEachProcessor_);
2930 }
else if (
params->isParameter(
"Sort column Map ghost GIDs")) {
2931 sortGhostsAssociatedWithEachProcessor_ =
2932 params->get<
bool>(
"Sort column Map ghost GIDs",
2933 sortGhostsAssociatedWithEachProcessor_);
2948 if (!indicesAreAllocated()) {
2951 allocateIndices(LocalIndices, verbose);
2954 allocateIndices(GlobalIndices, verbose);
2970 std::ostringstream
os;
2971 os << *
prefix <<
"Do not need to call globalAssemble; "
2972 "assertNoNonlocalInserts="
2976 std::cerr <<
os.str();
2981 std::ostringstream
os;
2983 Details::Impl::verbosePrintMap(
2984 os, nonlocals_.begin(), nonlocals_.end(),
2985 nonlocals_.size(),
"nonlocals_");
2986 std::cerr <<
os.str() <<
endl;
2990 auto map = this->getMap();
2991 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
2993 if (!comm.is_null()) {
2994 using Teuchos::REDUCE_MAX;
2995 using Teuchos::reduceAll;
3001 "least one process in the CrsGraph's communicator. This "
3002 "means either that you incorrectly set the "
3003 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3004 "or that you inserted invalid entries. "
3005 "Rerun with the environment variable TPETRA_VERBOSE="
3006 "CrsGraph set to see the entries of nonlocals_ on every "
3007 "MPI process (WARNING: lots of output).");
3010 "nonlocals_.size()=" <<
numNonlocals <<
" != 0 on the "
3011 "calling process. This means either that you incorrectly "
3012 "set the \"No Nonlocal Changes\" fillComplete parameter "
3013 "to true, or that you inserted invalid entries. "
3014 "Rerun with the environment "
3015 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3016 "of nonlocals_ on every MPI process (WARNING: lots of "
3029 Teuchos::Array<int> remotePIDs(0);
3032 this->makeColMap(remotePIDs);
3038 this->makeIndicesLocal(verbose);
3042 using Teuchos::outArg;
3044 using Teuchos::REDUCE_MIN;
3045 using Teuchos::reduceAll;
3049 if (!
map.is_null()) {
3050 comm =
map->getComm();
3052 if (comm.is_null()) {
3060 std::ostringstream
os;
3077 this->sortAndMergeAllIndices(this->isSorted(), this->isMerged());
3085 this->fillLocalGraph(
params);
3088 params->get(
"compute global constants",
true);
3090 this->computeGlobalConstants();
3092 this->computeLocalConstants();
3094 this->fillComplete_ =
true;
3095 this->checkInternalState();
3098 std::ostringstream
os;
3100 std::cerr <<
os.str();
3104template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3107 const Teuchos::RCP<const map_type>&
rangeMap,
3108 const Teuchos::RCP<const import_type>&
importer,
3109 const Teuchos::RCP<const export_type>&
exporter,
3110 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
3116 std::runtime_error,
"The input domain Map and range Map must be nonnull.");
3118 isFillComplete() || !hasColMap(), std::runtime_error,
3120 "call this method unless the graph has a column Map.");
3124 std::runtime_error,
"The calling process has getLocalNumRows() = " << getLocalNumRows() <<
" > 0 rows, but the row offsets array has not "
3128 std::runtime_error,
"The row offsets array has length " <<
rowPtrsUnpackedLength <<
" != getLocalNumRows()+1 = " << (getLocalNumRows() + 1) <<
".");
3143 numAllocForAllRows_ = 0;
3144 k_numAllocPerRow_ =
decltype(k_numAllocPerRow_)();
3145 indicesAreAllocated_ =
true;
3150 indicesAreLocal_ =
true;
3151 indicesAreGlobal_ =
false;
3159 indicesAreSorted_ =
true;
3160 noRedundancies_ =
true;
3166 importer_ = Teuchos::null;
3167 exporter_ = Teuchos::null;
3170 !
importer->getSourceMap()->isSameAs(*getDomainMap()) ||
3171 !
importer->getTargetMap()->isSameAs(*getColMap()),
3172 std::invalid_argument,
": importer does not match matrix maps.");
3181 !
exporter->getSourceMap()->isSameAs(*getRowMap()) ||
3182 !
exporter->getTargetMap()->isSameAs(*getRangeMap()),
3183 std::invalid_argument,
": exporter does not match matrix maps.");
3189 Teuchos::Array<int> remotePIDs(0);
3190 this->makeImportExport(remotePIDs,
false);
3194 this->fillLocalGraph(
params);
3197 params->get(
"compute global constants",
true);
3202 this->computeGlobalConstants();
3206 this->computeLocalConstants();
3209 fillComplete_ =
true;
3213 checkInternalState();
3216template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3219 using ::Tpetra::Details::computeOffsetsFromCounts;
3220 typedef typename local_graph_device_type::row_map_type
row_map_type;
3222 typedef typename local_graph_device_type::entries_type::non_const_type
lclinds_1d_type;
3224 "fillLocalGraph (called from fillComplete or "
3225 "expertStaticFillComplete): ";
3226 const size_t lclNumRows = this->getLocalNumRows();
3234 if (!
params.is_null() && !
params->get(
"Optimize Storage",
true)) {
3243 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3245 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) == 0, std::logic_error,
3246 "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3247 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) != lclNumRows + 1, std::logic_error,
3248 "rowPtrsUnpacked_host_.extent(0) = "
3249 << rowPtrsUnpacked.extent(0) <<
" != (lclNumRows + 1) = "
3250 << (lclNumRows + 1) <<
".");
// Consistency check between the unpacked row offsets and the unpacked
// local-index storage: the last row offset, i.e. entry (numOffsets - 1),
// must equal the extent of lclIndsUnpacked_wdv.  The error message reports
// the index that was actually read (numOffsets - 1), matching the other
// diagnostics in this function.
3251 const size_t numOffsets = rowPtrsUnpacked.extent(0);
3252 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3253 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numOffsets != 0 &&
3254 lclIndsUnpacked_wdv.extent(0) != valToCheck,
3255 std::logic_error,
"numOffsets=" << numOffsets <<
" != 0 "
3256 " and lclIndsUnpacked_wdv.extent(0)="
3257 << lclIndsUnpacked_wdv.extent(0) <<
" != rowPtrsUnpacked_host_(" << (numOffsets - 1) <<
")=" << valToCheck <<
".");
3260 size_t allocSize = 0;
3262 allocSize = this->getLocalAllocationSize();
3263 }
catch (std::logic_error& e) {
3264 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
3265 "getLocalAllocationSize threw "
3266 "std::logic_error: "
3268 }
catch (std::runtime_error& e) {
3269 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3270 "getLocalAllocationSize threw "
3271 "std::runtime_error: "
3273 }
catch (std::exception& e) {
3274 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3275 "getLocalAllocationSize threw "
3279 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3280 "getLocalAllocationSize threw "
3281 "an exception not a subclass of std::exception.");
3284 if (this->getLocalNumEntries() != allocSize) {
3287 non_const_row_map_type ptr_d;
3288 row_map_type ptr_d_const;
3297 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3298 if (rowPtrsUnpacked.extent(0) != 0) {
3299 const size_t numOffsets =
3300 static_cast<size_t>(rowPtrsUnpacked.extent(0));
3301 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3302 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck !=
size_t(lclIndsUnpacked_wdv.extent(0)),
3304 "(Unpacked branch) Before allocating "
3305 "or packing, k_rowPtrs_("
3306 << (numOffsets - 1) <<
")="
3307 << valToCheck <<
" != lclIndsUnpacked_wdv.extent(0)="
3308 << lclIndsUnpacked_wdv.extent(0) <<
".");
3318 size_t lclTotalNumEntries = 0;
3322 non_const_row_map_type(
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
3323 ptr_d_const = ptr_d;
3327 typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3329 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
size_t(numRowEnt_h.extent(0)) != lclNumRows,
3331 "(Unpacked branch) "
3332 "numRowEnt_h.extent(0)="
3333 << numRowEnt_h.extent(0)
3334 <<
" != getLocalNumRows()=" << lclNumRows <<
"");
3340 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
static_cast<size_t>(ptr_d.extent(0)) != lclNumRows + 1,
3342 "(Unpacked branch) After allocating "
3343 "ptr_d, ptr_d.extent(0) = "
3345 <<
" != lclNumRows+1 = " << (lclNumRows + 1) <<
".");
3346 const auto valToCheck =
3347 ::Tpetra::Details::getEntryOnHost(ptr_d, lclNumRows);
3348 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != lclTotalNumEntries, std::logic_error,
3349 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3350 "after filling ptr_d, ptr_d(lclNumRows="
3352 <<
") = " << valToCheck <<
" != total number of entries "
3353 "on the calling process = "
3354 << lclTotalNumEntries
3360 lclinds_1d_type ind_d =
3361 lclinds_1d_type(
"Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3373 typedef pack_functor<
3374 typename local_graph_device_type::entries_type::non_const_type,
3375 typename local_inds_dualv_type::t_dev::const_type,
3377 typename local_graph_device_type::row_map_type>
3379 inds_packer_type f(ind_d,
3380 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3381 ptr_d, this->getRowPtrsUnpackedDevice());
3383 typedef typename decltype(ind_d)::execution_space exec_space;
3384 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3385 Kokkos::parallel_for(range_type(0, lclNumRows), f);
3389 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(ptr_d.extent(0) == 0, std::logic_error,
3390 "(\"Optimize Storage\"=true branch) After packing, "
3391 "ptr_d.extent(0)=0.");
3392 if (ptr_d.extent(0) != 0) {
3393 const size_t numOffsets =
static_cast<size_t>(ptr_d.extent(0));
3394 const auto valToCheck =
3395 ::Tpetra::Details::getEntryOnHost(ptr_d, numOffsets - 1);
3396 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
static_cast<size_t>(valToCheck) != ind_d.extent(0),
3398 "(\"Optimize Storage\"=true branch) "
3399 "After packing, ptr_d("
3400 << (numOffsets - 1) <<
")="
3401 << valToCheck <<
" != ind_d.extent(0)="
3402 << ind_d.extent(0) <<
".");
3406 if (requestOptimizedStorage)
3407 setRowPtrs(ptr_d_const);
3409 setRowPtrsPacked(ptr_d_const);
3410 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3413 this->setRowPtrs(rowPtrsUnpacked_dev_);
3414 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3417 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3418 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3419 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsPacked_dev.extent(0) == 0, std::logic_error,
3420 "(\"Optimize Storage\"=false branch) "
3421 "rowPtrsPacked_dev_.extent(0) = 0.");
3422 if (rowPtrsPacked_dev.extent(0) != 0) {
3423 const size_t numOffsets =
3424 static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3425 const size_t valToCheck =
3426 rowPtrsPacked_host(numOffsets - 1);
3427 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck !=
size_t(lclIndsPacked_wdv.extent(0)),
3429 "(\"Optimize Storage\"=false branch) "
3430 "rowPtrsPacked_dev_("
3431 << (numOffsets - 1) <<
")="
3433 <<
" != lclIndsPacked_wdv.extent(0)="
3434 << lclIndsPacked_wdv.extent(0) <<
".");
3440 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3441 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3442 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
static_cast<size_t>(rowPtrsPacked_dev.extent(0)) != lclNumRows + 1,
3443 std::logic_error,
"After packing, rowPtrsPacked_dev_.extent(0) = " << rowPtrsPacked_dev.extent(0) <<
" != lclNumRows+1 = " << (lclNumRows + 1) <<
".");
3444 if (rowPtrsPacked_dev.extent(0) != 0) {
3445 const size_t numOffsets =
static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3446 const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3447 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
static_cast<size_t>(valToCheck) != lclIndsPacked_wdv.extent(0),
3448 std::logic_error,
"After packing, rowPtrsPacked_dev_(" << (numOffsets - 1) <<
") = " << valToCheck <<
" != lclIndsPacked_wdv.extent(0) = " << lclIndsPacked_wdv.extent(0) <<
".");
3452 if (requestOptimizedStorage) {
3458 k_numRowEntries_ = num_row_entries_type();
3461 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3463 storageStatus_ = Details::STORAGE_1D_PACKED;
3466 set_need_sync_host_uvm_access();
3469template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3481 isLocallyIndexed() || isGloballyIndexed(), std::runtime_error,
3482 "Requires matching maps and non-static graph.");
3486template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3489 const Teuchos::RCP<const import_type>&
newImport,
3492 using Teuchos::REDUCE_MIN;
3493 using Teuchos::reduceAll;
3500 isFillComplete(), std::runtime_error,
3501 "The graph is fill complete "
3502 "(isFillComplete() returns true). You must call resumeFill() before "
3503 "you may call this method.");
3521 const LO
lclNumRows =
static_cast<LO
>(this->getLocalNumRows());
3555 if (indicesAreAllocated()) {
3556 if (isLocallyIndexed()) {
3561 const size_t allocSize = this->getLocalAllocationSize();
3562 auto oldLclInds1D = lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly);
3568 const auto LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3569 const auto GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
3574 Kokkos::parallel_reduce(
3575 "Tpetra::CrsGraph::reindexColumns",
3576 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, allocSize),
3623 for (
size_t k = 0;
k <
rowInfo.numEntries; ++
k) {
3642 getRowMap().is_null() ? Teuchos::null : getRowMap()->getComm();
3643 if (!comm.is_null()) {
3649 "It is not possible to continue."
3650 " The most likely reason is that the graph is locally indexed, but the "
3651 "column Map is missing (null) on some processes, due to a previous call "
3652 "to replaceColMap().");
3656 "On some process, the graph "
3657 "contains column indices that are in the old column Map, but not in the "
3658 "new column Map (on that process). This method does NOT redistribute "
3659 "data; it does not claim to do the work of an Import or Export operation."
3660 " This means that for all processess, the calling process MUST own all "
3661 "column indices, in both the old column Map and the new column Map. In "
3662 "this case, you will need to do an Import or Export operation to "
3663 "redistribute data.");
3666 if (isLocallyIndexed()) {
3673 if (isLocallyIndexed()) {
3680 indicesAreSorted_ =
false;
3688 const bool sorted =
false;
3689 const bool merged =
true;
3703 if (!domainMap_.is_null()) {
3704 if (!domainMap_->isSameAs(*
newColMap)) {
3707 importer_ = Teuchos::null;
3716template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3719 const char prefix[] =
"Tpetra::CrsGraph::replaceDomainMap: ";
3721 colMap_.is_null(), std::invalid_argument,
prefix <<
"You may not call "
3722 "this method unless the graph already has a column Map.");
3725 prefix <<
"The new domain Map must be nonnull.");
3728 Teuchos::RCP<const import_type>
newImporter = Teuchos::null;
3735template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3738 const Teuchos::RCP<const import_type>&
newImporter) {
3739 const char prefix[] =
"Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
3741 colMap_.is_null(), std::invalid_argument,
prefix <<
"You may not call "
3742 "this method unless the graph already has a column Map.");
3745 prefix <<
"The new domain Map must be nonnull.");
3755 "If the new Import is null, "
3756 "then the new domain Map must be the same as the current column Map.");
3759 colMap_->isSameAs(*(
newImporter->getTargetMap()));
3764 "new Import is nonnull, then the current column Map must be the same "
3765 "as the new Import's target Map, and the new domain Map must be the "
3766 "same as the new Import's source Map.");
3771 importer_ = Teuchos::rcp_const_cast<import_type>(
newImporter);
3774template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3777 const char prefix[] =
"Tpetra::CrsGraph::replaceRangeMap: ";
3779 rowMap_.is_null(), std::invalid_argument,
prefix <<
"You may not call "
3780 "this method unless the graph already has a row Map.");
3783 prefix <<
"The new range Map must be nonnull.");
3786 Teuchos::RCP<const export_type>
newExporter = Teuchos::null;
3793template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3796 const Teuchos::RCP<const export_type>&
newExporter) {
3797 const char prefix[] =
"Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
3799 rowMap_.is_null(), std::invalid_argument,
prefix <<
"You may not call "
3800 "this method unless the graph already has a row Map.");
3803 prefix <<
"The new range Map must be nonnull.");
3813 "If the new Export is null, "
3814 "then the new range Map must be the same as the current row Map.");
3819 rowMap_->isSameAs(*(
newExporter->getSourceMap()));
3822 "new Export is nonnull, then the current row Map must be the same "
3823 "as the new Export's source Map, and the new range Map must be the "
3824 "same as the new Export's target Map.");
3829 exporter_ = Teuchos::rcp_const_cast<export_type>(
newExporter);
3832template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3837 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
3838 this->getRowPtrsPackedDevice());
3841template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3845 return local_graph_host_type(
3846 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
3847 this->getRowPtrsPackedHost());
3850template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3853 using Teuchos::ArrayView;
3854 using Teuchos::outArg;
3855 using Teuchos::reduceAll;
3856 using ::Tpetra::Details::ProfilingRegion;
3859 ProfilingRegion
regionCGC(
"Tpetra::CrsGraph::computeGlobalConstants");
3861 this->computeLocalConstants();
3866 if (!this->haveGlobalConstants_) {
3867 const Teuchos::Comm<int>& comm = *(this->getComm());
3881 lcl =
static_cast<GST>(this->getLocalNumEntries());
3884 this->globalNumEntries_ =
gbl;
3888 outArg(this->globalMaxNumRowEntries_));
3889 this->haveGlobalConstants_ =
true;
3893template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3896 using ::Tpetra::Details::ProfilingRegion;
3898 ProfilingRegion
regionCLC(
"Tpetra::CrsGraph::computeLocalConstants");
3899 if (this->haveLocalConstants_) {
3904 this->nodeMaxNumRowEntries_ =
3905 Teuchos::OrdinalTraits<size_t>::invalid();
3909 auto ptr = this->getRowPtrsPackedDevice();
3910 const LO
lclNumRows =
ptr.extent(0) == 0 ?
static_cast<LO
>(0) : (
static_cast<LO
>(
ptr.extent(0)) -
static_cast<LO
>(1));
3913 ::Tpetra::Details::maxDifference(
"Tpetra::CrsGraph: nodeMaxNumRowEntries",
3916 this->haveLocalConstants_ =
true;
3919template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3920std::pair<size_t, std::string>
3925 using Teuchos::arcp;
3926 using Teuchos::Array;
3930 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
3931 typedef typename num_row_entries_type::non_const_value_type
num_ent_type;
3935 std::unique_ptr<std::string>
prefix;
3937 prefix = this->createPrefix(
"CrsGraph",
"makeIndicesLocal");
3938 std::ostringstream
os;
3939 os << *
prefix <<
"lclNumRows: " << getLocalNumRows() <<
endl;
3940 std::cerr <<
os.str();
3946 "The graph does not have a "
3947 "column Map yet. This method should never be called in that case. "
3948 "Please report this bug to the Tpetra developers.");
3951 "that it has a column Map, because hasColMap() returns true. However, "
3952 "the result of getColMap() is null. This should never happen. Please "
3953 "report this bug to the Tpetra developers.");
3961 const LO
lclNumRows =
static_cast<LO
>(this->getLocalNumRows());
3964 if (this->isGloballyIndexed() &&
lclNumRows != 0) {
3966 typename num_row_entries_type::const_type
h_numRowEnt =
3967 this->k_numRowEntries_;
3973 errStrm <<
"Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
3974 "happen here. Please report this bug to the Tpetra developers."
3977 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid(),
3989 using Kokkos::view_alloc;
3990 using Kokkos::WithoutInitializing;
4000 const std::string label(
"Tpetra::CrsGraph::lclInd");
4002 std::ostringstream
os;
4003 os << *
prefix <<
"(Re)allocate lclInd_wdv: old="
4004 << lclIndsUnpacked_wdv.extent(0) <<
", new=" <<
numEnt <<
endl;
4005 std::cerr <<
os.str();
4022 std::ostringstream
os;
4023 os << *
prefix <<
"Allocate device mirror k_numRowEnt: "
4025 std::cerr <<
os.str();
4030 using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal;
4033 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4034 gblInds_wdv.getDeviceView(Access::ReadOnly),
4035 this->getRowPtrsUnpackedDevice(),
4039 const int myRank = [
this]() {
4040 auto map = this->getMap();
4041 if (
map.is_null()) {
4044 auto comm =
map->getComm();
4045 return comm.is_null() ? 0 : comm->getRank();
4049 errStrm <<
"(Process " <<
myRank <<
") When converting column "
4050 "indices from global to local, we encountered "
4054 <<
" not live in the column Map on this process." <<
endl;
4061 std::ostringstream
os;
4062 os << *
prefix <<
"Free gblInds_wdv: "
4063 << gblInds_wdv.extent(0) <<
endl;
4064 std::cerr <<
os.str();
4069 this->indicesAreLocal_ =
true;
4070 this->indicesAreGlobal_ =
false;
4071 this->checkInternalState();
4076template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4078 makeColMap(Teuchos::Array<int>& remotePIDs) {
4084 std::unique_ptr<std::string>
prefix;
4087 std::ostringstream
os;
4089 std::cerr <<
os.str();
4097 Teuchos::RCP<const map_type>
colMap = this->colMap_;
4099 this->sortGhostsAssociatedWithEachProcessor_;
4109 using Teuchos::outArg;
4110 using Teuchos::REDUCE_MIN;
4111 using Teuchos::reduceAll;
4117 auto comm = this->getComm();
4118 if (!comm.is_null()) {
4124 std::ostringstream
os;
4127 ": An error happened on at "
4128 "least one process in the CrsGraph's communicator. "
4129 "Here are all processes' error messages:"
4143 checkInternalState();
4145 std::ostringstream
os;
4147 std::cerr <<
os.str();
4151template <
class execution_space,
class LO,
class rowptr_type,
class colinds_type,
class numRowEntries_type>
4153 using ATS = KokkosKernels::ArithTraits<LO>;
4154 const auto unused = ATS::max();
4159 Kokkos::parallel_for(
4167template <
class execution_space,
class LO,
class rowptr_type,
class colinds_type,
class numRowEntries_type>
4168void mergeUnpackedGraph(rowptr_type rowptr, colinds_type colinds, numRowEntries_type numRowEntries) {
4172 auto numRows = rowptr.extent(0) - 1;
4178 Kokkos::parallel_for(
4179 "merge_entries", Kokkos::RangePolicy<execution_space>(0, numRows), KOKKOS_LAMBDA(
const LO rlid) {
4180 auto rowNNZ = numRowEntries(rlid);
4184 auto rowBegin = rowptr(rlid);
4185 auto pos = rowBegin;
4186 for (
size_t offset = rowBegin + 1; offset < rowBegin + rowNNZ; ++offset) {
4187 if ((colinds(offset) != colinds(pos))) {
4188 colinds(++pos) = colinds(offset);
4191 numRowEntries(rlid) = pos + 1 - rowBegin;
4195template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4196void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4197 sortAndMergeAllIndices(
const bool sorted,
const bool merged) {
4199 const char tfecfFuncName[] =
"sortAndMergeAllIndices";
4200 Details::ProfilingRegion regionSortAndMerge(
"Tpetra::CrsGraph::sortAndMergeAllIndices");
4202 std::unique_ptr<std::string> prefix;
4204 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4205 std::ostringstream os;
4206 os << *prefix <<
"Start: "
4207 <<
"sorted=" << (sorted ?
"true" :
"false")
4208 <<
", merged=" << (merged ?
"true" :
"false") << endl;
4209 std::cerr << os.str();
// Preconditions: indices must already be local, and a storage-optimized
// graph must already be merged.
// NOTE(review): tfecfFuncName in this function has no trailing ": " (it is
// also passed to createPrefix), and the exception macro appears to append
// the message directly after the function name -- confirm against the
// Teuchos macro definition.  Each message therefore supplies its own
// separator.
4211 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed(), std::logic_error,
4212 ": This method may only be called after makeIndicesLocal.");
4213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!merged && this->isStorageOptimized(), std::logic_error,
4214 ": The graph is already storage optimized, so we shouldn't be "
4215 "merging any indices. "
4216 "Please report this bug to the Tpetra developers.");
4218 if (!sorted || !merged) {
4219 if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
4222 auto rowptr = rowPtrsUnpacked_dev_;
4223 auto colinds = lclIndsUnpacked_wdv.getDeviceView(Access::ReadWrite);
4226 auto k_numRowEntries_d = Kokkos::create_mirror_view_and_copy(execution_space(), k_numRowEntries_);
4229 prepareSortMergeUnpackedGraph<execution_space, LocalOrdinal>(rowptr, colinds, k_numRowEntries_d);
4232 KokkosSparse::sort_crs_graph(rowptr, colinds);
4233 this->indicesAreSorted_ =
true;
4236 mergeUnpackedGraph<execution_space, LocalOrdinal>(rowptr, colinds, k_numRowEntries_d);
4237 Kokkos::deep_copy(k_numRowEntries_, k_numRowEntries_d);
4238 this->noRedundancies_ =
true;
4241 auto rowptr = rowPtrsPacked_dev_;
4242 auto colinds = lclIndsPacked_wdv.getDeviceView(Access::ReadWrite);
4243 if (!sorted && merged) {
4244 KokkosSparse::sort_crs_graph(rowptr, colinds);
4245 this->indicesAreSorted_ =
true;
// Unconditional failure for the branch that should be unreachable.
// The two sentences are adjacent string literals, so the space between
// them must be part of the first literal.
// NOTE(review): the leading ": " separates the message from tfecfFuncName,
// which has no trailing separator in this function -- confirm against the
// Teuchos macro definition.
4247 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
4248 ": We should never get here. "
4249 "Please report this bug to the Tpetra developers.");
4255 std::ostringstream os;
4256 os << *prefix <<
"Done" << endl;
4257 std::cerr << os.str();
// Creates the graph's Import and Export objects when they do not exist yet.
// A ProfilingRegion labelled "Tpetra::CrsGraph::makeImportExport" is opened
// for this routine.
4261template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4265 using Teuchos::ParameterList;
4268 using ::Tpetra::Details::ProfilingRegion;
4270 ProfilingRegion
regionMIE(
"Tpetra::CrsGraph::makeImportExport");
// Error text for the precondition: the graph must already have a column Map.
4273 "This method may not be called unless the graph has a column Map.");
// Import: only considered when importer_ is still null and the domain Map
// differs from the column Map (cheap pointer test first, then isSameAs()).
4283 if (importer_.is_null()) {
4285 if (domainMap_ != colMap_ && (!domainMap_->isSameAs(*colMap_))) {
// No "Import" sublist supplied: construct the Import from domainMap_ to
// colMap_, passing remotePIDs through.
4286 if (
params.is_null() || !
params->isSublist(
"Import")) {
4288 importer_ =
rcp(
new import_type(domainMap_, colMap_, remotePIDs));
// Export: same pattern, guarded on exporter_ being null and the range Map
// differing from the row Map; the "Export" sublist plays the matching role.
4308 if (exporter_.is_null()) {
4310 if (rangeMap_ != rowMap_ && !rangeMap_->isSameAs(*rowMap_)) {
4311 if (
params.is_null() || !
params->isSublist(
"Export")) {
// Builds a summary string: the dist_object_type description followed by a
// status section that starts with "{status = ...".
4321template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4325 std::ostringstream
oss;
4326 oss << dist_object_type::description();
// Fill-complete graphs report global row, column and entry counts.
4327 if (isFillComplete()) {
4328 oss <<
"{status = fill complete"
4329 <<
", global rows = " << getGlobalNumRows()
4330 <<
", global cols = " << getGlobalNumCols()
4331 <<
", global num entries = " << getGlobalNumEntries()
// Otherwise the status reads "fill not complete"; the global row count is
// still written.
4334 oss <<
"{status = fill not complete"
4335 <<
", global rows = " << getGlobalNumRows()
// Writes a description of the graph to `out`. `verbLevel` is the requested
// Teuchos verbosity level. Header-style text is written only by the process
// whose myImageID is 0; the Maps' own describe() calls are made
// unconditionally on each non-null Map.
4341template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4344 const Teuchos::EVerbosityLevel
verbLevel)
const {
4347 using Teuchos::ArrayView;
4348 using Teuchos::Comm;
4350 using Teuchos::VERB_DEFAULT;
4351 using Teuchos::VERB_EXTREME;
4352 using Teuchos::VERB_HIGH;
4353 using Teuchos::VERB_LOW;
4354 using Teuchos::VERB_MEDIUM;
4355 using Teuchos::VERB_NONE;
// Column width for the tabular output: the loop steps `dec` through powers of
// ten below the global row count; the result is then raised to at least 11
// and padded by 2.
4363 for (
size_t dec = 10;
dec < getGlobalNumRows();
dec *= 10) {
4366 width = std::max<size_t>(
width,
static_cast<size_t>(11)) + 2;
// Summary line from description().
4376 if (
myImageID == 0)
out << this->description() << std::endl;
// The global maximum row length is printed only once the graph is fill complete.
4378 if (isFillComplete() &&
myImageID == 0) {
4379 out <<
"Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
// Describe the row Map, then the column, domain and range Maps if present.
4384 rowMap_->describe(
out,
vl);
4385 if (colMap_ != Teuchos::null) {
4386 if (
myImageID == 0)
out <<
"\nColumn map: " << std::endl;
4387 colMap_->describe(
out,
vl);
4389 if (domainMap_ != Teuchos::null) {
4390 if (
myImageID == 0)
out <<
"\nDomain map: " << std::endl;
4391 domainMap_->describe(
out,
vl);
4393 if (rangeMap_ != Teuchos::null) {
4394 if (
myImageID == 0)
out <<
"\nRange map: " << std::endl;
4395 rangeMap_->describe(
out,
vl);
// Local ("Node") entry statistics.
4403 <<
"Node number of entries = " << this->getLocalNumEntries() << std::endl
4404 <<
"Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4405 if (!indicesAreAllocated()) {
4406 out <<
"Indices are not allocated." << std::endl;
// Column headings for the per-row table, each padded to `width`.
4418 out << std::setw(
width) <<
"Node ID"
4419 << std::setw(
width) <<
"Global Row"
4420 << std::setw(
width) <<
"Num Entries";
// Row contents are read from host views: global indices directly, or local
// indices translated through colMap_->getGlobalElement().
4435 if (isGloballyIndexed()) {
4436 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4437 for (
size_t j = 0;
j <
rowinfo.numEntries; ++
j) {
4441 }
else if (isLocallyIndexed()) {
4442 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4443 for (
size_t j = 0;
j <
rowinfo.numEntries; ++
j) {
4445 out << colMap_->getGlobalElement(
collid) <<
" ";
4460template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Copies rows from a source graph into this graph: first the leading
// `numSameIDs` rows, then the permuted rows. Rows are added with
// insertGlobalIndices(). Verbose tracing goes to std::cerr with a
// "copyAndPermute" prefix.
4469template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4472 const size_t numSameIDs,
4483 const bool verbose = verbose_;
4493 std::unique_ptr<std::string>
prefix;
4495 prefix = this->createPrefix(
"CrsGraph",
"copyAndPermute");
4496 std::ostringstream
os;
4498 std::cerr <<
os.str();
// Error text used when the two permutation lists differ in length
// (reported as std::runtime_error).
4502 std::runtime_error,
"permuteToLIDs.extent(0) = " <<
permuteToLIDs.extent(0) <<
" != permuteFromLIDs.extent(0) = " <<
permuteFromLIDs.extent(0) <<
".");
4510 std::ostringstream
os;
4512 std::cerr <<
os.str();
// Apply the padding object to this graph's storage before any rows are inserted.
4516 applyCrsPadding(*
padding, verbose);
4527 nonconst_global_inds_host_view_type
row_copy;
// Path taken when the trace message below applies:
// "src_filled || srcCrsGraph == nullptr".
4535 std::ostringstream
os;
4536 os << *
prefix <<
"src_filled || srcCrsGraph == nullptr" <<
endl;
4537 std::cerr <<
os.str();
4544 for (
size_t i = 0;
i < numSameIDs; ++
i, ++
myid) {
// Complementary path: "! src_filled && srcCrsGraph != nullptr".
4554 std::ostringstream
os;
4555 os << *
prefix <<
"! src_filled && srcCrsGraph != nullptr" <<
endl;
4556 std::cerr <<
os.str();
4558 for (
size_t i = 0;
i < numSameIDs; ++
i, ++
myid) {
// `row` is a host view of global column indices; its extent and data pointer
// are handed to insertGlobalIndices().
4560 global_inds_host_view_type row;
4562 this->insertGlobalIndices(
gid, row.extent(0), row.data());
// Same insertion pattern for a permuted row, keyed by mygid.
4586 global_inds_host_view_type row;
4588 this->insertGlobalIndices(
mygid, row.extent(0), row.data());
4593 std::ostringstream
os;
4595 std::cerr <<
os.str();
// Applies `padding` to this graph's unpacked index storage.
//
// Steps visible here: allocate indices if needed, copy the unpacked row
// pointers into row_ptrs_beg, build row_ptrs_end, call padCrsArrays(),
// refresh k_numRowEntries_ when it is in use, and finally install
// row_ptrs_beg through setRowPtrsUnpacked().
//
// `verbose` enables tracing to std::cerr.
4599template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4602 const bool verbose) {
4606 using LO = local_ordinal_type;
4607 using row_ptrs_type =
4608 typename local_graph_device_type::row_map_type::non_const_type;
4609 using range_policy =
4610 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4612 ProfilingRegion
regionCAP(
"Tpetra::CrsGraph::applyCrsPadding");
4614 std::unique_ptr<std::string>
prefix;
4617 std::ostringstream
os;
4621 std::cerr <<
os.str();
// myRank is -1 when not verbose; otherwise it comes from the lambda, whose
// final return is the rank of this object's Map's communicator.
4623 const int myRank = !verbose ? -1 : [&]() {
4624 auto map = this->getMap();
4625 if (map.is_null()) {
4628 auto comm = map->getComm();
4629 if (comm.is_null()) {
4632 return comm->getRank();
// Storage must exist before it can be padded; allocate as global indices.
4641 if (!indicesAreAllocated()) {
4643 std::ostringstream os;
4644 os << *prefix <<
"Call allocateIndices" << endl;
4645 std::cerr << os.str();
4647 allocateIndices(GlobalIndices, verbose);
4649 TEUCHOS_ASSERT(indicesAreAllocated());
4654 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4656 std::ostringstream os;
4657 os << *prefix <<
"Allocate row_ptrs_beg: "
4658 << rowPtrsUnpacked_dev.extent(0) << endl;
4659 std::cerr << os.str();
// row_ptrs_beg is allocated WithoutInitializing and then filled by the
// deep_copy from the current unpacked row pointers.
4661 using Kokkos::view_alloc;
4662 using Kokkos::WithoutInitializing;
4663 row_ptrs_type row_ptrs_beg(
4664 view_alloc(
"row_ptrs_beg", WithoutInitializing),
4665 rowPtrsUnpacked_dev.extent(0));
4667 Kokkos::deep_copy(execution_space(), row_ptrs_beg, rowPtrsUnpacked_dev);
// N is one less than the row-pointer length, or 0 when the array is empty.
4669 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) : size_t(row_ptrs_beg.extent(0) - 1);
4671 std::ostringstream os;
4672 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
4673 std::cerr << os.str();
4675 row_ptrs_type row_ptrs_end(
4676 view_alloc(
"row_ptrs_end", WithoutInitializing), N);
4677 row_ptrs_type num_row_entries;
// Per-row counts are only tracked when k_numRowEntries_ has nonzero extent.
4679 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4681 execution_space().fence();
// With per-row counts: end(i) = beg(i) + count(i).
// NOTE(review): the lambdas below take `const size_t i` while range_policy
// declares Kokkos::IndexType<LO> - confirm the index conversion is intended.
4683 if (refill_num_row_entries) {
4687 row_ptrs_type(view_alloc(
"num_row_entries", WithoutInitializing), N);
4688 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4689 Kokkos::parallel_for(
4690 "Fill end row pointers", range_policy(0, N),
4691 KOKKOS_LAMBDA(
const size_t i) {
4692 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
// Without per-row counts: end(i) = beg(i + 1).
4698 Kokkos::parallel_for(
4699 "Fill end row pointers", range_policy(0, N),
4700 KOKKOS_LAMBDA(
const size_t i) {
4701 row_ptrs_end(i) = row_ptrs_beg(i + 1);
// Pad whichever index array is active (global or local).
4705 if (isGloballyIndexed()) {
4707 padding, myRank, verbose);
4709 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4710 padding, myRank, verbose);
// Recompute per-row counts as end - beg and copy them back into
// k_numRowEntries_.
4713 if (refill_num_row_entries) {
4714 Kokkos::parallel_for(
4715 "Fill num entries", range_policy(0, N),
4716 KOKKOS_LAMBDA(
const size_t i) {
4717 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4719 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
4722 std::ostringstream os;
4723 os << *prefix <<
"Reassign k_rowPtrs_; old size: "
4724 << rowPtrsUnpacked_dev.extent(0) <<
", new size: "
4725 << row_ptrs_beg.extent(0) << endl;
4726 std::cerr << os.str();
// The row-pointer array must keep the same length.
4727 TEUCHOS_ASSERT(rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0));
4730 setRowPtrsUnpacked(row_ptrs_beg);
// Computes the padding needed for the "same" and "permuted" rows.
//
// source          - graph supplying the rows.
// numSameIDs      - number of leading rows handled by the same-ID pass.
// permuteToLIDs   - target local row indices of the permuted rows.
// permuteFromLIDs - source local row indices of the permuted rows.
// verbose         - enables tracing to std::cerr.
//
// A padding_type is created from (myRank, numSameIDs, number of permutes)
// and then filled by computeCrsPaddingForSameIDs() and
// computeCrsPaddingForPermutedIDs().
4733template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4735 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4736CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4738 const RowGraph<LocalOrdinal, GlobalOrdinal, Node>& source,
4739 const size_t numSameIDs,
4740 const Kokkos::DualView<
const local_ordinal_type*,
4741 buffer_device_type>& permuteToLIDs,
4742 const Kokkos::DualView<
const local_ordinal_type*,
4743 buffer_device_type>& permuteFromLIDs,
4744 const bool verbose)
const {
4748 std::unique_ptr<std::string> prefix;
4751 "computeCrsPadding(same & permute)");
4752 std::ostringstream os;
4753 os << *prefix <<
"{numSameIDs: " << numSameIDs
4754 <<
", numPermutes: " << permuteFromLIDs.extent(0) <<
"}"
4756 std::cerr << os.str();
// Rank of the row Map's communicator, or -1 when the Map or communicator is null.
4759 const int myRank = [&]() {
4760 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4761 return comm.is_null() ? -1 : comm->getRank();
4763 std::unique_ptr<padding_type> padding(
4764 new padding_type(myRank, numSameIDs,
4765 permuteFromLIDs.extent(0)));
// numSameIDs is narrowed to LO for the same-ID pass.
4767 computeCrsPaddingForSameIDs(*padding, source,
4768 static_cast<LO
>(numSameIDs));
4769 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
// Updates `padding` for the first `numSameIDs` rows. The same local row index
// is used in both the source and the target row Map.
//
// padding    - accumulator updated through update_same().
// numSameIDs - number of leading rows to process; 0 is handled by the early
//              check below.
4774template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4775void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4776 computeCrsPaddingForSameIDs(
4777 padding_type& padding,
4778 const RowGraph<local_ordinal_type, global_ordinal_type,
4780 const local_ordinal_type numSameIDs)
const {
4782 using GO = global_ordinal_type;
4783 using Details::Impl::getRowGraphGlobalRow;
// Label for verbose output. NOTE(review): spelled "...SameIds" whereas the
// method is named "...SameIDs" - confirm whether the mismatch matters.
4785 const char tfecfFuncName[] =
"computeCrsPaddingForSameIds";
4787 std::unique_ptr<std::string> prefix;
4788 const bool verbose = verbose_;
4790 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4791 std::ostringstream os;
4792 os << *prefix <<
"numSameIDs: " << numSameIDs << endl;
4793 std::cerr << os.str();
4796 if (numSameIDs == 0) {
4800 const map_type& srcRowMap = *(source.getRowMap());
4801 const map_type& tgtRowMap = *rowMap_;
// The source counts as duplicate-free only if it is a CrsGraph of this exact
// type and reports isMerged(); any other RowGraph is treated as not unique.
4802 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4803 const this_CRS_type* srcCrs =
dynamic_cast<const this_CRS_type*
>(&source);
4804 const bool src_is_unique =
4805 srcCrs ==
nullptr ? false : srcCrs->isMerged();
4806 const bool tgt_is_unique = this->isMerged();
// Scratch buffers handed to getRowGraphGlobalRow() on every iteration.
4808 std::vector<GO> srcGblColIndsScratch;
4809 std::vector<GO> tgtGblColIndsScratch;
4811 execute_sync_host_uvm_access();
// For each row: fetch the global column indices of the source and target row
// and pass both lists to the padding accumulator.
4812 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
4813 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
4814 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
4815 auto srcGblColInds = getRowGraphGlobalRow(
4816 srcGblColIndsScratch, source, srcGblRowInd);
4817 auto tgtGblColInds = getRowGraphGlobalRow(
4818 tgtGblColIndsScratch, *
this, tgtGblRowInd);
4819 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
4820 tgtGblColInds.size(), tgt_is_unique,
4821 srcGblColInds.getRawPtr(),
4822 srcGblColInds.size(), src_is_unique);
4825 std::ostringstream os;
4826 os << *prefix <<
"Done" << endl;
4827 std::cerr << os.str();
// Updates `padding` for the permuted rows: entry k of permuteFromLIDs is the
// source local row, entry k of permuteToLIDs the matching target local row.
//
// Both DualViews are asserted not to need a host sync before their host
// views are read.
4831template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4832void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4833 computeCrsPaddingForPermutedIDs(
4834 padding_type& padding,
4835 const RowGraph<local_ordinal_type, global_ordinal_type,
4837 const Kokkos::DualView<
const local_ordinal_type*,
4838 buffer_device_type>& permuteToLIDs,
4839 const Kokkos::DualView<
const local_ordinal_type*,
4840 buffer_device_type>& permuteFromLIDs)
const {
4842 using GO = global_ordinal_type;
4843 using Details::Impl::getRowGraphGlobalRow;
// Label for verbose output. NOTE(review): spelled "...PermutedIds" whereas
// the method is named "...PermutedIDs".
4845 const char tfecfFuncName[] =
"computeCrsPaddingForPermutedIds";
4847 std::unique_ptr<std::string> prefix;
4848 const bool verbose = verbose_;
4850 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4851 std::ostringstream os;
4852 os << *prefix <<
"permuteToLIDs.extent(0): "
4853 << permuteToLIDs.extent(0)
4854 <<
", permuteFromLIDs.extent(0): "
4855 << permuteFromLIDs.extent(0) << endl;
4856 std::cerr << os.str();
4859 if (permuteToLIDs.extent(0) == 0) {
4863 const map_type& srcRowMap = *(source.getRowMap());
4864 const map_type& tgtRowMap = *rowMap_;
// The source counts as duplicate-free only if it is a CrsGraph of this exact
// type and reports isMerged().
4865 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4866 const this_CRS_type* srcCrs =
dynamic_cast<const this_CRS_type*
>(&source);
4867 const bool src_is_unique =
4868 srcCrs ==
nullptr ? false : srcCrs->isMerged();
4869 const bool tgt_is_unique = this->isMerged();
4871 TEUCHOS_ASSERT(!permuteToLIDs.need_sync_host());
4872 auto permuteToLIDs_h = permuteToLIDs.view_host();
4873 TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_host());
4874 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
// Scratch buffers handed to getRowGraphGlobalRow() on every iteration.
4876 std::vector<GO> srcGblColIndsScratch;
4877 std::vector<GO> tgtGblColIndsScratch;
4878 const LO numPermutes =
static_cast<LO
>(permuteToLIDs_h.extent(0));
4880 execute_sync_host_uvm_access();
// For each permutation: translate both local rows to global rows, fetch
// their global column indices, and update the padding accumulator.
4881 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
4882 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
4883 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
4884 auto srcGblColInds = getRowGraphGlobalRow(
4885 srcGblColIndsScratch, source, srcGblRowInd);
4886 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
4887 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
4888 auto tgtGblColInds = getRowGraphGlobalRow(
4889 tgtGblColIndsScratch, *
this, tgtGblRowInd);
4890 padding.update_permute(whichPermute, tgtLclRowInd,
4891 tgtGblColInds.getRawPtr(),
4892 tgtGblColInds.size(), tgt_is_unique,
4893 srcGblColInds.getRawPtr(),
4894 srcGblColInds.size(), src_is_unique);
4898 std::ostringstream os;
4899 os << *prefix <<
"Done" << endl;
4900 std::cerr << os.str();
// Computes padding for rows received through an Import/Export.
//
// importLIDs       - target local row index for each received row.
// imports          - received packets; read on host as global column indices.
// numPacketsPerLID - number of packets received for each row.
// verbose          - enables tracing to std::cerr.
//
// A new padding_type sized for the number of imports is created and updated
// once per received row through update_import().
4904template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4906 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4907CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4908 computeCrsPaddingForImports(
4909 const Kokkos::DualView<
const local_ordinal_type*,
4910 buffer_device_type>& importLIDs,
4911 Kokkos::DualView<packet_type*, buffer_device_type> imports,
4912 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
4913 const bool verbose)
const {
4914 using Details::Impl::getRowGraphGlobalRow;
4917 using GO = global_ordinal_type;
4918 const char tfecfFuncName[] =
"computeCrsPaddingForImports";
4920 std::unique_ptr<std::string> prefix;
4922 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4923 std::ostringstream os;
4924 os << *prefix <<
"importLIDs.extent(0): "
4925 << importLIDs.extent(0)
4926 <<
", imports.extent(0): "
4927 << imports.extent(0)
4928 <<
", numPacketsPerLID.extent(0): "
4929 << numPacketsPerLID.extent(0) << endl;
4930 std::cerr << os.str();
4933 const LO numImports =
static_cast<LO
>(importLIDs.extent(0));
// Rank of the row Map's communicator, or -1 when the Map or communicator is null.
4934 const int myRank = [&]() {
4935 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4936 return comm.is_null() ? -1 : comm->getRank();
4938 std::unique_ptr<padding_type> padding(
4939 new padding_type(myRank, numImports));
// The by-value DualViews are synced to host when needed; importLIDs (passed
// by const reference) is only asserted to be in sync already.
4941 if (imports.need_sync_host()) {
4942 imports.sync_host();
4944 auto imports_h = imports.view_host();
4945 if (numPacketsPerLID.need_sync_host()) {
4946 numPacketsPerLID.sync_host();
4948 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
4950 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
4951 auto importLIDs_h = importLIDs.view_host();
4953 const map_type& tgtRowMap = *rowMap_;
// Incoming data is always treated as possibly containing duplicates.
4957 constexpr bool src_is_unique =
false;
4958 const bool tgt_is_unique = isMerged();
4960 std::vector<GO> tgtGblColIndsScratch;
4962 execute_sync_host_uvm_access();
// Walk the import buffer; `offset` advances by each row's packet count.
4963 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
4968 const LO origSrcNumEnt =
4969 static_cast<LO
>(numPacketsPerLID_h[whichImport]);
4970 GO*
const srcGblColInds = imports_h.data() + offset;
4972 const LO tgtLclRowInd = importLIDs_h[whichImport];
4973 const GO tgtGblRowInd =
4974 tgtRowMap.getGlobalElement(tgtLclRowInd);
4975 auto tgtGblColInds = getRowGraphGlobalRow(
4976 tgtGblColIndsScratch, *
this, tgtGblRowInd);
4977 const size_t origTgtNumEnt(tgtGblColInds.size());
4979 padding->update_import(whichImport, tgtLclRowInd,
4980 tgtGblColInds.getRawPtr(),
4981 origTgtNumEnt, tgt_is_unique,
4983 origSrcNumEnt, src_is_unique);
4984 offset += origSrcNumEnt;
4988 std::ostringstream os;
4989 os << *prefix <<
"Done" << endl;
4990 std::cerr << os.str();
// Computes padding from a byte-packed import buffer.
//
// importLIDs       - target local row index for each received row.
// imports          - received data as raw bytes (char).
// numPacketsPerLID - byte count for each received row.
// verbose          - enables tracing to std::cerr.
//
// For each row with a nonzero byte count, an LO entry count is unpacked at
// the row's offset, the global column indices that follow it are unpacked
// into a scratch vector, and update_import() is called.
4995template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4997 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4998CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4999 computePaddingForCrsMatrixUnpack(
5000 const Kokkos::DualView<
const local_ordinal_type*,
5001 buffer_device_type>& importLIDs,
5002 Kokkos::DualView<char*, buffer_device_type> imports,
5003 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5004 const bool verbose)
const {
5005 using Details::PackTraits;
5006 using Details::Impl::getRowGraphGlobalRow;
5009 using GO = global_ordinal_type;
5010 const char tfecfFuncName[] =
"computePaddingForCrsMatrixUnpack";
5012 std::unique_ptr<std::string> prefix;
5014 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5015 std::ostringstream os;
5016 os << *prefix <<
"importLIDs.extent(0): "
5017 << importLIDs.extent(0)
5018 <<
", imports.extent(0): "
5019 << imports.extent(0)
5020 <<
", numPacketsPerLID.extent(0): "
5021 << numPacketsPerLID.extent(0) << endl;
5022 std::cerr << os.str();
5024 const bool extraVerbose =
5027 const LO numImports =
static_cast<LO
>(importLIDs.extent(0));
// There must be a byte count for every import.
5028 TEUCHOS_ASSERT(LO(numPacketsPerLID.extent(0)) >= numImports);
// Rank of the row Map's communicator, or -1 when the Map or communicator is null.
5029 const int myRank = [&]() {
5030 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
5031 return comm.is_null() ? -1 : comm->getRank();
5033 std::unique_ptr<padding_type> padding(
5034 new padding_type(myRank, numImports));
// Sync the by-value DualViews to host when needed; importLIDs is only
// asserted to be in sync already.
5036 if (imports.need_sync_host()) {
5037 imports.sync_host();
5039 auto imports_h = imports.view_host();
5040 if (numPacketsPerLID.need_sync_host()) {
5041 numPacketsPerLID.sync_host();
5043 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5045 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5046 auto importLIDs_h = importLIDs.view_host();
5048 const map_type& tgtRowMap = *rowMap_;
// Incoming data is always treated as possibly containing duplicates.
5052 constexpr bool src_is_unique =
false;
5053 const bool tgt_is_unique = isMerged();
5055 std::vector<GO> srcGblColIndsScratch;
5056 std::vector<GO> tgtGblColIndsScratch;
5058 execute_sync_host_uvm_access();
5059 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5064 const size_t numBytes = numPacketsPerLID_h[whichImport];
5066 std::ostringstream os;
5067 os << *prefix <<
"whichImport=" << whichImport
5068 <<
", numImports=" << numImports
5069 <<
", numBytes=" << numBytes << endl;
5070 std::cerr << os.str();
5072 if (numBytes == 0) {
// Unpack the entry count (an LO) stored at the start of this row's bytes,
// after checking that both the row and the buffer are large enough.
5075 LO origSrcNumEnt = 0;
5076 const size_t numEntBeg = offset;
5077 const size_t numEntLen =
5078 PackTraits<LO>::packValueCount(origSrcNumEnt);
5079 TEUCHOS_ASSERT(numBytes >= numEntLen);
5080 TEUCHOS_ASSERT(imports_h.extent(0) >= numEntBeg + numEntLen);
5081 PackTraits<LO>::unpackValue(origSrcNumEnt,
5082 imports_h.data() + numEntBeg);
5084 std::ostringstream os;
5085 os << *prefix <<
"whichImport=" << whichImport
5086 <<
", numImports=" << numImports
5087 <<
", origSrcNumEnt=" << origSrcNumEnt << endl;
5088 std::cerr << os.str();
// Sanity checks: the count is non-negative and the row holds at least the
// count plus that many GO values.
5090 TEUCHOS_ASSERT(origSrcNumEnt >= LO(0));
5091 TEUCHOS_ASSERT(numBytes >=
size_t(numEntLen + origSrcNumEnt *
sizeof(GO)));
// Global column indices start right after the count; the scratch vector is
// grown if needed but not shrunk here.
5092 const size_t gidsBeg = numEntBeg + numEntLen;
5093 if (srcGblColIndsScratch.size() <
size_t(origSrcNumEnt)) {
5094 srcGblColIndsScratch.resize(origSrcNumEnt);
5096 GO*
const srcGblColInds = srcGblColIndsScratch.data();
5097 PackTraits<GO>::unpackArray(srcGblColInds,
5098 imports_h.data() + gidsBeg,
5100 const LO tgtLclRowInd = importLIDs_h[whichImport];
5101 const GO tgtGblRowInd =
5102 tgtRowMap.getGlobalElement(tgtLclRowInd);
5103 auto tgtGblColInds = getRowGraphGlobalRow(
5104 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5105 const size_t origNumTgtEnt(tgtGblColInds.size());
5108 std::ostringstream os;
5109 os << *prefix <<
"whichImport=" << whichImport
5110 <<
", numImports=" << numImports
5111 <<
": Call padding->update_import" << endl;
5112 std::cerr << os.str();
5114 padding->update_import(whichImport, tgtLclRowInd,
5115 tgtGblColInds.getRawPtr(),
5116 origNumTgtEnt, tgt_is_unique,
5118 origSrcNumEnt, src_is_unique);
5123 std::ostringstream os;
5124 os << *prefix <<
"Done" << endl;
5125 std::cerr << os.str();
// Packs the rows listed in `exportLIDs` from `source` into `exports` and
// records the per-row packet counts in `numPacketsPerLID`.
//
// Throws std::runtime_error if exportLIDs and numPacketsPerLID differ in
// length, or if this (target) graph is fill complete; throws
// std::invalid_argument if `source` is not a RowGraph with matching template
// parameters.
//
// Three packing paths are visible:
//   1. source is a RowGraph but not a CrsGraph: use the RowGraph's pack()
//      and copy the result into `exports` on host;
//   2. the column-Map / packed-row-pointer condition holds: packCrsGraphNew();
//   3. otherwise: the source CrsGraph's packFillActiveNew().
5130template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5131void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5132 packAndPrepare(
const SrcDistObject& source,
5133 const Kokkos::DualView<
const local_ordinal_type*,
5134 buffer_device_type>& exportLIDs,
5135 Kokkos::DualView<packet_type*,
5136 buffer_device_type>& exports,
5137 Kokkos::DualView<
size_t*,
5140 size_t& constantNumPackets) {
5142 using GO = global_ordinal_type;
5144 using crs_graph_type =
5145 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5146 const char tfecfFuncName[] =
"packAndPrepare: ";
5147 ProfilingRegion region_papn(
"Tpetra::CrsGraph::packAndPrepare");
5149 const bool verbose = verbose_;
5150 std::unique_ptr<std::string> prefix;
5152 prefix = this->
createPrefix(
"CrsGraph",
"packAndPrepare");
5153 std::ostringstream os;
5154 os << *prefix <<
"Start" << endl;
5155 std::cerr << os.str();
// One packet count is required per exported row.
5158 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(exportLIDs.extent(0) != numPacketsPerLID.extent(0),
5160 "exportLIDs.extent(0) = " << exportLIDs.extent(0)
5161 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
// The source must at least be a RowGraph of the same template parameters.
5163 const row_graph_type* srcRowGraphPtr =
5164 dynamic_cast<const row_graph_type*
>(&source);
5165 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(srcRowGraphPtr ==
nullptr, std::invalid_argument,
5166 "Source of an Export "
5167 "or Import operation to a CrsGraph must be a RowGraph with the same "
5168 "template parameters.");
5172 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete(), std::runtime_error,
5173 "The target graph of an Import or Export must not be fill complete.");
5175 const crs_graph_type* srcCrsGraphPtr =
5176 dynamic_cast<const crs_graph_type*
>(&source);
// Path 1: generic RowGraph source.
5178 if (srcCrsGraphPtr ==
nullptr) {
5179 using Teuchos::ArrayView;
5183 std::ostringstream os;
5184 os << *prefix <<
"Source is a RowGraph but not a CrsGraph"
5186 std::cerr << os.str();
// Wrap the host views in Teuchos::ArrayView for the RowGraph::pack() call.
5193 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5194 auto exportLIDs_h = exportLIDs.view_host();
5195 ArrayView<const LO> exportLIDs_av(exportLIDs_h.data(),
5196 exportLIDs_h.extent(0));
5197 Teuchos::Array<GO> exports_a;
// numPacketsPerLID is about to be overwritten on host.
5199 numPacketsPerLID.clear_sync_state();
5200 numPacketsPerLID.modify_host();
5201 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5202 ArrayView<size_t> numPacketsPerLID_av(numPacketsPerLID_h.data(),
5203 numPacketsPerLID_h.extent(0));
5204 srcRowGraphPtr->pack(exportLIDs_av, exports_a, numPacketsPerLID_av,
5205 constantNumPackets);
// Reallocate `exports` only when its length differs from the packed size,
// then copy the packed data in through an unmanaged host view.
5206 const size_t newSize =
static_cast<size_t>(exports_a.size());
5207 if (
static_cast<size_t>(exports.extent(0)) != newSize) {
5208 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5209 exports = exports_dv_type(
"exports", newSize);
5211 Kokkos::View<
const packet_type*, Kokkos::HostSpace,
5212 Kokkos::MemoryUnmanaged>
5213 exports_a_h(exports_a.getRawPtr(), newSize);
5214 exports.clear_sync_state();
5215 exports.modify_host();
5217 Kokkos::deep_copy(exports.view_host(), exports_a_h);
// Path 2. NOTE(review): this condition inspects *this* graph's column Map,
// packed row pointers and row Map, while the call below packs
// *srcCrsGraphPtr - confirm that testing the target is intended.
5220 else if (!getColMap().is_null() &&
5221 (this->getRowPtrsPackedDevice().extent(0) != 0 ||
5222 getRowMap()->getLocalNumElements() == 0)) {
5224 std::ostringstream os;
5225 os << *prefix <<
"packCrsGraphNew path" << endl;
5226 std::cerr << os.str();
// exportPIDs is default-constructed and passed to packCrsGraphNew as is.
5228 using export_pids_type =
5229 Kokkos::DualView<const int*, buffer_device_type>;
5230 export_pids_type exportPIDs;
5232 using NT = node_type;
5234 packCrsGraphNew<LO, GO, NT>(*srcCrsGraphPtr, exportLIDs, exportPIDs,
5235 exports, numPacketsPerLID,
5236 constantNumPackets,
false);
// Path 3: pack from the source CrsGraph directly.
5238 srcCrsGraphPtr->packFillActiveNew(exportLIDs, exports, numPacketsPerLID,
5239 constantNumPackets);
5243 std::ostringstream os;
5244 os << *prefix <<
"Done" << endl;
5245 std::cerr << os.str();
// Packing entry point that writes into a Teuchos::Array of global ordinals.
// The branch below is taken when this graph has a column Map and either its
// packed row pointers are non-empty or the row Map has no local elements.
5249template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5252 Teuchos::Array<GlobalOrdinal>& exports,
5255 auto col_map = this->getColMap();
5257 if (!
col_map.is_null() && (
this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() == 0)) {
// Packs the global column indices of the requested rows into `exports`
// (a Teuchos::Array), using host-side Kokkos kernels.
//
// Two passes are made over the export list:
//   1. a parallel_reduce that stores each row's entry count in
//      numPacketsPerLID and sums them into totalNumPackets;
//   2. a parallel_scan that turns those counts into offsets and copies each
//      row's indices into `exports` (local indices are converted through the
//      column Map).
// Errors found inside the kernels are counted in errCount and reported
// afterwards as std::logic_error.
5267template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5270 Teuchos::Array<GlobalOrdinal>& exports,
5276 using host_execution_space =
5277 typename Kokkos::View<size_t*, device_type>::
5278 host_mirror_type::execution_space;
5280 const bool verbose = verbose_;
5283 std::unique_ptr<std::string>
prefix;
// NOTE(review): the prefix is labelled "allocateIndices" although this
// routine packs exports - looks like a copy-paste leftover; confirm and
// rename so verbose output is attributed correctly.
5285 prefix = this->createPrefix(
"CrsGraph",
"allocateIndices");
5286 std::ostringstream
os;
5288 std::cerr <<
os.str();
5290 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.size(), std::runtime_error,
5291 "exportLIDs.size() = " << numExportLIDs <<
" != numPacketsPerLID.size()"
5293 << numPacketsPerLID.size() <<
".");
5295 const map_type&
rowMap = *(this->getRowMap());
5296 const map_type*
const colMapPtr = this->colMap_.getRawPtr();
// A locally indexed graph must have a column Map.
5297 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr ==
nullptr, std::logic_error,
5298 "This graph claims to be locally indexed, but its column Map is nullptr. "
5299 "This should never happen. Please report this bug to the Tpetra "
// Zero tells the caller that rows may have different packet counts.
5303 constantNumPackets = 0;
// Raw pointers are captured by value in the kernels below.
5307 size_t*
const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr();
5308 const LO*
const exportLIDs_raw = exportLIDs.getRawPtr();
5315 Kokkos::RangePolicy<host_execution_space, LO> inputRange(0, numExportLIDs);
5316 size_t totalNumPackets = 0;
5317 size_t errCount = 0;
// errCountView wraps the local errCount so kernels can bump it atomically.
5320 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5322 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5323 constexpr size_t ONE = 1;
// Pass 1: count entries per exported row.
5325 execute_sync_host_uvm_access();
5326 Kokkos::parallel_reduce(
5327 "Tpetra::CrsGraph::pack: totalNumPackets",
5329 [=, *
this](
const LO& i,
size_t& curTotalNumPackets) {
5330 const GO gblRow =
rowMap.getGlobalElement(exportLIDs_raw[i]);
// A row not in the row Map is an error and contributes zero packets.
5331 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5332 Kokkos::atomic_add(&errCountView(), ONE);
5333 numPacketsPerLID_raw[i] = 0;
5335 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5336 numPacketsPerLID_raw[i] = numEnt;
5337 curTotalNumPackets += numEnt;
5343 std::ostringstream os;
5344 os << *prefix <<
"totalNumPackets=" << totalNumPackets << endl;
5345 std::cerr << os.str();
5347 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5348 "totalNumPackets count encountered "
5349 "one or more errors! errCount = "
5351 <<
", totalNumPackets = " << totalNumPackets <<
".");
// Size the output to exactly the number of packets counted above.
5355 exports.resize(totalNumPackets);
5357 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5358 "this->supportsRowViews() returns false; this should never happen. "
5359 "Please report this bug to the Tpetra developers.");
5365 std::ostringstream os;
5366 os << *prefix <<
"Pack into exports" << endl;
5367 std::cerr << os.str();
5372 GO*
const exports_raw = exports.getRawPtr();
// Pass 2: exportsOffset is the scan value, i.e. where row i starts in
// `exports`.
5374 Kokkos::parallel_scan(
"Tpetra::CrsGraph::pack: pack from views",
5375 inputRange, [=, &prefix, *
this](
const LO i,
size_t& exportsOffset,
const bool final) {
5376 const size_t curOffset = exportsOffset;
5377 const GO gblRow =
rowMap.getGlobalElement(exportLIDs_raw[i]);
5378 const RowInfo rowInfo =
5379 this->getRowInfoFromGlobalRowIndex(gblRow);
// Error case 1: the row lookup failed.
5381 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5382 if (rowInfo.localRow == TDO::invalid()) {
5384 std::ostringstream os;
5385 os << *prefix <<
": INVALID rowInfo: i=" << i
5386 <<
", lclRow=" << exportLIDs_raw[i] << endl;
5387 std::cerr << os.str();
5389 Kokkos::atomic_add(&errCountView(), ONE);
5390 }
// Error case 2: this row would write past the end of the output.
else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5392 std::ostringstream os;
5393 os << *prefix <<
": UH OH! For i=" << i <<
", lclRow="
5394 << exportLIDs_raw[i] <<
", gblRow=" << gblRow <<
", curOffset "
5396 << curOffset <<
") + numEnt (= " << rowInfo.numEntries
5397 <<
") > totalNumPackets (= " << totalNumPackets <<
")."
5399 std::cerr << os.str();
5401 Kokkos::atomic_add(&errCountView(), ONE);
5403 const LO numEnt =
static_cast<LO
>(rowInfo.numEntries);
// Locally indexed: convert each local column index to global via the column Map.
5404 if (this->isLocallyIndexed()) {
5405 auto lclColInds = getLocalIndsViewHost(rowInfo);
5407 for (LO k = 0; k < numEnt; ++k) {
5408 const LO lclColInd = lclColInds(k);
5409 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5413 exports_raw[curOffset + k] = gblColInd;
5416 exportsOffset = curOffset + numEnt;
5417 }
// Globally indexed: copy the indices unchanged.
else if (this->isGloballyIndexed()) {
5418 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5420 for (LO k = 0; k < numEnt; ++k) {
5421 const GO gblColInd = gblColInds(k);
5425 exports_raw[curOffset + k] = gblColInd;
5428 exportsOffset = curOffset + numEnt;
5436 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5437 "Packing encountered "
5438 "one or more errors! errCount = "
5440 <<
", totalNumPackets = " << totalNumPackets <<
".");
5443 std::ostringstream os;
5444 os << *prefix <<
"Done" << endl;
5445 std::cerr << os.str();
// DualView-based packing of the requested rows' global column indices.
//
// exportLIDs         - local row indices to pack (must already be in sync
//                      on host).
// exports            - output buffer; reallocated only if it is too small.
// constantNumPackets - set to 0 here.
//
// The per-row counts are computed in a serial host loop and written to
// numPacketsPerLID; the indices are then copied into the host view of
// `exports` by a parallel_scan. Throws std::runtime_error on a length
// mismatch and std::logic_error for the internal consistency checks.
5449template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5450void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5451 packFillActiveNew(
const Kokkos::DualView<
const local_ordinal_type*,
5452 buffer_device_type>& exportLIDs,
5453 Kokkos::DualView<packet_type*,
5454 buffer_device_type>& exports,
5455 Kokkos::DualView<
size_t*,
5458 size_t& constantNumPackets)
const {
5461 using GO = global_ordinal_type;
5462 using host_execution_space =
typename Kokkos::View<
size_t*,
5463 device_type>::host_mirror_type::execution_space;
5464 using host_device_type =
5465 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5466 using exports_dv_type =
5467 Kokkos::DualView<packet_type*, buffer_device_type>;
5468 const char tfecfFuncName[] =
"packFillActiveNew: ";
5469 const bool verbose = verbose_;
5471 const auto numExportLIDs = exportLIDs.extent(0);
5472 std::unique_ptr<std::string> prefix;
5474 prefix = this->
createPrefix(
"CrsGraph",
"packFillActiveNew");
5475 std::ostringstream os;
5476 os << *prefix <<
"numExportLIDs: " << numExportLIDs
5477 <<
", numPacketsPerLID.extent(0): "
5478 << numPacketsPerLID.extent(0) << endl;
5479 std::cerr << os.str();
// One packet count is required per exported row.
5481 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.extent(0), std::runtime_error,
5482 "exportLIDs.extent(0) = " << numExportLIDs
5483 <<
" != numPacketsPerLID.extent(0) = "
5484 << numPacketsPerLID.extent(0) <<
".");
5485 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5486 auto exportLIDs_h = exportLIDs.view_host();
5488 const map_type&
rowMap = *(this->getRowMap());
5489 const map_type*
const colMapPtr = this->colMap_.getRawPtr();
// A locally indexed graph must have a column Map.
5490 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr ==
nullptr, std::logic_error,
5491 "This graph claims to be locally indexed, but its column Map is nullptr. "
5492 "This should never happen. Please report this bug to the Tpetra "
5496 constantNumPackets = 0;
// numPacketsPerLID is overwritten on host below.
5498 numPacketsPerLID.clear_sync_state();
5499 numPacketsPerLID.modify_host();
5500 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5507 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5508 range_type inputRange(0, numExportLIDs);
5509 size_t totalNumPackets = 0;
5510 size_t errCount = 0;
// errCountView wraps the local errCount so it can be bumped atomically.
5513 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5514 constexpr size_t ONE = 1;
5517 std::ostringstream os;
5518 os << *prefix <<
"Compute totalNumPackets" << endl;
5519 std::cerr << os.str();
// Pass 1 (serial): per-row entry counts and their total.
5522 execute_sync_host_uvm_access();
5523 totalNumPackets = 0;
5524 for (
size_t i = 0; i < numExportLIDs; ++i) {
5525 const LO lclRow = exportLIDs_h[i];
5526 const GO gblRow =
rowMap.getGlobalElement(lclRow);
// A row not in the row Map is an error and contributes zero packets.
5527 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5529 std::ostringstream os;
5530 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5531 <<
" not in row Map on this process" << endl;
5532 std::cerr << os.str();
5534 Kokkos::atomic_add(&errCountView(), ONE);
5535 numPacketsPerLID_h(i) = 0;
5537 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5538 numPacketsPerLID_h(i) = numEnt;
5539 totalNumPackets += numEnt;
5544 std::ostringstream os;
5545 os << *prefix <<
"totalNumPackets: " << totalNumPackets
5546 <<
", errCount: " << errCount << endl;
5547 std::cerr << os.str();
5549 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5550 "totalNumPackets count encountered "
5551 "one or more errors! totalNumPackets: "
5553 <<
", errCount: " << errCount <<
".");
// Grow-only: an existing buffer that is already large enough is reused.
5556 if (
size_t(exports.extent(0)) < totalNumPackets) {
5558 exports = exports_dv_type(
"exports", totalNumPackets);
5561 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5562 "this->supportsRowViews() returns false; this should never happen. "
5563 "Please report this bug to the Tpetra developers.");
5569 std::ostringstream os;
5570 os << *prefix <<
"Pack into exports buffer" << endl;
5571 std::cerr << os.str();
5574 exports.clear_sync_state();
5575 exports.modify_host();
5576 auto exports_h = exports.view_host();
// The returned host views are discarded. NOTE(review): presumably these
// calls exist only to bring the index data up to date on host before the
// kernel reads it - confirm.
5582 if (isLocallyIndexed())
5583 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5584 else if (isGloballyIndexed())
5585 gblInds_wdv.getHostView(Access::ReadOnly);
// Pass 2: exportsOffset is the scan value, i.e. where row i starts in
// exports_h.
5588 Kokkos::parallel_scan(
"Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5589 inputRange, [=, &prefix, *
this](
const LO i,
size_t& exportsOffset,
const bool final) {
5590 const size_t curOffset = exportsOffset;
5591 const LO lclRow = exportLIDs_h(i);
5592 const GO gblRow =
rowMap.getGlobalElement(lclRow);
// Error case 1: the local row is not in the row Map.
5593 if (gblRow == Details::OrdinalTraits<GO>::invalid()) {
5595 std::ostringstream os;
5596 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5597 <<
" not in row Map on this process" << endl;
5598 std::cerr << os.str();
5600 Kokkos::atomic_add(&errCountView(), ONE);
// Error case 2: the row lookup failed.
5604 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(gblRow);
5605 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid()) {
5607 std::ostringstream os;
5608 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5609 <<
", gblRow=" << gblRow <<
": invalid rowInfo"
5611 std::cerr << os.str();
5613 Kokkos::atomic_add(&errCountView(), ONE);
// Error case 3: this row would write past the counted total.
5617 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5619 std::ostringstream os;
5620 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5621 <<
", gblRow=" << gblRow <<
", curOffset (= "
5622 << curOffset <<
") + numEnt (= " << rowInfo.numEntries
5623 <<
") > totalNumPackets (= " << totalNumPackets
5625 std::cerr << os.str();
5627 Kokkos::atomic_add(&errCountView(), ONE);
5631 const LO numEnt =
static_cast<LO
>(rowInfo.numEntries);
// Locally indexed: convert each local column index to global via the column Map.
5632 if (this->isLocallyIndexed()) {
5633 auto lclColInds = getLocalIndsViewHost(rowInfo);
5635 for (LO k = 0; k < numEnt; ++k) {
5636 const LO lclColInd = lclColInds(k);
5637 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5641 exports_h(curOffset + k) = gblColInd;
5644 exportsOffset = curOffset + numEnt;
5645 }
// Globally indexed: copy the indices unchanged.
else if (this->isGloballyIndexed()) {
5646 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5648 for (LO k = 0; k < numEnt; ++k) {
5649 const GO gblColInd = gblColInds(k);
5653 exports_h(curOffset + k) = gblColInd;
5656 exportsOffset = curOffset + numEnt;
5670 std::ostringstream os;
5671 os << *prefix <<
"errCount=" << errCount <<
"; Done" << endl;
5672 std::cerr << os.str();
// Unpack step of a CrsGraph Import/Export (named by the profiling label and
// the exception text below).
// NOTE(review): the embedded line numbers skip, so parts of the signature and
// body are not visible in this excerpt; the comments below describe only the
// statements that are shown.
5676template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5683 Kokkos::DualView<
size_t*,
5694 ProfilingRegion
regionCGC(
"Tpetra::CrsGraph::unpackAndCombine");
// Verbose tracing is controlled by the verbose_ member; messages are built in
// an ostringstream and written to std::cerr in a single call.
5695 const bool verbose = verbose_;
5697 std::unique_ptr<std::string>
prefix;
5700 std::ostringstream
os;
5702 std::cerr <<
os.str();
// Compute the padding for the incoming data and apply it before any entries
// are inserted (presumably so the later inserts have room -- confirm).
5705 auto padding = computeCrsPaddingForImports(
5707 applyCrsPadding(*
padding, verbose);
5709 std::ostringstream
os;
5710 os << *
prefix <<
"Done computing & applying padding" <<
endl;
5711 std::cerr <<
os.str();
// Message of a std::runtime_error check: importLIDs and numPacketsPerLID must
// have the same extent.
5733 std::runtime_error,
": importLIDs.extent(0) = " <<
importLIDs.extent(0) <<
" != numPacketsPerLID.extent(0) = " <<
numPacketsPerLID.extent(0) <<
".");
// Message of a second check: the target graph must not be fillComplete.
5735 ": Import or Export operations are not allowed on a target "
5736 "CrsGraph that is fillComplete.");
// Bring the host copy of the imports DualView up to date if it is stale.
5743 if (imports.need_sync_host()) {
5744 imports.sync_host();
// Locally indexed graphs get a scratch buffer for local indices (allocation
// itself is elided); the other indexing states are only reported when verbose.
5752 if (isLocallyIndexed()) {
5754 std::ostringstream
os;
5755 os << *
prefix <<
"Preallocate local indices scratch" <<
endl;
5756 std::cerr <<
os.str();
5763 std::ostringstream
os;
5764 os << *
prefix <<
"Local indices scratch size: "
5766 std::cerr <<
os.str();
5771 std::ostringstream
os;
5773 if (isGloballyIndexed()) {
5774 os <<
"Graph is globally indexed";
5776 os <<
"Graph is neither locally nor globally indexed";
5779 std::cerr <<
os.str();
// Per-import-LID progress trace (index i against numImportLIDs).
5790 std::ostringstream
os;
5791 os << *
prefix <<
"i=" <<
i <<
", numImportLIDs="
5793 std::cerr <<
os.str();
// Message of a std::logic_error check: importLIDs[i] must be a row owned by
// the calling process.
5800 std::logic_error,
"importLIDs[i=" <<
i <<
"]=" <<
lclRow <<
" is not in the row Map on the calling "
5804 if (!isLocallyIndexed()) {
// Exceptions thrown by the insert loop are caught here; the visible message
// fragment shows they are reported with the method name (handling elided).
5816 }
catch (std::exception&
e) {
5818 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
5825 std::ostringstream
os;
5827 std::cerr <<
os.str();
// NOTE(review): the function name and signature are not visible in this
// excerpt (embedded line numbers skip). The visible statements inspect the
// domain, range and column Maps relative to the row Map, so this looks like
// the routine that replaces the graph's Maps (removeEmptyProcessesInPlace?)
// -- confirm against the full file.
5831template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5834 using Teuchos::Comm;
5835 using Teuchos::null;
5836 using Teuchos::ParameterList;
// Domain Map: special-cased when it is the very same object as the row Map
// (pointer identity, not isSameAs()).
5850 if (!domainMap_.is_null()) {
5851 if (domainMap_.getRawPtr() == rowMap_.getRawPtr()) {
// Range Map: same pointer-identity test against the row Map.
5861 if (!rangeMap_.is_null()) {
5862 if (rangeMap_.getRawPtr() == rowMap_.getRawPtr()) {
5872 if (!colMap_.is_null()) {
// The "Export" / "Import" sublists of `params` are consulted only if present;
// a null `params` or a missing sublist takes the first branch.
5886 if (!rangeMap_.is_null() &&
5889 if (
params.is_null() || !
params->isSublist(
"Export")) {
5897 if (!domainMap_.is_null() &&
5900 if (
params.is_null() || !
params->isSublist(
"Import")) {
// getLocalDiagOffsets (Kokkos::View overload): fills `offsets`, an unmanaged
// device view of size_t, with per-row diagonal offsets.
// NOTE(review): the embedded line numbers skip, so much of the body is not
// visible here; comments describe only the statements shown.
5926template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5928 getLocalDiagOffsets(
const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets)
const {
5933 const bool verbose = verbose_;
5935 std::unique_ptr<std::string>
prefix;
5937 prefix = this->createPrefix(
"CrsGraph",
"getLocalDiagOffsets");
5938 std::ostringstream
os;
5939 os << *
prefix <<
"offsets.extent(0)=" << offsets.extent(0)
5941 std::cerr <<
os.str();
// The local row count is narrowed to LO for the comparisons below.
5945 const LO
lclNumRows =
static_cast<LO
>(this->getLocalNumRows());
// Message of a std::invalid_argument check: `offsets` must have at least
// getLocalNumRows() entries.
5947 std::invalid_argument,
"offsets.extent(0) = " << offsets.extent(0) <<
" < getLocalNumRows() = " <<
lclNumRows <<
".");
5972 const bool sorted = this->isSorted();
// Fill-complete graphs use the device-resident local graph; the statements of
// the alternative path are only partly visible.
5973 if (isFillComplete()) {
5974 auto lclGraph = this->getLocalGraphDevice();
// Host mirror of `offsets`, used by the host-side code that follows.
5982 auto offsets_h = Kokkos::create_mirror_view(offsets);
// An invalid local column index means the looked-up column is not in the
// column Map on this process.
5992 if (
lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
6000 const size_t hint = 0;
6011 typename local_inds_dualv_type::t_host::const_type
lclColInds;
// Diagnostic output listing offsets found to be wrong.
6046 std::ostringstream
os;
6047 os << *
prefix <<
"Wrong offsets: [";
6056 std::cerr <<
os.str();
// Errors are combined across processes (reduceAll over the graph's
// communicator) and summarized; gblResults[4] is printed as a process rank.
6061 using Teuchos::reduceAll;
6062 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
6085 std::ostringstream
os;
6086 os <<
"Issue(s) that we noticed (on Process " <<
gblResults[4] <<
", "
6087 "possibly among others): "
6090 os <<
" - The column Map does not contain at least one diagonal entry "
6095 os <<
" - On one or more processes, some row does not contain a "
6100 os <<
" - On one or more processes, some offsets are incorrect."
6104 os <<
" - One or more processes had some other error."
// getLocalOffRankOffsets (name taken from the createPrefix call below):
// hands back the per-row "off-rank" offsets through `offsets`, caching them
// in the k_offRankOffsets_ member.
// NOTE(review): the signature and some statements are elided in this excerpt.
6112template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6117 const bool verbose = verbose_;
6119 std::unique_ptr<std::string>
prefix;
6121 prefix = this->createPrefix(
"CrsGraph",
"getLocalOffRankOffsets");
6122 std::ostringstream
os;
6123 os << *
prefix <<
"offsets.extent(0)=" << offsets.extent(0)
6125 std::cerr <<
os.str();
6131 const size_t lclNumRows = this->getLocalNumRows();
// Fast path: reuse the cached view when it is flagged valid and has the
// expected length of lclNumRows + 1.
6133 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) ==
lclNumRows + 1) {
6134 offsets = k_offRankOffsets_;
// Otherwise the cache is marked invalid until it has been recomputed.
6137 haveLocalOffRankOffsets_ =
false;
// Recompute (only shown for fill-complete graphs): allocate an uninitialized
// view of lclNumRows + 1 entries and let getGraphOffRankOffsets fill it.
6152 if (isFillComplete()) {
6153 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing(
"offRankOffset"),
lclNumRows + 1);
6154 auto lclGraph = this->getLocalGraphDevice();
6155 ::Tpetra::Details::getGraphOffRankOffsets(k_offRankOffsets_,
// Publish the freshly computed view and mark the cache valid again.
6158 offsets = k_offRankOffsets_;
6159 haveLocalOffRankOffsets_ =
true;
// Primary template of the HelpGetLocalDiagOffsets helper. It is intentionally
// empty; the second template argument is a bool computed from whether
// DeviceType::memory_space is Kokkos::HostSpace, and the <DeviceType, true> /
// <DeviceType, false> specializations provide the actual members.
// NOTE(review): part of the template parameter list is elided in this excerpt.
6183template <
class DeviceType,
6185 std::is_same<
typename DeviceType::memory_space,
6186 Kokkos::HostSpace>::value>
6187struct HelpGetLocalDiagOffsets {};
// Specialization for the case where the bool argument is true (the device
// memory space is Kokkos::HostSpace). Both offset view types are unmanaged
// host views of size_t.
// NOTE(review): the function bodies and the name of the second typedef are
// elided in this excerpt.
6189template <
class DeviceType>
6190struct HelpGetLocalDiagOffsets<DeviceType, true> {
6191 typedef DeviceType device_type;
6192 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6193 Kokkos::MemoryUnmanaged>
6194 device_offsets_type;
6195 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6196 Kokkos::MemoryUnmanaged>
// Produces the "device" offsets view from the caller's host view.
6199 static device_offsets_type
6200 getDeviceOffsets(
const host_offsets_type& hostOffsets) {
// Both parameters are unnamed here, i.e. unused by this specialization.
6207 copyBackIfNeeded(
const host_offsets_type& ,
6208 const device_offsets_type& ) {
// Specialization for the case where the bool argument is false (the device
// memory space is not Kokkos::HostSpace). The device offsets view is a
// managed view on device_type; the host offsets view is an unmanaged host
// view.
// NOTE(review): some lines (e.g. the host typedef's name) are elided here.
6212template <
class DeviceType>
6213struct HelpGetLocalDiagOffsets<DeviceType, false> {
6214 typedef DeviceType device_type;
6218 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6219 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6220 Kokkos::MemoryUnmanaged>
// Allocates a new device view labelled "offsets" with as many entries as the
// host view; the host contents are not copied in.
6223 static device_offsets_type
6224 getDeviceOffsets(
const host_offsets_type& hostOffsets) {
6227 return device_offsets_type(
"offsets", hostOffsets.extent(0));
// Copies the device results back into the caller's host view.
6231 copyBackIfNeeded(
const host_offsets_type& hostOffsets,
6232 const device_offsets_type& deviceOffsets) {
6234 Kokkos::deep_copy(hostOffsets, deviceOffsets);
// NOTE(review): the name and signature of this definition are not visible in
// this excerpt. It takes an `offsets` container with a size() member and uses
// a `helper_type` with a host_offsets_type typedef, so it is presumably the
// host-array overload of getLocalDiagOffsets built on HelpGetLocalDiagOffsets
// -- confirm against the full file.
6239template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Message of a precondition check: a column Map is required.
6245 "The graph does not yet have a column Map.");
6246 const LO
myNumRows =
static_cast<LO
>(this->getLocalNumRows());
// Branch taken when `offsets` does not already hold exactly one entry per
// local row (the action taken is elided).
6247 if (
static_cast<LO
>(offsets.size()) !=
myNumRows) {
6264 typedef typename helper_type::host_offsets_type host_offsets_type;
6274template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// transferAndFillComplete: redistributes this graph according to
// `rowTransfer` (an Import or Export) into a destination graph and finishes
// it with expertStaticFillComplete.
//
// Visible parameters:
//   rowTransfer    - Import/Export describing the row redistribution.
//   domainTransfer - optional Import/Export for the domain Map; must be of
//                    the same kind as rowTransfer.
//   domainMap      - domain Map of the result.
//   rangeMap       - range Map of the result; this graph's range Map is used
//                    when null.
//   params         - optional list; "Reverse Mode", "Restrict Communicator",
//                    "compute global constants" and the "CrsGraph" sublist
//                    are read from it.
// Throws std::invalid_argument for inconsistent inputs and std::logic_error
// on internal "should never get here" paths.
//
// NOTE(review): the embedded line numbers skip, so parts of the signature and
// body (including `destGraph`'s declaration) are not visible in this excerpt;
// comments describe only the statements shown.
6280template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6283 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>&
rowTransfer,
6284 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>>&
domainTransfer,
6285 const Teuchos::RCP<const map_type>& domainMap,
6286 const Teuchos::RCP<const map_type>&
rangeMap,
6287 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
6289 using Teuchos::ArrayRCP;
6290 using Teuchos::ArrayView;
6291 using Teuchos::Comm;
6292 using Teuchos::ParameterList;
6302 using NT = node_type;
6306 const char*
prefix =
"Tpetra::CrsGraph::transferAndFillComplete: ";
// --- Validate the kind of the two transfer objects -------------------------
6319 prefix <<
"The 'rowTransfer' input argument must be either an Import or "
6320 "an Export, and its template parameters must match the corresponding "
6321 "template parameters of the CrsGraph.");
6336 prefix <<
"The 'domainTransfer' input argument must be either an "
6337 "Import or an Export, and its template parameters must match the "
6338 "corresponding template parameters of the CrsGraph.");
6344 std::invalid_argument,
6345 prefix <<
"The 'rowTransfer' and 'domainTransfer' input arguments "
6346 "must be of the same type (either Import or Export).");
6352 std::invalid_argument,
6353 prefix <<
"The 'rowTransfer' and 'domainTransfer' input arguments "
6354 "must be of the same type (either Import or Export).");
// Communication is needed only if the source Map is distributed.
6360 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
// --- Read options; both flags default to false ------------------------------
6366 bool reverseMode =
false;
6367 bool restrictComm =
false;
6368 RCP<ParameterList> graphparams;
6369 if (!params.is_null()) {
6370 reverseMode = params->get(
"Reverse Mode", reverseMode);
6371 restrictComm = params->get(
"Restrict Communicator", restrictComm);
6372 graphparams = sublist(params,
"CrsGraph");
// In reverse mode the roles of the transfer's source and target Maps swap.
6377 RCP<const map_type> MyRowMap = reverseMode ? rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6378 RCP<const map_type> MyColMap;
6380 RCP<const map_type> MyRangeMap = !rangeMap.is_null() ? rangeMap : getRangeMap();
// The Base* Maps keep the pre-restriction Maps for later comparisons.
6381 RCP<const map_type> BaseRowMap = MyRowMap;
6382 RCP<const map_type> BaseDomainMap = MyDomainMap;
// --- A caller-supplied destination graph must be empty and compatible ------
6390 if (!destGraph.is_null()) {
6401 const bool NewFlag =
6402 !destGraph->isLocallyIndexed() && !destGraph->isGloballyIndexed();
6403 TEUCHOS_TEST_FOR_EXCEPTION(!NewFlag, std::invalid_argument,
6404 prefix <<
"The input argument 'destGraph' is only allowed to be nonnull, "
6405 "if its graph is empty (neither locally nor globally indexed).");
6414 TEUCHOS_TEST_FOR_EXCEPTION(
6415 !destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6416 prefix <<
"The (row) Map of the input argument 'destGraph' is not the "
6417 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6419 TEUCHOS_TEST_FOR_EXCEPTION(
6420 !destGraph->checkSizes(*
this), std::invalid_argument,
6421 prefix <<
"You provided a nonnull destination graph, but checkSizes() "
6422 "indicates that it is not a legal target for redistribution from "
6423 "the source graph (*this). This may mean that they do not have the "
6424 "same dimensions.");
// --- The transfer's Maps must match this graph's row Map -------------------
// Forward mode: source Map == row Map. Reverse mode: target Map == row Map.
6438 TEUCHOS_TEST_FOR_EXCEPTION(
6439 !(reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6440 std::invalid_argument, prefix <<
"rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6442 TEUCHOS_TEST_FOR_EXCEPTION(
6443 !(!reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6444 std::invalid_argument, prefix <<
"rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6447 TEUCHOS_TEST_FOR_EXCEPTION(
6448 !xferDomainAsImport.is_null() && !xferDomainAsImport->getTargetMap()->isSameAs(*
domainMap),
6449 std::invalid_argument,
6450 prefix <<
"The target map of the 'domainTransfer' input argument must be "
6451 "the same as the rebalanced domain map 'domainMap'");
6453 TEUCHOS_TEST_FOR_EXCEPTION(
6454 !xferDomainAsExport.is_null() && !xferDomainAsExport->getSourceMap()->isSameAs(*
domainMap),
6455 std::invalid_argument,
6456 prefix <<
"The source map of the 'domainTransfer' input argument must be "
6457 "the same as the rebalanced domain map 'domainMap'");
// --- Pull the LID lists out of the transfer ---------------------------------
// Reverse mode swaps export<->remote and permuteTo<->permuteFrom.
6470 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6471 ArrayView<const LO> ExportLIDs = reverseMode ? rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6472 ArrayView<const LO> RemoteLIDs = reverseMode ? rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6473 ArrayView<const LO> PermuteToLIDs = reverseMode ? rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6474 ArrayView<const LO> PermuteFromLIDs = reverseMode ? rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6475 Distributor& Distor = rowTransfer.getDistributor();
6478 Teuchos::Array<int> SourcePids;
6479 Teuchos::Array<int> TargetPids;
6480 int MyPID = getComm()->getRank();
6483 RCP<const map_type> ReducedRowMap, ReducedColMap,
6484 ReducedDomainMap, ReducedRangeMap;
6485 RCP<const Comm<int>> ReducedComm;
// Create the destination graph if the caller did not supply one.
6489 if (destGraph.is_null()) {
6490 destGraph = rcp(
new this_CRS_type(MyRowMap, 0, graphparams));
// --- Communicator restriction ------------------------------------------------
// NOTE(review): the enclosing condition is elided; presumably this runs only
// when restrictComm is set -- confirm. Processes with no rows are dropped and
// the domain/range Maps are rebuilt on the reduced communicator (reusing the
// reduced row Map when they are the same object as the row Map).
6497 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6498 ReducedComm = ReducedRowMap.is_null() ? Teuchos::null : ReducedRowMap->getComm();
6499 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6501 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ? ReducedRowMap : MyDomainMap->replaceCommWithSubset(ReducedComm);
6502 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ? ReducedRowMap : MyRangeMap->replaceCommWithSubset(ReducedComm);
6505 MyRowMap = ReducedRowMap;
6506 MyDomainMap = ReducedDomainMap;
6507 MyRangeMap = ReducedRangeMap;
// The rank is re-read because it may differ on the reduced communicator.
6510 if (!ReducedComm.is_null()) {
6511 MyPID = ReducedComm->getRank();
6516 ReducedComm = MyRowMap->getComm();
// --- Determine the owning process of every source column (SourcePids) -------
6525 RCP<const import_type> MyImporter = getImporter();
6528 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
// Case 1: same domain Map, no restriction -> read PIDs from the Importer.
6530 if (!restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6537 Import_Util::getPids(*MyImporter, SourcePids,
false);
6538 }
// Case 2: same domain Map with restriction -> ranks changed, so import the
// new rank values through the Importer.
else if (restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6541 ivector_type SourceDomain_pids(getDomainMap(),
true);
6542 ivector_type SourceCol_pids(getColMap());
6544 SourceDomain_pids.putScalar(MyPID);
6546 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
6547 SourcePids.resize(getColMap()->getLocalNumElements());
6548 SourceCol_pids.get1dCopy(SourcePids());
6549 }
// Case 3: no Importer -> every column is owned by this process.
// NOTE(review): the resize() looks redundant before assign() -- confirm.
else if (MyImporter.is_null() && bSameDomainMap) {
6551 SourcePids.resize(getColMap()->getLocalNumElements());
6552 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6553 }
// Case 4: a domain transfer was supplied -> push target-domain ranks back to
// the source domain Map, choosing doExport/doImport from the transfer kind
// and direction, then import them onto the column Map.
else if (!MyImporter.is_null() &&
6554 !domainTransfer.is_null()) {
6561 ivector_type TargetDomain_pids(
domainMap);
6562 TargetDomain_pids.putScalar(MyPID);
6565 ivector_type SourceDomain_pids(getDomainMap());
6568 ivector_type SourceCol_pids(getColMap());
6570 if (!reverseMode && !xferDomainAsImport.is_null()) {
6571 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
6572 }
else if (reverseMode && !xferDomainAsExport.is_null()) {
6573 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
6574 }
else if (!reverseMode && !xferDomainAsExport.is_null()) {
6575 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
6576 }
else if (reverseMode && !xferDomainAsImport.is_null()) {
6577 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
6579 TEUCHOS_TEST_FOR_EXCEPTION(
6580 true, std::logic_error,
6581 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
6583 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
6584 SourcePids.resize(getColMap()->getLocalNumElements());
6585 SourceCol_pids.get1dCopy(SourcePids());
6586 }
// Case 5: domain Map equals row Map on both sides -> the row transfer itself
// carries the rank information.
else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6587 getDomainMap()->isSameAs(*getRowMap())) {
6590 ivector_type SourceRow_pids(getRowMap());
6591 ivector_type SourceCol_pids(getColMap());
6593 TargetRow_pids.putScalar(MyPID);
6594 if (!reverseMode && xferAsImport !=
nullptr) {
6595 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport,
INSERT);
6596 }
else if (reverseMode && xferAsExport !=
nullptr) {
6597 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport,
INSERT);
6598 }
else if (!reverseMode && xferAsExport !=
nullptr) {
6599 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport,
INSERT);
6600 }
else if (reverseMode && xferAsImport !=
nullptr) {
6601 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport,
INSERT);
6603 TEUCHOS_TEST_FOR_EXCEPTION(
6604 true, std::logic_error,
6605 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
6607 SourceCol_pids.doImport(SourceRow_pids, *MyImporter,
INSERT);
6608 SourcePids.resize(getColMap()->getLocalNumElements());
6609 SourceCol_pids.get1dCopy(SourcePids());
// Any other Map combination is rejected.
6611 TEUCHOS_TEST_FOR_EXCEPTION(
6612 true, std::invalid_argument,
6613 prefix <<
"This method only allows either domainMap == getDomainMap(), "
6614 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
// --- Size the communication buffers and pack ---------------------------------
// constantNumPackets == 0 means the packet count varies per LID, so per-LID
// count arrays are needed.
6618 size_t constantNumPackets = destGraph->constantNumberOfPackets();
6619 if (constantNumPackets == 0) {
6620 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6627 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6628 destGraph->reallocImportsIfNeeded(rbufLen,
false,
nullptr);
6633 destGraph->numExportPacketsPerLID_.modify_host();
6634 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6639 numExportPacketsPerLID, ExportLIDs,
6640 SourcePids, constantNumPackets);
// --- Exchange the packed data -------------------------------------------------
// The visible calls show a reverse-posts variant followed by a forward-posts
// variant (the selecting condition is elided). With variable packet counts,
// the counts are exchanged first so the imports buffer can be sized.
6647 if (communication_needed) {
6649 if (constantNumPackets == 0) {
6653 destGraph->numExportPacketsPerLID_.sync_host();
6654 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6656 destGraph->numImportPacketsPerLID_.sync_host();
6657 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
6660 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6661 destGraph->numImportPacketsPerLID_.view_host());
6662 size_t totalImportPackets = 0;
6664 totalImportPackets += numImportPacketsPerLID[i];
6669 destGraph->reallocImportsIfNeeded(totalImportPackets,
false,
nullptr);
6670 destGraph->imports_.modify_host();
6671 auto hostImports = destGraph->imports_.view_host();
6674 destGraph->exports_.sync_host();
6675 auto hostExports = destGraph->exports_.view_host();
6676 Distor.doReversePostsAndWaits(hostExports,
6677 numExportPacketsPerLID,
6679 numImportPacketsPerLID);
6681 destGraph->imports_.modify_host();
6682 auto hostImports = destGraph->imports_.view_host();
6685 destGraph->exports_.sync_host();
6686 auto hostExports = destGraph->exports_.view_host();
6687 Distor.doReversePostsAndWaits(hostExports,
6692 if (constantNumPackets == 0) {
6696 destGraph->numExportPacketsPerLID_.sync_host();
6697 destGraph->numImportPacketsPerLID_.sync_host();
6698 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6699 destGraph->numImportPacketsPerLID_.view_host());
6701 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6703 size_t totalImportPackets = 0;
6705 totalImportPackets += numImportPacketsPerLID[i];
6710 destGraph->reallocImportsIfNeeded(totalImportPackets,
false,
nullptr);
6711 destGraph->imports_.modify_host();
6712 auto hostImports = destGraph->imports_.view_host();
6715 destGraph->exports_.sync_host();
6716 auto hostExports = destGraph->exports_.view_host();
6717 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6719 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
6721 destGraph->imports_.modify_host();
6722 auto hostImports = destGraph->imports_.view_host();
6725 destGraph->exports_.sync_host();
6726 auto hostExports = destGraph->exports_.view_host();
6727 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
// --- Unpack into CSR arrays ---------------------------------------------------
6740 destGraph->numImportPacketsPerLID_.sync_host();
6741 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6743 destGraph->imports_.sync_host();
6744 Teuchos::ArrayView<const packet_type> hostImports =
6748 numImportPacketsPerLID,
6749 constantNumPackets,
INSERT,
6750 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
6751 size_t N = BaseRowMap->getLocalNumElements();
6754 ArrayRCP<size_t> CSR_rowptr(N + 1);
6755 ArrayRCP<GO> CSR_colind_GID;
6756 ArrayRCP<LO> CSR_colind_LID;
6757 CSR_colind_GID.resize(mynnz);
// When LO and GO are the same type, the local-index array aliases the
// global-index storage instead of allocating a second array.
6761 if (
typeid(LO) ==
typeid(GO)) {
6762 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
6764 CSR_colind_LID.resize(mynnz);
6773 numImportPacketsPerLID, constantNumPackets,
6774 INSERT, NumSameIDs, PermuteToLIDs,
6775 PermuteFromLIDs, N, mynnz, MyPID,
6776 CSR_rowptr(), CSR_colind_GID(),
6777 SourcePids(), TargetPids);
// --- Build the column Map and convert global to local indices ---------------
6787 Teuchos::Array<int> RemotePids;
6788 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
6792 TargetPids, RemotePids,
6799 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
6800 MyColMap = ReducedColMap;
6804 destGraph->replaceColMap(MyColMap);
6811 if (ReducedComm.is_null()) {
// --- Sort the entries ----------------------------------------------------------
// Import-like transfers only sort; Export-like transfers sort and merge,
// which can shrink the entry count, so the index array is resized to match.
6818 if ((!reverseMode && xferAsImport !=
nullptr) ||
6819 (reverseMode && xferAsExport !=
nullptr)) {
6820 Import_Util::sortCrsEntries(CSR_rowptr(),
6822 }
else if ((!reverseMode && xferAsExport !=
nullptr) ||
6823 (reverseMode && xferAsImport !=
nullptr)) {
6824 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
6826 if (CSR_rowptr[N] != mynnz) {
6827 CSR_colind_LID.resize(CSR_rowptr[N]);
6830 TEUCHOS_TEST_FOR_EXCEPTION(
6831 true, std::logic_error,
6832 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
// --- Hand the CSR arrays to the destination graph and finish it -------------
6840 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
6846 Teuchos::ParameterList esfc_params;
6849 RCP<import_type> MyImport = rcp(
new import_type(MyDomainMap, MyColMap, RemotePids));
// "compute global constants" is forwarded from `params`, defaulting to true.
6853 if (!params.is_null())
6854 esfc_params.set(
"compute global constants", params->get(
"compute global constants",
true));
// esfc_params is a local, so it is wrapped in a non-owning RCP.
6856 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
6857 MyImport, Teuchos::null, rcp(&esfc_params,
false));
6860template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6864 const Teuchos::RCP<const map_type>& domainMap,
6865 const Teuchos::RCP<const map_type>&
rangeMap,
6866 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
6870template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6875 const Teuchos::RCP<const map_type>& domainMap,
6876 const Teuchos::RCP<const map_type>&
rangeMap,
6877 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
6881template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6885 const Teuchos::RCP<const map_type>& domainMap,
6886 const Teuchos::RCP<const map_type>&
rangeMap,
6887 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
6891template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6896 const Teuchos::RCP<const map_type>& domainMap,
6897 const Teuchos::RCP<const map_type>&
rangeMap,
6898 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
// Member-wise exchange of state between *this and `graph`: every statement
// shown is a std::swap of one data member.
// NOTE(review): the function name and signature are not visible in this
// excerpt (presumably CrsGraph::swap -- confirm), and the embedded line
// numbers skip, so members swapped on elided lines are not listed here.
6902template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6905 std::swap(
graph.need_sync_host_uvm_access,
this->need_sync_host_uvm_access);
// Maps.
6907 std::swap(
graph.rowMap_,
this->rowMap_);
6908 std::swap(
graph.colMap_,
this->colMap_);
6909 std::swap(
graph.rangeMap_,
this->rangeMap_);
6910 std::swap(
graph.domainMap_,
this->domainMap_);
// Import / Export objects.
6912 std::swap(
graph.importer_,
this->importer_);
6913 std::swap(
graph.exporter_,
this->exporter_);
// Entry-count members and the allocation size.
6915 std::swap(
graph.nodeMaxNumRowEntries_,
this->nodeMaxNumRowEntries_);
6917 std::swap(
graph.globalNumEntries_,
this->globalNumEntries_);
6918 std::swap(
graph.globalMaxNumRowEntries_,
this->globalMaxNumRowEntries_);
6920 std::swap(
graph.numAllocForAllRows_,
this->numAllocForAllRows_);
// Row-pointer views (packed and unpacked, device and host) and the flag that
// records whether the two forms match.
6922 std::swap(
graph.rowPtrsPacked_dev_,
this->rowPtrsPacked_dev_);
6923 std::swap(
graph.rowPtrsPacked_host_,
this->rowPtrsPacked_host_);
6925 std::swap(
graph.rowPtrsUnpacked_dev_,
this->rowPtrsUnpacked_dev_);
6926 std::swap(
graph.rowPtrsUnpacked_host_,
this->rowPtrsUnpacked_host_);
6927 std::swap(
graph.packedUnpackedRowPtrsMatch_,
this->packedUnpackedRowPtrsMatch_);
6929 std::swap(
graph.k_offRankOffsets_,
this->k_offRankOffsets_);
// Column-index storage (local unpacked, global, local packed).
6931 std::swap(
graph.lclIndsUnpacked_wdv,
this->lclIndsUnpacked_wdv);
6932 std::swap(
graph.gblInds_wdv,
this->gblInds_wdv);
6933 std::swap(
graph.lclIndsPacked_wdv,
this->lclIndsPacked_wdv);
6935 std::swap(
graph.storageStatus_,
this->storageStatus_);
// State flags.
6937 std::swap(
graph.indicesAreAllocated_,
this->indicesAreAllocated_);
6938 std::swap(
graph.indicesAreLocal_,
this->indicesAreLocal_);
6939 std::swap(
graph.indicesAreGlobal_,
this->indicesAreGlobal_);
6940 std::swap(
graph.fillComplete_,
this->fillComplete_);
6941 std::swap(
graph.indicesAreSorted_,
this->indicesAreSorted_);
6942 std::swap(
graph.noRedundancies_,
this->noRedundancies_);
6943 std::swap(
graph.haveLocalConstants_,
this->haveLocalConstants_);
6944 std::swap(
graph.haveGlobalConstants_,
this->haveGlobalConstants_);
6945 std::swap(
graph.haveLocalOffRankOffsets_,
this->haveLocalOffRankOffsets_);
6947 std::swap(
graph.sortGhostsAssociatedWithEachProcessor_,
this->sortGhostsAssociatedWithEachProcessor_);
// Per-row allocation and entry counts, and the nonlocals_ container.
6949 std::swap(
graph.k_numAllocPerRow_,
this->k_numAllocPerRow_);
6950 std::swap(
graph.k_numRowEntries_,
this->k_numRowEntries_);
6951 std::swap(
graph.nonlocals_,
this->nonlocals_);
// Field-by-field comparison of *this against `graph`, accumulated in the
// boolean `output`. Each visible check has the form
//   output = (a == b) ? output : false;
// so `output` can only go from true to false, and the loops stop early once
// it is false.
// NOTE(review): the function name and signature are not visible in this
// excerpt (presumably CrsGraph::isIdenticalTo -- confirm), and checks on
// elided lines are not described here.
6954template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// The values stored under `key` in the maps m1 and m2 are copied (auto, not
// auto&) and the copies sorted, so the comparison ignores order without
// modifying the maps. find() is dereferenced unchecked, so `key` must exist
// in both.
6964 auto v1 =
m1.find(
key)->second;
6965 auto v2 =
m2.find(
key)->second;
6966 std::sort(
v1.begin(),
v1.end());
6967 std::sort(
v2.begin(),
v2.end());
6970 for (
size_t i = 0;
output &&
i <
v1.size();
i++) {
// Scalar members.
6985 output = this->nodeMaxNumRowEntries_ ==
graph.nodeMaxNumRowEntries_ ?
output :
false;
6988 output = this->globalMaxNumRowEntries_ ==
graph.globalMaxNumRowEntries_ ?
output :
false;
6990 output = this->numAllocForAllRows_ ==
graph.numAllocForAllRows_ ?
output :
false;
// State flags.
6994 output = this->indicesAreAllocated_ ==
graph.indicesAreAllocated_ ?
output :
false;
7000 output = this->haveLocalConstants_ ==
graph.haveLocalConstants_ ?
output :
false;
7001 output = this->haveGlobalConstants_ ==
graph.haveGlobalConstants_ ?
output :
false;
7002 output = this->haveLocalOffRankOffsets_ ==
graph.haveLocalOffRankOffsets_ ?
output :
false;
7003 output = this->sortGhostsAssociatedWithEachProcessor_ ==
graph.sortGhostsAssociatedWithEachProcessor_ ?
output :
false;
// Array members: extents are compared first, and the element loops run only
// when the extents agree and are non-zero.
7011 output = this->k_numAllocPerRow_.extent(0) ==
graph.k_numAllocPerRow_.extent(0) ?
output :
false;
7012 if (
output && this->k_numAllocPerRow_.extent(0) > 0) {
7013 for (
size_t i = 0;
output &&
i < this->k_numAllocPerRow_.extent(0);
i++)
7019 output = this->k_numRowEntries_.extent(0) ==
graph.k_numRowEntries_.extent(0) ?
output :
false;
7020 if (
output && this->k_numRowEntries_.extent(0) > 0) {
7021 for (
size_t i = 0;
output &&
i < this->k_numRowEntries_.extent(0);
i++)
7027 auto rowPtrsThis = this->getRowPtrsUnpackedHost();
// Local column indices, read through read-only host views.
7035 output = this->lclIndsUnpacked_wdv.extent(0) ==
graph.lclIndsUnpacked_wdv.extent(0) ?
output :
false;
7036 if (
output && this->lclIndsUnpacked_wdv.extent(0) > 0) {
7037 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7038 auto indGraph =
graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
// Global column indices, same pattern.
7044 output = this->gblInds_wdv.extent(0) ==
graph.gblInds_wdv.extent(0) ?
output :
false;
7045 if (
output && this->gblInds_wdv.extent(0) > 0) {
7046 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7047 auto indtGraph =
graph.gblInds_wdv.getHostView(Access::ReadOnly);
// Helper that copies rows from a source graph into a target graph inside a
// Kokkos::parallel_for labelled "Tpetra_CrsGraph::copyAndPermuteNew". Source
// local column indices are translated to global indices and appended to the
// target row when not already present.
// The CRSGRAPH_INNER_ABORT(lin) macro defined below prints the given line
// number and calls Kokkos::abort; it uses the SYCL printf when
// KOKKOS_ENABLE_SYCL is defined and plain printf otherwise.
// NOTE(review): the helper's name, most of its signature and several body
// lines are not visible in this excerpt; comments describe only what is
// shown.
7114template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7118 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteToLIDs,
7119 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteFromLIDs,
7125 typedef typename Node::execution_space
exec_space;
7126 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
7131 using local_map_type =
typename crs_graph_type::map_type::local_map_type;
7141 typename crs_graph_type::num_row_entries_type::non_const_type
h_numRowEnt =
tgtCrsGraph.k_numRowEntries_;
// `sorted` is passed to findRelOffset below; false means the target row is
// not assumed to be sorted.
7145 const bool sorted =
false;
7151#ifdef CRSGRAPH_INNER_ABORT
7152#undef CRSGRAPH_INNER_ABORT
7155#ifdef KOKKOS_ENABLE_SYCL
7156#define CRSGRAPH_INNER_ABORT(lin) \
7158 sycl::ext::oneapi::experimental::printf("ERROR: Tpetra_CrsGraph_def.hpp:%d", lin); \
7159 Kokkos::abort("error"); \
7162#define CRSGRAPH_INNER_ABORT(lin) \
7164 printf("ERROR: Tpetra_CrsGraph_def.hpp:%d", lin); \
7165 Kokkos::abort("error"); \
7169 Kokkos::parallel_for(
7170 "Tpetra_CrsGraph::copyAndPermuteNew",
// Sanity checks: the source row must have a valid global ID, and the target
// LID must round-trip through the target row Map (LID -> GID -> LID).
7179 auto srcGid = srcRowMapLocal.getGlobalElement(srcLid);
7180 if (srcGid == GINV) CRSGRAPH_INNER_ABORT(__LINE__);
7181 auto tgtGid = tgtRowMapLocal.getGlobalElement(tgtLid);
7182 auto tgtLocalRow = tgtRowMapLocal.getLocalElement(tgtGid);
7183 if (tgtLocalRow == LINV) CRSGRAPH_INNER_ABORT(__LINE__);
7184 if (tgtLocalRow != tgtLid) CRSGRAPH_INNER_ABORT(__LINE__);
7185 auto tgtNumEntries = k_numRowEnt(tgtLocalRow);
// Source row extent [start, end) in the source column-index array.
7188 auto start = srcLocalRowPtrsDevice(srcLid);
7189 auto end = srcLocalRowPtrsDevice(srcLid + 1);
7190 auto rowLength = (end - start);
// Target row: [tstart, tend) holds the current entries and tend1 is where
// the row's allocation ends, so tend1 - tend is the free space.
7192 auto tstart = tgtLocalRowPtrsDevice(tgtLocalRow);
7193 auto tend = tstart + tgtNumEntries;
7194 auto tend1 = tgtLocalRowPtrsDevice(tgtLocalRow + 1);
// Clamped to zero so the unsigned subtraction cannot wrap around.
7196 const size_t num_avail = (tend1 < tend) ?
size_t(0) : tend1 - tend;
7197 size_t num_inserted = 0;
7199 global_inds_device_value_t* tgtGlobalColIndsPtr = tgtGlobalColInds.data();
7202 for (
size_t j = 0; j < rowLength; j++) {
7203 auto ci = srcLocalColIndsDevice(start + j);
7204 GO gi = srcColMapLocal.getGlobalElement(ci);
7205 if (gi == GINV) CRSGRAPH_INNER_ABORT(__LINE__);
7206 auto numInTgtRow = (tend - tstart);
// An offset equal to numInTgtRow is treated as "not found": the index is
// appended at that position, aborting first if no free slot remains.
7208 const size_t offset = KokkosSparse::findRelOffset(
7209 tgtGlobalColIndsPtr + tstart, numInTgtRow, gi, hint, sorted);
7211 if (offset == numInTgtRow) {
7212 if (num_inserted >= num_avail) {
7213 Kokkos::abort(
"num_avail");
7215 tgtGlobalColIndsPtr[tstart + offset] = gi;
// Record the number of entries added to this target row.
7221 k_numRowEnt(tgtLocalRow) += num_inserted;
// After the kernel, copy the updated counts back into the target graph's
// k_numRowEntries_ and mark the graph as locally modified.
7224 Kokkos::deep_copy(tgtCrsGraph.k_numRowEntries_, k_numRowEnt);
7225 tgtCrsGraph.setLocallyModified();
// Copies rows of srcRowGraph into tgtRowGraph.
//
// Both graphs are down-cast to CrsGraph; a failed cast raises
// std::runtime_error. The target is padded via computeCrsPadding /
// applyCrsPadding before any insertion. Rows are then inserted in two passes:
//   1. the numSameIDs "same" rows, and
//   2. the permuted rows, where source local row permuteFromLIDs[i] is
//      inserted into target local row permuteToLIDs[i].
// If the source graph is fill complete, each pass is delegated to
// insertGlobalIndicesDevice; otherwise each source row is viewed with
// getGlobalRowView and inserted with insertGlobalIndices.
//
// srcRowGraph      Source graph; must be a CrsGraph at run time.
// tgtRowGraph      Target graph; must be a CrsGraph at run time.
// numSameIDs       Number of rows handled by the first ("same") pass.
// permuteToLIDs    Target local row indices for the second pass.
// permuteFromLIDs  Source local row indices for the second pass; must have
//                  the same extent as permuteToLIDs.
7228template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7229void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::copyAndPermuteNew(
7230 const row_graph_type& srcRowGraph,
7231 row_graph_type& tgtRowGraph,
7232 const size_t numSameIDs,
7233 const Kokkos::DualView<
const local_ordinal_type*,
7234 buffer_device_type>& permuteToLIDs,
7235 const Kokkos::DualView<
const local_ordinal_type*,
7236 buffer_device_type>& permuteFromLIDs,
7240 using GO = global_ordinal_type;
// Prefix consumed by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
7241 const char tfecfFuncName[] =
"copyAndPermuteNew: ";
7242 const bool verbose = verbose_;
7244 Details::ProfilingRegion regionCAP(
"Tpetra::CrsGraph::copyAndPermuteNew");
// Trace output: build the message prefix and write it to stderr in one shot.
// NOTE(review): presumably only done when `verbose` is set -- confirm.
7245 std::unique_ptr<std::string> prefix;
7247 prefix = this->
createPrefix(
"CrsGraph",
"copyAndPermuteNew");
7248 std::ostringstream os;
7249 os << *prefix << endl;
7250 std::cerr << os.str();
// The two permutation lists are paired element by element, so their lengths
// must agree.
7253 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7254 permuteToLIDs.extent(0) != permuteFromLIDs.extent(0),
7256 "permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) <<
".");
7259 std::ostringstream os;
7260 os << *prefix <<
"Compute padding" << endl;
7261 std::cerr << os.str();
// Down-cast the source to the concrete CrsGraph type; reject anything else.
7264 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
7265 const crs_graph_type* srcCrsGraphPtr =
dynamic_cast<const crs_graph_type*
>(&srcRowGraph);
7266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7267 !srcCrsGraphPtr, std::runtime_error,
"error srcGraph type= " <<
typeid(srcRowGraph).name());
7268 const crs_graph_type& srcCrsGraph = *srcCrsGraphPtr;
// Same down-cast and check for the target.
7270 crs_graph_type* tgtCrsGraphPtr =
dynamic_cast<crs_graph_type*
>(&tgtRowGraph);
7271 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7272 !tgtCrsGraphPtr, std::runtime_error,
"error tgtGraph type= " <<
typeid(tgtRowGraph).name());
// Compute padding on the target from the source graph and the same/permute
// lists, then apply it before inserting any indices.
7274 crs_graph_type& tgtCrsGraph = *tgtCrsGraphPtr;
7275 auto padding = tgtCrsGraph.computeCrsPadding(
7276 srcRowGraph, numSameIDs, permuteToLIDs, permuteFromLIDs, verbose);
7277 tgtCrsGraph.applyCrsPadding(*padding, verbose);
7279 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
7280 const map_type& tgtRowMap = *(tgtRowGraph.getRowMap());
// src_filled selects between the device path and the row-by-row host path.
7281 const bool src_filled = srcRowGraph.isFillComplete();
// NOTE(review): row_copy does not appear to be referenced below -- confirm
// whether it is still needed.
7282 nonconst_global_inds_host_view_type row_copy;
// First pass: the numSameIDs "same" rows.
7288 LO numSameIDs_as_LID =
static_cast<LO
>(numSameIDs);
// NOTE(review): srcCrsGraphPtr was already checked for null above, so the
// nullptr test here looks redundant and the condition reduces to src_filled.
7290 if (src_filled || srcCrsGraphPtr ==
nullptr) {
7292 std::ostringstream os;
7293 os << *prefix <<
"src_filled || srcCrsGraph == nullptr" << endl;
7294 std::cerr << os.str();
// The "same" rows need no permutation, so empty (default-constructed)
// permutation views are passed.
7301 Kokkos::DualView<const local_ordinal_type*, buffer_device_type> noPermute;
7302 insertGlobalIndicesDevice(srcCrsGraph, tgtCrsGraph,
7303 noPermute, noPermute,
7307 std::ostringstream os;
7308 os << *prefix <<
"! src_filled && srcCrsGraph != nullptr" << endl;
7309 std::cerr << os.str();
// Source not fill complete: view each source row by its global ID and insert
// its entries into the row with the same global ID in the target.
// NOTE(review): myid is assumed to start at 0 -- confirm at its declaration.
7311 for (
size_t i = 0; i < numSameIDs; ++i, ++myid) {
7312 const GO gid = srcRowMap.getGlobalElement(myid);
7313 global_inds_host_view_type row;
7314 srcCrsGraph.getGlobalRowView(gid, row);
7315 tgtCrsGraph.insertGlobalIndices(gid, row.extent(0), row.data());
// Second pass: the permuted rows. Host views drive the host loop below; the
// device views are also obtained here.
7322 auto permuteToLIDs_h = permuteToLIDs.view_host();
7323 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
7324 auto permuteToLIDs_d = permuteToLIDs.view_device();
7325 auto permuteFromLIDs_d = permuteFromLIDs.view_device();
// Fill-complete source: delegate to insertGlobalIndicesDevice, passing the
// number of permuted rows as the final argument.
7327 if (src_filled || srcCrsGraphPtr ==
nullptr) {
7328 insertGlobalIndicesDevice(
7333 static_cast<LO
>(permuteToLIDs_h.extent(0)));
// Otherwise, for each pair i: translate the target and source local rows to
// global IDs through their own row Maps, view the source row, and insert its
// entries into the target row.
7335 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
7336 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
7337 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
7338 global_inds_host_view_type row;
7339 srcCrsGraph.getGlobalRowView(srcgid, row);
7340 tgtCrsGraph.insertGlobalIndices(mygid, row.extent(0), row.data());
7345 std::ostringstream os;
7346 os << *prefix <<
"Done" << endl;
7347 std::cerr << os.str();
// Expands to a declaration of importAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>, in the overload that takes a single Import object
// (importer), the domain and range Maps, and a ParameterList. It returns a
// Teuchos::RCP to the resulting CrsGraph. Used by TPETRA_CRSGRAPH_INSTANT.
7359#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7361 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7362 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7363 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7364 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7365 CrsGraph<LO, GO, NODE>::node_type>& importer, \
7366 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7367 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7368 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7369 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7370 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7371 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7372 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of importAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>, in the overload that takes two Import objects
// (rowImporter and domainImporter), the domain and range Maps, and a
// ParameterList. It returns a Teuchos::RCP to the resulting CrsGraph.
// Used by TPETRA_CRSGRAPH_INSTANT.
7374#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7376 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7377 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7378 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7379 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7380 CrsGraph<LO, GO, NODE>::node_type>& rowImporter, \
7381 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7382 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7383 CrsGraph<LO, GO, NODE>::node_type>& domainImporter, \
7384 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7385 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7386 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7387 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7388 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7389 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7390 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of exportAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>, in the overload that takes a single Export object
// (exporter), the domain and range Maps, and a ParameterList. It returns a
// Teuchos::RCP to the resulting CrsGraph. Used by TPETRA_CRSGRAPH_INSTANT.
7392#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7394 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7395 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7396 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7397 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7398 CrsGraph<LO, GO, NODE>::node_type>& exporter, \
7399 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7400 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7401 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7402 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7403 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7404 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7405 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of exportAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>, in the overload that takes two Export objects
// (rowExporter and domainExporter), the domain and range Maps, and a
// ParameterList. It returns a Teuchos::RCP to the resulting CrsGraph.
// Used by TPETRA_CRSGRAPH_INSTANT.
7407#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7409 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7410 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7411 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7412 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7413 CrsGraph<LO, GO, NODE>::node_type>& rowExporter, \
7414 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7415 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7416 CrsGraph<LO, GO, NODE>::node_type>& domainExporter, \
7417 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7418 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7419 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7420 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7421 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7422 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7423 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Explicitly instantiates CrsGraph<LO, GO, NODE> and then expands the four
// import/export-and-fill-complete macros for the same (LO, GO, NODE) triple:
// the single-Import, single-Export, two-Import and two-Export overloads.
7425#define TPETRA_CRSGRAPH_INSTANT(LO, GO, NODE) \
7426 template class CrsGraph<LO, GO, NODE>; \
7427 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7428 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7429 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7430 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE)