2345 os <<
"You attempted to insert entries in owned row " <<
gblRow
2346 <<
", at the following column indices: [";
2354 <<
"Of those, the following indices are not in "
2355 "the column Map on this process: [";
2363 <<
"Since the matrix has a column Map already, "
2364 "it is invalid to insert entries at those locations.";
2365 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::invalid_argument, os.str());
2369 this->insertGlobalIndicesImpl(lclRow, inputGblColInds, numInputInds);
2371 this->insertGlobalIndicesIntoNonownedRows(gblRow, inputGblColInds,
2376template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// CrsGraph::insertGlobalIndicesFiltered (name taken from the tfecfFuncName
// string below).
// NOTE(review): only fragments of this definition are visible in this view
// (the signature and several statements are missing); the comments describe
// just the lines that are present.
2384template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Function-name prefix; presumably consumed by the
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC checks whose messages follow.
2391 const char tfecfFuncName[] =
"insertGlobalIndicesFiltered: ";
// Messages of two precondition checks (their conditions are not visible):
// the graph must not be locally indexed, and fill must be active.
2394 "Graph indices are local; use insertLocalIndices().");
2400 "You are not allowed to call this method if fill is not active. "
2401 "If fillComplete has been called, you must first call resumeFill "
2402 "before you may insert indices.");
// Allocate global-index storage on first use.
2403 if (!indicesAreAllocated()) {
2404 allocateIndices(GlobalIndices, verbose_);
// Only when a column Map exists: lclCol is compared against the invalid
// local-ordinal sentinel (presumably to drop indices absent from the
// column Map -- confirm against the full source).
2409 if (!colMap_.is_null()) {
2420 if (
lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
2443template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2460template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2465 !isFillActive(), std::runtime_error,
"requires that fill is active.");
2467 isStorageOptimized(), std::runtime_error,
2468 "cannot remove indices after optimizeStorage() has been called.");
2470 isGloballyIndexed(), std::runtime_error,
"graph indices are global.");
2472 !rowMap_->isNodeLocalElement(
lrow), std::runtime_error,
2473 "Local row " <<
lrow <<
" is not in the row Map on the calling process.");
2474 if (!indicesAreAllocated()) {
2475 allocateIndices(LocalIndices, verbose_);
2478 if (k_numRowEntries_.extent(0) != 0) {
2479 this->k_numRowEntries_(
lrow) = 0;
2484 !indicesAreAllocated() ||
2485 !isLocallyIndexed(),
2487 "Violated stated post-conditions. Please contact Tpetra team.");
// CrsGraph::setAllIndices (name taken from the profiling label and the
// error messages below); the visible parameter is the device view
// columnIndices.
// NOTE(review): only fragments of this definition are visible in this view;
// several conditions and statements are missing.
2491template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2494 const typename local_graph_device_type::entries_type::non_const_type&
columnIndices) {
2496 ProfilingRegion
region(
"Tpetra::CrsGraph::setAllIndices");
// Precondition: a non-null column Map must already be set.
2499 !hasColMap() || getColMap().
is_null(), std::runtime_error,
2500 "The graph must have a column Map before you may call this method.");
// Row-pointer length checks (conditions not visible): the messages require
// length 0 or 1 for zero local rows, otherwise getLocalNumRows()+1.
2507 std::runtime_error,
"Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2511 std::runtime_error,
"rowPointers.size() = " <<
rowPtrLen <<
" != this->getLocalNumRows()+1 = " << (
numLocalRows + 1) <<
".");
// Reduction over every entry of columnIndices; per the message below it
// validates that each index lies in [0, getLocalNumCols()).
2516 using exec_space =
typename local_graph_device_type::execution_space;
2519 Kokkos::parallel_reduce(
2520 Kokkos::RangePolicy<exec_space>(0,
columnIndices.extent(0)),
2527 auto comm = this->getComm();
2534 message = std::string(
"ERROR, rank ") + std::to_string(comm->getRank()) +
", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2537 throw std::invalid_argument(
"CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
// Debug-only check, performed when the graph reports itself as sorted:
// a second reduction plus a REDUCE_MAX all-reduce of notSorted, which per
// the messages verifies that indices are sorted within rows.
2541 if (debug_ && this->isSorted()) {
2544 using exec_space =
typename local_graph_device_type::execution_space;
2545 using size_type =
typename local_graph_device_type::size_type;
2546 Kokkos::parallel_reduce(
2560 auto comm = this->getComm();
2561 Teuchos::reduceAll<int, int>(*comm, Teuchos::REDUCE_MAX,
notSorted,
2567 message = std::string(
"ERROR, rank ") + std::to_string(comm->getRank()) +
", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2570 throw std::invalid_argument(
"CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
// Record the resulting state: indices allocated, local, sorted, and free of
// redundancies.
2574 indicesAreAllocated_ =
true;
2575 indicesAreLocal_ =
true;
2576 indicesAreSorted_ =
true;
2577 noRedundancies_ =
true;
// The unpacked index storage is set to the packed storage, and the graph is
// marked as 1-D packed.
2579 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2582 set_need_sync_host_uvm_access();
2586 storageStatus_ = Details::STORAGE_1D_PACKED;
// Reset the allocation hints: zero count and a default-constructed
// per-row allocation view.
2591 numAllocForAllRows_ = 0;
2592 k_numAllocPerRow_ =
decltype(k_numAllocPerRow_)();
2594 checkInternalState();
2597template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2602 typedef typename local_graph_device_type::row_map_type
row_map_type;
2603 typedef typename row_map_type::array_layout
layout_type;
2606 Kokkos::MemoryUnmanaged>
2611 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2616 if constexpr (
same) {
2624 std::is_same<
typename row_map_type::memory_space,
2625 Kokkos::HostSpace>::value;
2647 Kokkos::View<LocalOrdinal*, layout_type, device_type>
k_ind =
2648 Kokkos::Compat::getKokkosViewDeepCopy<device_type>(
columnIndices());
// CrsGraph::globalAssemble (name taken from the createPrefix arguments
// below).
// NOTE(review): only fragments of this definition are visible in this view;
// most of what remains is verbose-output plumbing (build a message in a local
// std::ostringstream, then write it to std::cerr).
2652template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2656 using Teuchos::Comm;
2657 using Teuchos::outArg;
2660 using Teuchos::REDUCE_MAX;
2661 using Teuchos::REDUCE_MIN;
2662 using Teuchos::reduceAll;
2666 using size_type =
typename Teuchos::Array<GO>::size_type;
// Prefix string for verbose output lines.
2672 std::unique_ptr<std::string>
prefix;
2674 prefix = this->createPrefix(
"CrsGraph",
"globalAssemble");
2675 std::ostringstream
os;
2677 std::cerr <<
os.str();
// Message of the fill-must-be-active precondition (condition not visible).
2682 "Fill must be active before "
2683 "you may call this method.");
2698 std::ostringstream
os;
2700 std::cerr <<
os.str();
2703 }
else if (verbose_) {
2704 std::ostringstream
os;
2705 os << *
prefix <<
"At least 1 process has nonlocal rows"
2707 std::cerr <<
os.str();
// First pass over the nonlocals_ container (loop body not visible).
2726 for (
auto mapIter = this->nonlocals_.begin();
2727 mapIter != this->nonlocals_.end();
2757 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
2762 std::ostringstream
os;
2763 os << *
prefix <<
"nonlocalRowMap->getIndexBase()="
2765 std::cerr <<
os.str();
// Second pass over the nonlocals_ container (loop body not visible).
2777 for (
auto mapIter = this->nonlocals_.begin();
2778 mapIter != this->nonlocals_.end();
2787 std::ostringstream
os;
2789 std::cerr <<
os.str();
2805 std::ostringstream
os;
2807 std::cerr <<
os.str();
2814 std::ostringstream
os;
2815 os << *
prefix <<
"Original row Map is NOT 1-to-1" <<
endl;
2816 std::cerr <<
os.str();
2833 std::ostringstream
os;
2835 std::cerr <<
os.str();
2846 std::ostringstream
os;
2848 std::cerr <<
os.str();
2860 checkInternalState();
2862 std::ostringstream
os;
2864 std::cerr <<
os.str();
2868template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2871 clearGlobalConstants();
2872 if (
params != Teuchos::null) this->setParameterList(
params);
2874 indicesAreSorted_ =
true;
2875 noRedundancies_ =
true;
2876 fillComplete_ =
false;
2879template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2892 Teuchos::RCP<const map_type>
domMap = this->getDomainMap();
2894 domMap = this->getRowMap();
2896 Teuchos::RCP<const map_type>
ranMap = this->getRangeMap();
2898 ranMap = this->getRowMap();
// CrsGraph::fillComplete(domainMap, rangeMap, params): the three-argument
// overload whose signature is visible below.
// NOTE(review): only fragments of this definition are visible in this view;
// many conditions and braces are missing, so the comments describe just the
// statements that are present.
2903template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2905 fillComplete(
const Teuchos::RCP<const map_type>& domainMap,
2906 const Teuchos::RCP<const map_type>&
rangeMap,
2907 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
2911 const bool verbose = verbose_;
2915 std::unique_ptr<std::string>
prefix;
2917 prefix = this->createPrefix(
"CrsGraph",
"fillComplete");
2918 std::ostringstream
os;
2920 std::cerr <<
os.str();
// Message of the fill-must-be-active precondition (condition not visible).
2924 "Graph fill state must be active (isFillActive() "
2925 "must be true) before calling fillComplete().");
2927 const int numProcs = getComm()->getSize();
// The ghost-GID sorting option is accepted under two spellings; the
// lower-case key is tested first, and the current member value is passed as
// the default to get().
2936 if (
params->isParameter(
"sort column map ghost gids")) {
2937 sortGhostsAssociatedWithEachProcessor_ =
2938 params->get<
bool>(
"sort column map ghost gids",
2939 sortGhostsAssociatedWithEachProcessor_);
2940 }
else if (
params->isParameter(
"Sort column Map ghost GIDs")) {
2941 sortGhostsAssociatedWithEachProcessor_ =
2942 params->get<
bool>(
"Sort column Map ghost GIDs",
2943 sortGhostsAssociatedWithEachProcessor_);
// If nothing has been allocated yet, allocate local or global index storage
// (the test choosing between the two calls is not visible).
2958 if (!indicesAreAllocated()) {
2961 allocateIndices(LocalIndices, verbose);
2964 allocateIndices(GlobalIndices, verbose);
2980 std::ostringstream
os;
2981 os << *
prefix <<
"Do not need to call globalAssemble; "
2982 "assertNoNonlocalInserts="
2986 std::cerr <<
os.str();
// Verbose dump of the nonlocals_ container.
2991 std::ostringstream
os;
2993 Details::Impl::verbosePrintMap(
2994 os, nonlocals_.begin(), nonlocals_.end(),
2995 nonlocals_.size(),
"nonlocals_");
2996 std::cerr <<
os.str() <<
endl;
// Communicator lookup that tolerates a null Map.
3000 auto map = this->getMap();
3001 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
// With a communicator available, the error text reports a problem on "at
// least one process"; the second message reports the calling process's own
// nonlocals_.size() (the branch structure is not visible).
3003 if (!comm.is_null()) {
3004 using Teuchos::REDUCE_MAX;
3005 using Teuchos::reduceAll;
3011 "least one process in the CrsGraph's communicator. This "
3012 "means either that you incorrectly set the "
3013 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3014 "or that you inserted invalid entries. "
3015 "Rerun with the environment variable TPETRA_VERBOSE="
3016 "CrsGraph set to see the entries of nonlocals_ on every "
3017 "MPI process (WARNING: lots of output).");
3020 "nonlocals_.size()=" <<
numNonlocals <<
" != 0 on the "
3021 "calling process. This means either that you incorrectly "
3022 "set the \"No Nonlocal Changes\" fillComplete parameter "
3023 "to true, or that you inserted invalid entries. "
3024 "Rerun with the environment "
3025 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3026 "of nonlocals_ on every MPI process (WARNING: lots of "
// remotePIDs starts empty and is passed to makeColMap by non-const
// reference (see that method's signature).
3039 Teuchos::Array<int> remotePIDs(0);
3042 this->makeColMap(remotePIDs);
3048 this->makeIndicesLocal(verbose);
3052 using Teuchos::outArg;
3054 using Teuchos::REDUCE_MIN;
3055 using Teuchos::reduceAll;
3059 if (!
map.is_null()) {
3060 comm =
map->getComm();
3062 if (comm.is_null()) {
3070 std::ostringstream
os;
// Sort and merge using the graph's current sorted/merged flags.
3087 this->sortAndMergeAllIndices(this->isSorted(), this->isMerged());
3095 this->fillLocalGraph(
params);
// "compute global constants" is read with a default of true; the guards
// around the two compute*Constants calls are not visible.
3098 params->get(
"compute global constants",
true);
3100 this->computeGlobalConstants();
3102 this->computeLocalConstants();
// Mark the graph fill complete and validate internal state.
3104 this->fillComplete_ =
true;
3105 this->checkInternalState();
3108 std::ostringstream
os;
3110 std::cerr <<
os.str();
3114template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3117 const Teuchos::RCP<const map_type>&
rangeMap,
3118 const Teuchos::RCP<const import_type>&
importer,
3119 const Teuchos::RCP<const export_type>&
exporter,
3120 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
3126 std::runtime_error,
"The input domain Map and range Map must be nonnull.");
3128 isFillComplete() || !hasColMap(), std::runtime_error,
3130 "call this method unless the graph has a column Map.");
3134 std::runtime_error,
"The calling process has getLocalNumRows() = " << getLocalNumRows() <<
" > 0 rows, but the row offsets array has not "
3138 std::runtime_error,
"The row offsets array has length " <<
rowPtrsUnpackedLength <<
" != getLocalNumRows()+1 = " << (getLocalNumRows() + 1) <<
".");
3153 numAllocForAllRows_ = 0;
3154 k_numAllocPerRow_ =
decltype(k_numAllocPerRow_)();
3155 indicesAreAllocated_ =
true;
3160 indicesAreLocal_ =
true;
3161 indicesAreGlobal_ =
false;
3170 indicesAreSorted_ =
true;
3171 noRedundancies_ =
true;
3175 importer_ = Teuchos::null;
3176 exporter_ = Teuchos::null;
3180 !
importer->getSourceMap()->isSameAs(*getDomainMap()) ||
3181 !
importer->getTargetMap()->isSameAs(*getColMap()),
3182 std::invalid_argument,
": importer does not match matrix maps.");
3189 !
exporter->getSourceMap()->isSameAs(*getRowMap()) ||
3190 !
exporter->getTargetMap()->isSameAs(*getRangeMap()),
3191 std::invalid_argument,
": exporter does not match matrix maps.");
3197 Teuchos::Array<int> remotePIDs(0);
3198 this->makeImportExport(remotePIDs,
false);
3203 this->fillLocalGraph(
params);
3207 params->get(
"compute global constants",
true);
3211 this->computeGlobalConstants();
3214 this->computeLocalConstants();
3217 fillComplete_ =
true;
3219 checkInternalState();
// CrsGraph::fillLocalGraph (name taken from the function-name string and the
// diagnostics below). Builds the packed row offsets / column indices used by
// the local graph, either by packing the unpacked storage or by aliasing it.
// NOTE(review): only fragments of this definition are visible in this view
// (the signature, many conditions, and closing braces are missing); the
// comments describe just the statements that are present.
3222template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3225 using ::Tpetra::Details::computeOffsetsFromCounts;
3226 typedef typename local_graph_device_type::row_map_type
row_map_type;
3228 typedef typename local_graph_device_type::entries_type::non_const_type
lclinds_1d_type;
3230 "fillLocalGraph (called from fillComplete or "
3231 "expertStaticFillComplete): ";
3232 const size_t lclNumRows = this->getLocalNumRows();
// "Optimize Storage" defaults to true; this branch is taken only when the
// caller explicitly passed false.
3240 if (!
params.is_null() && !
params->get(
"Optimize Storage",
true)) {
// Sanity checks on the unpacked row offsets: non-empty, length
// lclNumRows + 1, and last offset equal to the unpacked index count.
3249 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3251 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) == 0, std::logic_error,
3252 "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3253 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsUnpacked.extent(0) != lclNumRows + 1, std::logic_error,
3254 "rowPtrsUnpacked_host_.extent(0) = "
3255 << rowPtrsUnpacked.extent(0) <<
" != (lclNumRows + 1) = "
3256 << (lclNumRows + 1) <<
".");
3257 const size_t numOffsets = rowPtrsUnpacked.extent(0);
3258 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
// The value was read at index numOffsets - 1, so report that index (the
// message previously printed numOffsets, unlike the sibling checks below).
3259 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numOffsets != 0 &&
3260 lclIndsUnpacked_wdv.extent(0) != valToCheck,
3261 std::logic_error,
"numOffsets=" << numOffsets <<
" != 0 "
3262 " and lclIndsUnpacked_wdv.extent(0)="
3263 << lclIndsUnpacked_wdv.extent(0) <<
" != rowPtrsUnpacked_host_(" << (numOffsets - 1) <<
")=" << valToCheck <<
".");
// Query the allocation size, re-throwing any failure with a message that
// names getLocalAllocationSize as the source.
3266 size_t allocSize = 0;
3268 allocSize = this->getLocalAllocationSize();
3269 }
catch (std::logic_error& e) {
3270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
3271 "getLocalAllocationSize threw "
3272 "std::logic_error: "
3274 }
catch (std::runtime_error& e) {
3275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3276 "getLocalAllocationSize threw "
3277 "std::runtime_error: "
3279 }
catch (std::exception& e) {
3280 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3281 "getLocalAllocationSize threw "
3285 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3286 "getLocalAllocationSize threw "
3287 "an exception not a subclass of std::exception.");
// Entry count differs from the allocation size: the storage has slack, so
// packed offsets (ptr_d) and packed indices (ind_d) are built below.
3290 if (this->getLocalNumEntries() != allocSize) {
3293 non_const_row_map_type ptr_d;
3294 row_map_type ptr_d_const;
3303 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3304 if (rowPtrsUnpacked.extent(0) != 0) {
3305 const size_t numOffsets =
3306 static_cast<size_t>(rowPtrsUnpacked.extent(0));
3307 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3308 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck !=
size_t(lclIndsUnpacked_wdv.extent(0)),
3310 "(Unpacked branch) Before allocating "
3311 "or packing, k_rowPtrs_("
3312 << (numOffsets - 1) <<
")="
3313 << valToCheck <<
" != lclIndsUnpacked_wdv.extent(0)="
3314 << lclIndsUnpacked_wdv.extent(0) <<
".");
3324 size_t lclTotalNumEntries = 0;
3328 non_const_row_map_type(
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
3329 ptr_d_const = ptr_d;
3333 typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3335 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
size_t(numRowEnt_h.extent(0)) != lclNumRows,
3337 "(Unpacked branch) "
3338 "numRowEnt_h.extent(0)="
3339 << numRowEnt_h.extent(0)
3340 <<
" != getLocalNumRows()=" << lclNumRows <<
"");
3346 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
static_cast<size_t>(ptr_d.extent(0)) != lclNumRows + 1,
3348 "(Unpacked branch) After allocating "
3349 "ptr_d, ptr_d.extent(0) = "
3351 <<
" != lclNumRows+1 = " << (lclNumRows + 1) <<
".");
// The last packed offset must equal the total number of local entries.
3352 const auto valToCheck =
3353 ::Tpetra::Details::getEntryOnHost(ptr_d, lclNumRows);
3354 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck != lclTotalNumEntries, std::logic_error,
3355 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3356 "after filling ptr_d, ptr_d(lclNumRows="
3358 <<
") = " << valToCheck <<
" != total number of entries "
3359 "on the calling process = "
3360 << lclTotalNumEntries
// Allocate the packed index array and copy row by row with pack_functor,
// reading unpacked indices/offsets and writing at the packed offsets.
3366 lclinds_1d_type ind_d =
3367 lclinds_1d_type(
"Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3379 typedef pack_functor<
3380 typename local_graph_device_type::entries_type::non_const_type,
3381 typename local_inds_dualv_type::t_dev::const_type,
3383 typename local_graph_device_type::row_map_type>
3385 inds_packer_type f(ind_d,
3386 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3387 ptr_d, this->getRowPtrsUnpackedDevice());
3389 typedef typename decltype(ind_d)::execution_space exec_space;
3390 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3391 Kokkos::parallel_for(range_type(0, lclNumRows), f);
3395 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(ptr_d.extent(0) == 0, std::logic_error,
3396 "(\"Optimize Storage\"=true branch) After packing, "
3397 "ptr_d.extent(0)=0.");
3398 if (ptr_d.extent(0) != 0) {
3399 const size_t numOffsets =
static_cast<size_t>(ptr_d.extent(0));
3400 const auto valToCheck =
3401 ::Tpetra::Details::getEntryOnHost(ptr_d, numOffsets - 1);
3402 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
static_cast<size_t>(valToCheck) != ind_d.extent(0),
3404 "(\"Optimize Storage\"=true branch) "
3405 "After packing, ptr_d("
3406 << (numOffsets - 1) <<
")="
3407 << valToCheck <<
" != ind_d.extent(0)="
3408 << ind_d.extent(0) <<
".");
// Install the packed offsets and indices; which setter runs depends on
// requestOptimizedStorage (the else keyword is not visible here).
3412 if (requestOptimizedStorage)
3413 setRowPtrs(ptr_d_const);
3415 setRowPtrsPacked(ptr_d_const);
3416 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
// No packing needed: the packed storage is set to the unpacked storage.
3419 this->setRowPtrs(rowPtrsUnpacked_dev_);
3420 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3423 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3424 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3425 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowPtrsPacked_dev.extent(0) == 0, std::logic_error,
3426 "(\"Optimize Storage\"=false branch) "
3427 "rowPtrsPacked_dev_.extent(0) = 0.");
3428 if (rowPtrsPacked_dev.extent(0) != 0) {
3429 const size_t numOffsets =
3430 static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3431 const size_t valToCheck =
3432 rowPtrsPacked_host(numOffsets - 1);
3433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(valToCheck !=
size_t(lclIndsPacked_wdv.extent(0)),
3435 "(\"Optimize Storage\"=false branch) "
3436 "rowPtrsPacked_dev_("
3437 << (numOffsets - 1) <<
")="
3439 <<
" != lclIndsPacked_wdv.extent(0)="
3440 << lclIndsPacked_wdv.extent(0) <<
".");
// Final consistency checks on the packed offsets and index count.
3446 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3447 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3448 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
static_cast<size_t>(rowPtrsPacked_dev.extent(0)) != lclNumRows + 1,
3449 std::logic_error,
"After packing, rowPtrsPacked_dev_.extent(0) = " << rowPtrsPacked_dev.extent(0) <<
" != lclNumRows+1 = " << (lclNumRows + 1) <<
".");
3450 if (rowPtrsPacked_dev.extent(0) != 0) {
3451 const size_t numOffsets =
static_cast<size_t>(rowPtrsPacked_dev.extent(0));
3452 const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
static_cast<size_t>(valToCheck) != lclIndsPacked_wdv.extent(0),
3454 std::logic_error,
"After packing, rowPtrsPacked_dev_(" << (numOffsets - 1) <<
") = " << valToCheck <<
" != lclIndsPacked_wdv.extent(0) = " << lclIndsPacked_wdv.extent(0) <<
".");
// With optimized storage requested: reset the per-row entry counts to an
// empty view, set the unpacked indices to the packed ones, and mark the
// storage as 1-D packed.
3458 if (requestOptimizedStorage) {
3464 k_numRowEntries_ = num_row_entries_type();
3467 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3469 storageStatus_ = Details::STORAGE_1D_PACKED;
3472 set_need_sync_host_uvm_access();
3475template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3487 isLocallyIndexed() || isGloballyIndexed(), std::runtime_error,
3488 "Requires matching maps and non-static graph.");
// CrsGraph::reindexColumns (name taken from the Kokkos kernel label below);
// newImport is the only parameter visible in this view.
// NOTE(review): only fragments of this definition are visible (the rest of
// the signature and many statements are missing); the comments describe just
// the lines that are present.
3492template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3495 const Teuchos::RCP<const import_type>&
newImport,
3498 using Teuchos::REDUCE_MIN;
3499 using Teuchos::reduceAll;
// Precondition: the graph must not be fill complete.
3506 isFillComplete(), std::runtime_error,
3507 "The graph is fill complete "
3508 "(isFillComplete() returns true). You must call resumeFill() before "
3509 "you may call this method.");
3527 const LO
lclNumRows =
static_cast<LO
>(this->getLocalNumRows());
// Work is needed only if indices are allocated; the locally indexed case
// runs a reduction over the whole unpacked allocation (allocSize entries).
3561 if (indicesAreAllocated()) {
3562 if (isLocallyIndexed()) {
3567 const size_t allocSize = this->getLocalAllocationSize();
3568 auto oldLclInds1D = lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly);
3574 const auto LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
3575 const auto GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
3580 Kokkos::parallel_reduce(
3581 "Tpetra::CrsGraph::reindexColumns",
3582 Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, allocSize),
// Per-row loop over the entries actually in use (loop body not visible).
3629 for (
size_t k = 0;
k <
rowInfo.numEntries; ++
k) {
3648 getRowMap().is_null() ? Teuchos::null : getRowMap()->getComm();
3649 if (!comm.is_null()) {
// Two failure messages (conditions not visible): a column Map that is null
// on some processes, and column indices present in the old column Map but
// absent from the new one.
// NOTE(review): "processess" in the second message is a typo in the
// runtime string; left unchanged here.
3655 "It is not possible to continue."
3656 " The most likely reason is that the graph is locally indexed, but the "
3657 "column Map is missing (null) on some processes, due to a previous call "
3658 "to replaceColMap().");
3662 "On some process, the graph "
3663 "contains column indices that are in the old column Map, but not in the "
3664 "new column Map (on that process). This method does NOT redistribute "
3665 "data; it does not claim to do the work of an Import or Export operation."
3666 " This means that for all processess, the calling process MUST own all "
3667 "column indices, in both the old column Map and the new column Map. In "
3668 "this case, you will need to do an Import or Export operation to "
3669 "redistribute data.");
3672 if (isLocallyIndexed()) {
3679 if (isLocallyIndexed()) {
// The graph is marked unsorted here; the sorted=false / merged=true locals
// are presumably passed to a later sort-and-merge call that is not visible
// -- confirm.
3686 indicesAreSorted_ =
false;
3694 const bool sorted =
false;
3695 const bool merged =
true;
// When a domain Map exists and differs from the new column Map, importer_
// is at least reset to null (any statements between are not visible).
3709 if (!domainMap_.is_null()) {
3710 if (!domainMap_->isSameAs(*
newColMap)) {
3713 importer_ = Teuchos::null;
// CrsGraph::replaceDomainMap (name taken from the prefix string below).
// NOTE(review): only fragments of this definition are visible in this view;
// the signature and the statements after newImporter are missing.
3722template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3725 const char prefix[] =
"Tpetra::CrsGraph::replaceDomainMap: ";
// Precondition: the graph must already have a column Map.
3727 colMap_.is_null(), std::invalid_argument,
prefix <<
"You may not call "
3728 "this method unless the graph already has a column Map.");
// Message of the null-argument check (condition not visible).
3731 prefix <<
"The new domain Map must be nonnull.");
// The replacement Import starts out null.
3734 Teuchos::RCP<const import_type>
newImporter = Teuchos::null;
// CrsGraph::replaceDomainMapAndImporter (name taken from the prefix string
// below); newImporter is the only parameter visible in this view.
// NOTE(review): only fragments of this definition are visible; several
// conditions are missing, so the contract below is taken from the messages.
3741template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3744 const Teuchos::RCP<const import_type>&
newImporter) {
3745 const char prefix[] =
"Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
// Precondition: the graph must already have a column Map.
3747 colMap_.is_null(), std::invalid_argument,
prefix <<
"You may not call "
3748 "this method unless the graph already has a column Map.");
3751 prefix <<
"The new domain Map must be nonnull.");
// Per the messages: with a null Import the new domain Map must match the
// current column Map; with a nonnull Import its target Map must match the
// column Map and its source Map must match the new domain Map.
3761 "If the new Import is null, "
3762 "then the new domain Map must be the same as the current column Map.");
3765 colMap_->isSameAs(*(
newImporter->getTargetMap()));
3770 "new Import is nonnull, then the current column Map must be the same "
3771 "as the new Import's target Map, and the new domain Map must be the "
3772 "same as the new Import's source Map.");
// Store the Import; the const qualifier is cast away for the member.
3777 importer_ = Teuchos::rcp_const_cast<import_type>(
newImporter);
// CrsGraph::replaceRangeMap (name taken from the prefix string below).
// NOTE(review): only fragments of this definition are visible in this view;
// the signature and the statements after newExporter are missing.
3780template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3783 const char prefix[] =
"Tpetra::CrsGraph::replaceRangeMap: ";
// Precondition: the graph must already have a row Map.
3785 rowMap_.is_null(), std::invalid_argument,
prefix <<
"You may not call "
3786 "this method unless the graph already has a row Map.");
// Message of the null-argument check (condition not visible).
3789 prefix <<
"The new range Map must be nonnull.");
// The replacement Export starts out null.
3792 Teuchos::RCP<const export_type>
newExporter = Teuchos::null;
// CrsGraph::replaceRangeMapAndExporter (name taken from the prefix string
// below); newExporter is the only parameter visible in this view.
// NOTE(review): only fragments of this definition are visible; several
// conditions are missing, so the contract below is taken from the messages.
3799template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3802 const Teuchos::RCP<const export_type>&
newExporter) {
3803 const char prefix[] =
"Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
// Precondition: the graph must already have a row Map. The message now names
// the row Map, matching the rowMap_ check and the wording in replaceRangeMap
// (it previously said "column Map").
3805 rowMap_.is_null(), std::invalid_argument,
prefix <<
"You may not call "
3806 "this method unless the graph already has a row Map.");
// This method replaces the range Map, so the null-argument message names the
// range Map (it previously said "domain Map").
3809 prefix <<
"The new range Map must be nonnull.");
// Per the messages: with a null Export the new range Map must match the
// current row Map; with a nonnull Export its source Map must match the row
// Map and its target Map must match the new range Map.
3819 "If the new Export is null, "
3820 "then the new range Map must be the same as the current row Map.");
3825 rowMap_->isSameAs(*(
newExporter->getSourceMap()));
3828 "new Export is nonnull, then the current row Map must be the same "
3829 "as the new Export's source Map, and the new range Map must be the "
3830 "same as the new Export's target Map.");
// Store the Export; the const qualifier is cast away for the member.
3835 exporter_ = Teuchos::rcp_const_cast<export_type>(
newExporter);
3838template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3843 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
3844 this->getRowPtrsPackedDevice());
3847template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3851 return local_graph_host_type(
3852 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
3853 this->getRowPtrsPackedHost());
// CrsGraph::computeGlobalConstants (name taken from the profiling label
// below).
// NOTE(review): only fragments of this definition are visible in this view;
// the declarations of lcl/gbl and the waits on the requests are not shown.
3856template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3859 using Teuchos::ArrayView;
3860 using Teuchos::outArg;
3861 using Teuchos::reduceAll;
3862 using ::Tpetra::Details::ProfilingRegion;
3865 ProfilingRegion
regionCGC(
"Tpetra::CrsGraph::computeGlobalConstants");
// If global constants are not cached yet, start a sum all-reduce of the
// local entry count into gbl (iallreduce returns a request object;
// presumably non-blocking -- confirm).
3868 std::shared_ptr<Details::CommRequest>
req;
3869 if (!this->haveGlobalConstants_) {
3870 lcl =
static_cast<GST>(this->getLocalNumEntries());
3871 req = Details::iallreduce(
lcl,
gbl, Teuchos::REDUCE_SUM, *this->getComm());
// Local constants are computed after the first reduction has been started.
3874 this->computeLocalConstants();
// Second reduction: maximum row length across processes, written to
// globalMaxNumRowEntries_.
3879 if (!this->haveGlobalConstants_) {
3893 auto req2 = Details::iallreduce(
lclMaxNumRowEnt, this->globalMaxNumRowEntries_, Teuchos::REDUCE_MAX, *this->getComm());
// Publish the summed entry count and mark the global constants as valid.
3896 this->globalNumEntries_ =
gbl;
3899 this->haveGlobalConstants_ =
true;
// CrsGraph::computeLocalConstants (name taken from the profiling label
// below).
// NOTE(review): only fragments of this definition are visible in this view;
// the body of the early-out branch and the maxDifference arguments are not
// shown.
3903template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3906 using ::Tpetra::Details::ProfilingRegion;
3908 ProfilingRegion
regionCLC(
"Tpetra::CrsGraph::computeLocalConstants");
// Branch on the cached flag (presumably an early return -- confirm).
3909 if (this->haveLocalConstants_) {
// Start from the invalid sentinel value.
3914 this->nodeMaxNumRowEntries_ =
3915 Teuchos::OrdinalTraits<size_t>::invalid();
// Row count derived from the packed row offsets: 0 for an empty view,
// otherwise extent - 1.
3919 auto ptr = this->getRowPtrsPackedDevice();
3920 const LO
lclNumRows =
ptr.extent(0) == 0 ?
static_cast<LO
>(0) : (
static_cast<LO
>(
ptr.extent(0)) -
static_cast<LO
>(1));
// maxDifference is invoked with a label naming nodeMaxNumRowEntries.
3923 ::Tpetra::Details::maxDifference(
"Tpetra::CrsGraph: nodeMaxNumRowEntries",
3926 this->haveLocalConstants_ =
true;
// CrsGraph::makeIndicesLocal (name taken from the createPrefix arguments
// below). Returns std::pair<size_t, std::string>; on the visible error path
// the first member is the invalid size_t sentinel.
// NOTE(review): only fragments of this definition are visible in this view;
// the signature, several conditions, and the normal return are missing.
3929template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3930std::pair<size_t, std::string>
3935 using Teuchos::arcp;
3936 using Teuchos::Array;
3940 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
3941 typedef typename num_row_entries_type::non_const_value_type
num_ent_type;
3945 std::unique_ptr<std::string>
prefix;
3947 prefix = this->createPrefix(
"CrsGraph",
"makeIndicesLocal");
3948 std::ostringstream
os;
3949 os << *
prefix <<
"lclNumRows: " << getLocalNumRows() <<
endl;
3950 std::cerr <<
os.str();
// Messages of two internal-consistency checks about the column Map
// (conditions not visible): no column Map yet, and hasColMap() true while
// getColMap() is null.
3956 "The graph does not have a "
3957 "column Map yet. This method should never be called in that case. "
3958 "Please report this bug to the Tpetra developers.");
3961 "that it has a column Map, because hasColMap() returns true. However, "
3962 "the result of getColMap() is null. This should never happen. Please "
3963 "report this bug to the Tpetra developers.");
3971 const LO
lclNumRows =
static_cast<LO
>(this->getLocalNumRows());
// Conversion is only entered for a globally indexed graph with at least one
// local row.
3974 if (this->isGloballyIndexed() &&
lclNumRows != 0) {
3976 typename num_row_entries_type::const_type
h_numRowEnt =
3977 this->k_numRowEntries_;
// Error path: empty unpacked row offsets are reported through errStrm and
// the invalid-sentinel pair is returned.
3983 errStrm <<
"Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
3984 "happen here. Please report this bug to the Tpetra developers."
3987 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid(),
3999 using Kokkos::view_alloc;
4000 using Kokkos::WithoutInitializing;
4010 const std::string label(
"Tpetra::CrsGraph::lclInd");
4012 std::ostringstream
os;
4013 os << *
prefix <<
"(Re)allocate lclInd_wdv: old="
4014 << lclIndsUnpacked_wdv.extent(0) <<
", new=" <<
numEnt <<
endl;
4015 std::cerr <<
os.str();
4032 std::ostringstream
os;
4033 os << *
prefix <<
"Allocate device mirror k_numRowEnt: "
4035 std::cerr <<
os.str();
// Global-to-local conversion: the local index view is requested with
// OverwriteAll access and the global indices are read-only.
4040 using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal;
4043 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4044 gblInds_wdv.getDeviceView(Access::ReadOnly),
4045 this->getRowPtrsUnpackedDevice(),
// Rank lookup for the error message; falls back to 0 when the communicator
// is null (the null-Map branch body is not visible).
4049 const int myRank = [
this]() {
4050 auto map = this->getMap();
4051 if (
map.is_null()) {
4054 auto comm =
map->getComm();
4055 return comm.is_null() ? 0 : comm->getRank();
4059 errStrm <<
"(Process " << myRank <<
") When converting column "
4060 "indices from global to local, we encountered "
4064 <<
" not live in the column Map on this process." <<
endl;
4071 std::ostringstream
os;
4072 os << *
prefix <<
"Free gblInds_wdv: "
4073 << gblInds_wdv.extent(0) <<
endl;
4074 std::cerr <<
os.str();
// Flip the index-state flags to local and validate internal state.
4079 this->indicesAreLocal_ =
true;
4080 this->indicesAreGlobal_ =
false;
4081 this->checkInternalState();
// CrsGraph::makeColMap(remotePIDs): remotePIDs is taken by non-const
// reference.
// NOTE(review): only fragments of this definition are visible in this view;
// the call that actually builds the column Map is not shown, so the comments
// describe just the lines that are present.
4086template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4088 makeColMap(Teuchos::Array<int>& remotePIDs) {
4094 std::unique_ptr<std::string>
prefix;
4097 std::ostringstream
os;
4099 std::cerr <<
os.str();
// Starts from the graph's current column Map and reads the ghost-sorting
// option (how they are used is not visible).
4107 Teuchos::RCP<const map_type>
colMap = this->colMap_;
4109 this->sortGhostsAssociatedWithEachProcessor_;
4119 using Teuchos::outArg;
4120 using Teuchos::REDUCE_MIN;
4121 using Teuchos::reduceAll;
// With a non-null communicator, errors are reported collectively; per the
// message, all processes' error messages are gathered.
4127 auto comm = this->getComm();
4128 if (!comm.is_null()) {
4134 std::ostringstream
os;
4137 ": An error happened on at "
4138 "least one process in the CrsGraph's communicator. "
4139 "Here are all processes' error messages:"
4153 checkInternalState();
4155 std::ostringstream
os;
4157 std::cerr <<
os.str();
4161template <
class execution_space,
class LO,
class rowptr_type,
class colinds_type,
class numRowEntries_type>
4163 using ATS = KokkosKernels::ArithTraits<LO>;
4164 const auto unused = ATS::max();
4169 Kokkos::parallel_for(
// mergeUnpackedGraph: removes adjacent duplicate column indices within each
// row of an unpacked graph, in place, and writes the new per-row count to
// numRowEntries.
// rowptr holds row start offsets; colinds holds column indices;
// numRowEntries(r) is the number of entries in use in row r.
// NOTE(review): only fragments of this definition are visible in this view
// (lines between the fragments, including any empty-row guard and the
// closing braces, are missing).
4177template <
class execution_space,
class LO,
class rowptr_type,
class colinds_type,
class numRowEntries_type>
4178void mergeUnpackedGraph(rowptr_type rowptr, colinds_type colinds, numRowEntries_type numRowEntries) {
// NOTE(review): presumably extent(0) is unsigned, in which case an empty
// rowptr would wrap here -- confirm callers always pass at least one offset.
4179 auto numRows = rowptr.extent(0) - 1;
// One loop iteration per row.
4185 Kokkos::parallel_for(
4186 "merge_entries", Kokkos::RangePolicy<execution_space>(0, numRows), KOKKOS_LAMBDA(
const LO rlid) {
4187 auto rowNNZ = numRowEntries(rlid);
// pos tracks the last kept entry; each entry that differs from it is
// copied to the next slot. Only adjacent duplicates are dropped, so the row
// presumably must already be sorted (sortAndMergeAllIndices sorts before
// calling this).
4191 auto rowBegin = rowptr(rlid);
4192 auto pos = rowBegin;
4193 for (
size_t offset = rowBegin + 1; offset < rowBegin + rowNNZ; ++offset) {
4194 if ((colinds(offset) != colinds(pos))) {
4195 colinds(++pos) = colinds(offset);
// New row length = index of last kept entry - row start + 1.
4198 numRowEntries(rlid) = pos + 1 - rowBegin;
// CrsGraph::sortAndMergeAllIndices(sorted, merged): sorts the column indices
// of every row and/or merges duplicate entries, depending on which of the
// two properties the caller says already hold. Does nothing (beyond verbose
// output and the checks) when both flags are true.
// Throws std::logic_error if the graph is globally indexed, or if a merge is
// requested on storage-optimized storage.
// NOTE(review): only fragments of this definition are visible in this view
// (several conditions, else-branches, and closing braces are missing); the
// comments describe just the statements that are present.
4202template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4203void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4204 sortAndMergeAllIndices(
const bool sorted,
const bool merged) {
// NOTE(review): unlike other methods in this file, this name has no trailing
// ": "; it is also passed to createPrefix, so it is left unchanged.
4206 const char tfecfFuncName[] =
"sortAndMergeAllIndices";
4208 std::unique_ptr<std::string> prefix;
4210 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4211 std::ostringstream os;
4212 os << *prefix <<
"Start: "
4213 <<
"sorted=" << (sorted ?
"true" :
"false")
4214 <<
", merged=" << (merged ?
"true" :
"false") << endl;
4215 std::cerr << os.str();
4217 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isGloballyIndexed(), std::logic_error,
4218 "This method may only be called after makeIndicesLocal.");
4219 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!merged && this->isStorageOptimized(), std::logic_error,
4220 "The graph is already storage optimized, so we shouldn't be "
4221 "merging any indices. "
4222 "Please report this bug to the Tpetra developers.");
4224 if (!sorted || !merged) {
4225 Details::ProfilingRegion regionSortAndMerge(
"Tpetra::CrsGraph::sortAndMergeAllIndices");
// Unpacked storage: work on a device copy of the per-row counts, call the
// prepare helper, sort, merge duplicates, then copy the updated counts back.
4227 if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
4230 auto rowptr = rowPtrsUnpacked_dev_;
4231 auto colinds = lclIndsUnpacked_wdv.getDeviceView(Access::ReadWrite);
4234 auto k_numRowEntries_d = Kokkos::create_mirror_view_and_copy(execution_space(), k_numRowEntries_);
4237 prepareSortMergeUnpackedGraph<execution_space, LocalOrdinal>(rowptr, colinds, k_numRowEntries_d);
4242 Import_Util::sortCrsEntries(rowptr, colinds);
4243 this->indicesAreSorted_ =
true;
4246 mergeUnpackedGraph<execution_space, LocalOrdinal>(rowptr, colinds, k_numRowEntries_d);
4247 Kokkos::deep_copy(k_numRowEntries_, k_numRowEntries_d);
4248 this->noRedundancies_ =
true;
// Packed storage: only sorting is supported (unsorted but already merged);
// the other visible path is an unconditional std::logic_error.
4251 auto rowptr = rowPtrsPacked_dev_;
4252 auto colinds = lclIndsPacked_wdv.getDeviceView(Access::ReadWrite);
4253 if (!sorted && merged) {
4254 Import_Util::sortCrsEntries(rowptr, colinds);
4255 this->indicesAreSorted_ =
true;
// The first literal now ends with a space so the two concatenated sentences
// are separated in the emitted message.
4257 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
4258 "We should never get here. "
4259 "Please report this bug to the Tpetra developers.");
4265 std::ostringstream os;
4266 os << *prefix <<
"Done" << endl;
4267 std::cerr << os.str();
// CrsGraph::makeImportExport (name taken from the profiling label below).
// NOTE(review): only fragments of this definition are visible in this view;
// the signature, the sublist branches, and the Export construction are not
// shown.
4271template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4275 using Teuchos::ParameterList;
4278 using ::Tpetra::Details::ProfilingRegion;
4280 ProfilingRegion
regionMIE(
"Tpetra::CrsGraph::makeImportExport");
// Message of the has-column-Map precondition (condition not visible).
4283 "This method may not be called unless the graph has a column Map.");
// An Import is built only if none exists and the domain Map differs from the
// column Map; the RCP comparison runs before the isSameAs call. Without an
// "Import" sublist in params, the Import is constructed from the two Maps
// and remotePIDs.
4293 if (importer_.is_null()) {
4295 if (domainMap_ != colMap_ && (!domainMap_->isSameAs(*colMap_))) {
4296 if (
params.is_null() || !
params->isSublist(
"Import")) {
4298 importer_ =
rcp(
new import_type(domainMap_, colMap_, remotePIDs));
// The Export is tested the same way: range Map versus row Map, with an
// "Export" sublist check.
4318 if (exporter_.is_null()) {
4320 if (rangeMap_ != rowMap_ && !rangeMap_->isSameAs(*rowMap_)) {
4321 if (
params.is_null() || !
params->isSublist(
"Export")) {
// Builds a one-line textual summary of the graph in a string stream.
// NOTE(review): the signature is not visible in this excerpt; presumably this
// is CrsGraph::description(), since it extends dist_object_type::description().
4331template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4335 std::ostringstream
oss;
// Start from the base-class description, then append a "{status = ..." part.
4336 oss << dist_object_type::description();
// Fill-complete graphs report global rows, columns and entry count.
4337 if (isFillComplete()) {
4338 oss <<
"{status = fill complete"
4339 <<
", global rows = " << getGlobalNumRows()
4340 <<
", global cols = " << getGlobalNumCols()
4341 <<
", global num entries = " << getGlobalNumEntries()
// Otherwise the status text changes; of the remaining fields only the global
// row count is visible in this excerpt.
4344 oss <<
"{status = fill not complete"
4345 <<
", global rows = " << getGlobalNumRows()
// Prints a human-readable description of the graph to an output stream named
// `out`, with the amount of detail controlled by a Teuchos verbosity level.
// NOTE(review): the signature (apart from `verbLevel`) and many statements are
// not visible in this excerpt; presumably this is CrsGraph::describe(). The
// relation between `verbLevel` and the `vl` used below is also not visible.
4351template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4354 const Teuchos::EVerbosityLevel
verbLevel)
const {
4357 using Teuchos::ArrayView;
4358 using Teuchos::Comm;
4360 using Teuchos::VERB_DEFAULT;
4361 using Teuchos::VERB_EXTREME;
4362 using Teuchos::VERB_HIGH;
4363 using Teuchos::VERB_LOW;
4364 using Teuchos::VERB_MEDIUM;
4365 using Teuchos::VERB_NONE;
// Loop over powers of ten below the global row count; the loop body is not
// visible here (presumably it grows `width` by one per decimal digit).
4373 for (
size_t dec = 10;
dec < getGlobalNumRows();
dec *= 10) {
// Table column width: at least 11 characters, plus 2.
4376 width = std::max<size_t>(
width,
static_cast<size_t>(11)) + 2;
// Only the process with myImageID == 0 prints the one-line description and
// the section headings below.
4386 if (
myImageID == 0)
out << this->description() << std::endl;
// The global maximum row length is printed only for a fill-complete graph.
4388 if (isFillComplete() &&
myImageID == 0) {
4389 out <<
"Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
// Describe the row Map, then the column, domain and range Maps when each of
// them is non-null.
4394 rowMap_->describe(
out,
vl);
4395 if (colMap_ != Teuchos::null) {
4396 if (
myImageID == 0)
out <<
"\nColumn map: " << std::endl;
4397 colMap_->describe(
out,
vl);
4399 if (domainMap_ != Teuchos::null) {
4400 if (
myImageID == 0)
out <<
"\nDomain map: " << std::endl;
4401 domainMap_->describe(
out,
vl);
4403 if (rangeMap_ != Teuchos::null) {
4404 if (
myImageID == 0)
out <<
"\nRange map: " << std::endl;
4405 rangeMap_->describe(
out,
vl);
// Per-process statistics: local entry count and local maximum row length.
4413 <<
"Node number of entries = " << this->getLocalNumEntries() << std::endl
4414 <<
"Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4415 if (!indicesAreAllocated()) {
4416 out <<
"Indices are not allocated." << std::endl;
// Header row of the per-row table; every column uses the width computed above.
4428 out << std::setw(
width) <<
"Node ID"
4429 << std::setw(
width) <<
"Global Row"
4430 << std::setw(
width) <<
"Num Entries";
// Print the entries of a row. With global indexing the host view of
// gblInds_wdv is read (the printing statement is not visible here).
4445 if (isGloballyIndexed()) {
4446 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4447 for (
size_t j = 0;
j <
rowinfo.numEntries; ++
j) {
4451 }
else if (isLocallyIndexed()) {
// With local indexing the host view of lclIndsUnpacked_wdv is read and each
// local column index is converted to a global one through the column Map
// before it is printed.
4452 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4453 for (
size_t j = 0;
j <
rowinfo.numEntries; ++
j) {
4455 out << colMap_->getGlobalElement(
collid) <<
" ";
4470template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// copyAndPermute (name taken from the createPrefix call below).
// Inserts into this graph the rows it shares with a source graph: first the
// `numSameIDs` rows, then rows identified through permuteToLIDs /
// permuteFromLIDs.
// NOTE(review): most of the signature and many statements are not visible in
// this excerpt; the comments describe only what is shown.
4479template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4482 const size_t numSameIDs,
// Debug output is driven by the object's verbose_ flag.
4493 const bool verbose = verbose_;
4503 std::unique_ptr<std::string>
prefix;
4505 prefix = this->createPrefix(
"CrsGraph",
"copyAndPermute");
4506 std::ostringstream
os;
4508 std::cerr <<
os.str();
// Tail of a check that reports std::runtime_error when the two permutation
// lists differ in length (the condition itself is not visible here).
4512 std::runtime_error,
"permuteToLIDs.extent(0) = " <<
permuteToLIDs.extent(0) <<
" != permuteFromLIDs.extent(0) = " <<
permuteFromLIDs.extent(0) <<
".");
4520 std::ostringstream
os;
4522 std::cerr <<
os.str();
// Apply the padding object to this graph's storage before inserting. How
// `padding` is computed is not visible here.
4526 applyCrsPadding(*
padding, verbose);
4537 nonconst_global_inds_host_view_type
row_copy;
// First case, as labelled by the debug message: the source is filled, or it is
// not a CrsGraph.
4545 std::ostringstream
os;
4546 os << *
prefix <<
"src_filled || srcCrsGraph == nullptr" <<
endl;
4547 std::cerr <<
os.str();
// Walk the numSameIDs rows; `myid` advances together with `i`.
4554 for (
size_t i = 0;
i < numSameIDs; ++
i, ++
myid) {
// Second case, as labelled by the debug message: the source is an unfilled
// CrsGraph.
4564 std::ostringstream
os;
4565 os << *
prefix <<
"! src_filled && srcCrsGraph != nullptr" <<
endl;
4566 std::cerr <<
os.str();
4568 for (
size_t i = 0;
i < numSameIDs; ++
i, ++
myid) {
// `row` is a view of global column indices (how it is filled is not visible
// here); its contents are inserted into this graph's row `gid`.
4570 global_inds_host_view_type row;
4572 this->insertGlobalIndices(
gid, row.extent(0), row.data());
// Same insertion for a row `mygid`; presumably this is the permuted-rows part
// of the routine. TODO confirm against the full source.
4596 global_inds_host_view_type row;
4598 this->insertGlobalIndices(
mygid, row.extent(0), row.data());
4603 std::ostringstream
os;
4605 std::cerr <<
os.str();
// applyCrsPadding (name taken from the profiling-region label below).
// Re-pads the graph's unpacked index storage according to a `padding` object:
// it builds per-row begin/end offset arrays, hands them together with the
// index array to padCrsArrays, and then stores the updated row offsets and
// per-row entry counts back into the graph.
// NOTE(review): part of the signature and some statements (e.g. the verbose
// guards and the fallback returns of the rank lambda) are not visible in this
// excerpt.
4609template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4612 const bool verbose) {
4616 using LO = local_ordinal_type;
4617 using row_ptrs_type =
4618 typename local_graph_device_type::row_map_type::non_const_type;
4619 using range_policy =
4620 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4622 ProfilingRegion
regionCAP(
"Tpetra::CrsGraph::applyCrsPadding");
4624 std::unique_ptr<std::string>
prefix;
4627 std::ostringstream
os;
4631 std::cerr <<
os.str();
// The rank is only looked up when verbose; otherwise -1 is used. The lambda
// returns the communicator's rank when both Map and communicator are non-null.
4633 const int myRank = !verbose ? -1 : [&]() {
4634 auto map = this->getMap();
4635 if (map.is_null()) {
4638 auto comm = map->getComm();
4639 if (comm.is_null()) {
4642 return comm->getRank();
// Make sure index storage exists; if it does not, allocate it as global
// indices.
4651 if (!indicesAreAllocated()) {
4653 std::ostringstream os;
4654 os << *prefix <<
"Call allocateIndices" << endl;
4655 std::cerr << os.str();
4657 allocateIndices(GlobalIndices, verbose);
4659 TEUCHOS_ASSERT(indicesAreAllocated());
4664 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4666 std::ostringstream os;
4667 os << *prefix <<
"Allocate row_ptrs_beg: "
4668 << rowPtrsUnpacked_dev.extent(0) << endl;
4669 std::cerr << os.str();
4671 using Kokkos::view_alloc;
4672 using Kokkos::WithoutInitializing;
// row_ptrs_beg is a writable copy of the current unpacked row offsets. It is
// allocated uninitialized because the deep_copy below overwrites all of it.
4673 row_ptrs_type row_ptrs_beg(
4674 view_alloc(
"row_ptrs_beg", WithoutInitializing),
4675 rowPtrsUnpacked_dev.extent(0));
4677 Kokkos::deep_copy(execution_space(), row_ptrs_beg, rowPtrsUnpacked_dev);
// N is the number of rows: one less than the offset array length, or 0 when
// the offset array is empty.
4679 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) : size_t(row_ptrs_beg.extent(0) - 1);
4681 std::ostringstream os;
4682 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
4683 std::cerr << os.str();
4685 row_ptrs_type row_ptrs_end(
4686 view_alloc(
"row_ptrs_end", WithoutInitializing), N);
4687 row_ptrs_type num_row_entries;
// Per-row counts are only read and written back when the graph keeps them
// (k_numRowEntries_ is non-empty).
4689 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
// NOTE(review): presumably this waits for the deep_copy issued on
// execution_space() above before the kernels below read row_ptrs_beg.
4691 execution_space().fence();
4693 if (refill_num_row_entries) {
// Row end = row begin + number of entries currently stored in the row.
4697 row_ptrs_type(view_alloc(
"num_row_entries", WithoutInitializing), N);
4698 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4699 Kokkos::parallel_for(
4700 "Fill end row pointers", range_policy(0, N),
4701 KOKKOS_LAMBDA(
const size_t i) {
4702 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
// No per-row counts: every row ends where the next row begins.
4708 Kokkos::parallel_for(
4709 "Fill end row pointers", range_policy(0, N),
4710 KOKKOS_LAMBDA(
const size_t i) {
4711 row_ptrs_end(i) = row_ptrs_beg(i + 1);
// Pad whichever index array is in use. In the locally indexed call shown below
// that is lclIndsUnpacked_wdv; most of the globally indexed call is not
// visible here.
4715 if (isGloballyIndexed()) {
4717 padding, myRank, verbose);
4719 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4720 padding, myRank, verbose);
// Recompute the per-row counts from the begin/end offsets and copy them back
// into the graph.
4723 if (refill_num_row_entries) {
4724 Kokkos::parallel_for(
4725 "Fill num entries", range_policy(0, N),
4726 KOKKOS_LAMBDA(
const size_t i) {
4727 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4729 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
4732 std::ostringstream os;
4733 os << *prefix <<
"Reassign k_rowPtrs_; old size: "
4734 << rowPtrsUnpacked_dev.extent(0) <<
", new size: "
4735 << row_ptrs_beg.extent(0) << endl;
4736 std::cerr << os.str();
// The offset array must have kept its length.
4737 TEUCHOS_ASSERT(rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0));
// Install the new row offsets as the graph's unpacked row pointers.
4740 setRowPtrsUnpacked(row_ptrs_beg);
// Computes the padding needed for the "same" and "permuted" rows of a source
// graph (the createPrefix label below reads "computeCrsPadding(same & permute)").
// Creates a padding_type object and lets computeCrsPaddingForSameIDs and
// computeCrsPaddingForPermutedIDs fill it in.
// NOTE(review): the function-name line, the return type's first line and the
// return statement are not visible in this excerpt.
4743template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4745 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4746CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4748 const RowGraph<LocalOrdinal, GlobalOrdinal, Node>& source,
4749 const size_t numSameIDs,
4750 const Kokkos::DualView<
const local_ordinal_type*,
4751 buffer_device_type>& permuteToLIDs,
4752 const Kokkos::DualView<
const local_ordinal_type*,
4753 buffer_device_type>& permuteFromLIDs,
4754 const bool verbose)
const {
4758 std::unique_ptr<std::string> prefix;
4761 "computeCrsPadding(same & permute)");
4762 std::ostringstream os;
4763 os << *prefix <<
"{numSameIDs: " << numSameIDs
4764 <<
", numPermutes: " << permuteFromLIDs.extent(0) <<
"}"
4766 std::cerr << os.str();
// Rank of this process in the row Map's communicator, or -1 when the row Map
// or its communicator is null.
4769 const int myRank = [&]() {
4770 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4771 return comm.is_null() ? -1 : comm->getRank();
// The padding object is constructed with the rank, the number of same IDs and
// the number of permuted IDs.
4773 std::unique_ptr<padding_type> padding(
4774 new padding_type(myRank, numSameIDs,
4775 permuteFromLIDs.extent(0)));
// Fill in the requirements for the same rows, then for the permuted rows.
4777 computeCrsPaddingForSameIDs(*padding, source,
4778 static_cast<LO
>(numSameIDs));
4779 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
// Records in `padding` the requirements of the first `numSameIDs` local rows.
// For each such row, the row's global column indices in this graph (target)
// and in `source` are fetched and passed to padding.update_same.
// NOTE(review): the `source` parameter's closing line, the early-return body
// and the verbose guards are not visible in this excerpt.
4784template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4785void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4786 computeCrsPaddingForSameIDs(
4787 padding_type& padding,
4788 const RowGraph<local_ordinal_type, global_ordinal_type,
4790 const local_ordinal_type numSameIDs)
const {
4792 using GO = global_ordinal_type;
4793 using Details::Impl::getRowGraphGlobalRow;
4795 const char tfecfFuncName[] =
"computeCrsPaddingForSameIds";
4797 std::unique_ptr<std::string> prefix;
4798 const bool verbose = verbose_;
4800 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4801 std::ostringstream os;
4802 os << *prefix <<
"numSameIDs: " << numSameIDs << endl;
4803 std::cerr << os.str();
// Special case for zero same IDs; its body is not visible here (presumably an
// early return).
4806 if (numSameIDs == 0) {
4810 const map_type& srcRowMap = *(source.getRowMap());
4811 const map_type& tgtRowMap = *rowMap_;
// The source counts as "unique" only if it is a CrsGraph of the same type
// whose isMerged() is true. Any other RowGraph is treated as non-unique.
4812 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4813 const this_CRS_type* srcCrs =
dynamic_cast<const this_CRS_type*
>(&source);
4814 const bool src_is_unique =
4815 srcCrs ==
nullptr ? false : srcCrs->isMerged();
4816 const bool tgt_is_unique = this->isMerged();
// Scratch buffers handed to getRowGraphGlobalRow for source and target rows;
// declared outside the loop so they are shared by all iterations.
4818 std::vector<GO> srcGblColIndsScratch;
4819 std::vector<GO> tgtGblColIndsScratch;
4821 execute_sync_host_uvm_access();
// The same local row index is used with both row Maps to obtain each graph's
// global row index.
4822 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
4823 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
4824 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
4825 auto srcGblColInds = getRowGraphGlobalRow(
4826 srcGblColIndsScratch, source, srcGblRowInd);
4827 auto tgtGblColInds = getRowGraphGlobalRow(
4828 tgtGblColIndsScratch, *
this, tgtGblRowInd);
// Report target and source column indices, with their uniqueness flags, for
// this row.
4829 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
4830 tgtGblColInds.size(), tgt_is_unique,
4831 srcGblColInds.getRawPtr(),
4832 srcGblColInds.size(), src_is_unique);
4835 std::ostringstream os;
4836 os << *prefix <<
"Done" << endl;
4837 std::cerr << os.str();
// Records in `padding` the requirements of the permuted rows. Entry k of
// permuteFromLIDs is a local row of `source`, and entry k of permuteToLIDs is
// the matching local row of this graph. For each pair, both rows' global
// column indices are fetched and passed to padding.update_permute.
// NOTE(review): the `source` parameter's closing line, the early-return body
// and the verbose guards are not visible in this excerpt.
4841template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4842void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4843 computeCrsPaddingForPermutedIDs(
4844 padding_type& padding,
4845 const RowGraph<local_ordinal_type, global_ordinal_type,
4847 const Kokkos::DualView<
const local_ordinal_type*,
4848 buffer_device_type>& permuteToLIDs,
4849 const Kokkos::DualView<
const local_ordinal_type*,
4850 buffer_device_type>& permuteFromLIDs)
const {
4852 using GO = global_ordinal_type;
4853 using Details::Impl::getRowGraphGlobalRow;
4855 const char tfecfFuncName[] =
"computeCrsPaddingForPermutedIds";
4857 std::unique_ptr<std::string> prefix;
4858 const bool verbose = verbose_;
4860 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4861 std::ostringstream os;
4862 os << *prefix <<
"permuteToLIDs.extent(0): "
4863 << permuteToLIDs.extent(0)
4864 <<
", permuteFromLIDs.extent(0): "
4865 << permuteFromLIDs.extent(0) << endl;
4866 std::cerr << os.str();
// Special case for an empty permutation list; its body is not visible here
// (presumably an early return).
4869 if (permuteToLIDs.extent(0) == 0) {
4873 const map_type& srcRowMap = *(source.getRowMap());
4874 const map_type& tgtRowMap = *rowMap_;
// The source counts as "unique" only if it is a CrsGraph of the same type
// whose isMerged() is true.
4875 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
4876 const this_CRS_type* srcCrs =
dynamic_cast<const this_CRS_type*
>(&source);
4877 const bool src_is_unique =
4878 srcCrs ==
nullptr ? false : srcCrs->isMerged();
4879 const bool tgt_is_unique = this->isMerged();
// The host views are read directly, so both DualViews must already be in sync
// on the host.
4881 TEUCHOS_ASSERT(!permuteToLIDs.need_sync_host());
4882 auto permuteToLIDs_h = permuteToLIDs.view_host();
4883 TEUCHOS_ASSERT(!permuteFromLIDs.need_sync_host());
4884 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
4886 std::vector<GO> srcGblColIndsScratch;
4887 std::vector<GO> tgtGblColIndsScratch;
4888 const LO numPermutes =
static_cast<LO
>(permuteToLIDs_h.extent(0));
4890 execute_sync_host_uvm_access();
4891 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
// Source row: local index from permuteFromLIDs, mapped through the source row
// Map.
4892 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
4893 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
4894 auto srcGblColInds = getRowGraphGlobalRow(
4895 srcGblColIndsScratch, source, srcGblRowInd);
// Target row: local index from permuteToLIDs, mapped through this graph's row
// Map.
4896 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
4897 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
4898 auto tgtGblColInds = getRowGraphGlobalRow(
4899 tgtGblColIndsScratch, *
this, tgtGblRowInd);
4900 padding.update_permute(whichPermute, tgtLclRowInd,
4901 tgtGblColInds.getRawPtr(),
4902 tgtGblColInds.size(), tgt_is_unique,
4903 srcGblColInds.getRawPtr(),
4904 srcGblColInds.size(), src_is_unique);
4908 std::ostringstream os;
4909 os << *prefix <<
"Done" << endl;
4910 std::cerr << os.str();
// Computes the padding needed to receive imported rows. `imports` holds the
// incoming global column indices of all rows back to back, and
// numPacketsPerLID[k] is the number of indices for the k-th import. For each
// import, the target row's current global column indices are fetched and both
// index lists are passed to padding->update_import.
// NOTE(review): the return type's first line, the declaration of `offset`,
// the source-index argument of update_import, the verbose guards and the
// return statement are not visible in this excerpt.
4914template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4916 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
4917CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4918 computeCrsPaddingForImports(
4919 const Kokkos::DualView<
const local_ordinal_type*,
4920 buffer_device_type>& importLIDs,
4921 Kokkos::DualView<packet_type*, buffer_device_type> imports,
4922 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
4923 const bool verbose)
const {
4924 using Details::Impl::getRowGraphGlobalRow;
4927 using GO = global_ordinal_type;
4928 const char tfecfFuncName[] =
"computeCrsPaddingForImports";
4930 std::unique_ptr<std::string> prefix;
4932 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
4933 std::ostringstream os;
4934 os << *prefix <<
"importLIDs.extent(0): "
4935 << importLIDs.extent(0)
4936 <<
", imports.extent(0): "
4937 << imports.extent(0)
4938 <<
", numPacketsPerLID.extent(0): "
4939 << numPacketsPerLID.extent(0) << endl;
4940 std::cerr << os.str();
4943 const LO numImports =
static_cast<LO
>(importLIDs.extent(0));
// Rank of this process in the row Map's communicator, or -1 when the row Map
// or its communicator is null.
4944 const int myRank = [&]() {
4945 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
4946 return comm.is_null() ? -1 : comm->getRank();
4948 std::unique_ptr<padding_type> padding(
4949 new padding_type(myRank, numImports));
// The buffers are read on the host below, so sync them to the host if needed.
4951 if (imports.need_sync_host()) {
4952 imports.sync_host();
4954 auto imports_h = imports.view_host();
4955 if (numPacketsPerLID.need_sync_host()) {
4956 numPacketsPerLID.sync_host();
4958 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
// importLIDs is const here, so it can only be asserted to be in sync already.
4960 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
4961 auto importLIDs_h = importLIDs.view_host();
4963 const map_type& tgtRowMap = *rowMap_;
// Incoming indices are always treated as possibly containing duplicates.
4967 constexpr bool src_is_unique =
false;
4968 const bool tgt_is_unique = isMerged();
4970 std::vector<GO> tgtGblColIndsScratch;
4972 execute_sync_host_uvm_access();
4973 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
// This import's indices start at the running `offset` into the imports buffer
// and span origSrcNumEnt entries.
4978 const LO origSrcNumEnt =
4979 static_cast<LO
>(numPacketsPerLID_h[whichImport]);
4980 GO*
const srcGblColInds = imports_h.data() + offset;
4982 const LO tgtLclRowInd = importLIDs_h[whichImport];
4983 const GO tgtGblRowInd =
4984 tgtRowMap.getGlobalElement(tgtLclRowInd);
4985 auto tgtGblColInds = getRowGraphGlobalRow(
4986 tgtGblColIndsScratch, *
this, tgtGblRowInd);
4987 const size_t origTgtNumEnt(tgtGblColInds.size());
4989 padding->update_import(whichImport, tgtLclRowInd,
4990 tgtGblColInds.getRawPtr(),
4991 origTgtNumEnt, tgt_is_unique,
4993 origSrcNumEnt, src_is_unique);
// Advance to the next import's indices.
4994 offset += origSrcNumEnt;
4998 std::ostringstream os;
4999 os << *prefix <<
"Done" << endl;
5000 std::cerr << os.str();
// Computes the padding needed to receive imported rows when the import buffer
// is a byte buffer (`char`) rather than a buffer of global indices. For each
// import, numPacketsPerLID gives the number of bytes of that import. The bytes
// start with an LO entry count, followed by that many GO global column
// indices, which are unpacked into a scratch vector and passed to
// padding->update_import together with the target row's current indices.
// NOTE(review): the return type's first line, the declaration and update of
// `offset`, the definition of `extraVerbose`, some call arguments, the verbose
// guards and the return statement are not visible in this excerpt.
5005template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5007 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type>
5008CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5009 computePaddingForCrsMatrixUnpack(
5010 const Kokkos::DualView<
const local_ordinal_type*,
5011 buffer_device_type>& importLIDs,
5012 Kokkos::DualView<char*, buffer_device_type> imports,
5013 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5014 const bool verbose)
const {
5015 using Details::PackTraits;
5016 using Details::Impl::getRowGraphGlobalRow;
5019 using GO = global_ordinal_type;
5020 const char tfecfFuncName[] =
"computePaddingForCrsMatrixUnpack";
5022 std::unique_ptr<std::string> prefix;
5024 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5025 std::ostringstream os;
5026 os << *prefix <<
"importLIDs.extent(0): "
5027 << importLIDs.extent(0)
5028 <<
", imports.extent(0): "
5029 << imports.extent(0)
5030 <<
", numPacketsPerLID.extent(0): "
5031 << numPacketsPerLID.extent(0) << endl;
5032 std::cerr << os.str();
5034 const bool extraVerbose =
5037 const LO numImports =
static_cast<LO
>(importLIDs.extent(0));
// There must be a byte count for every import.
5038 TEUCHOS_ASSERT(LO(numPacketsPerLID.extent(0)) >= numImports);
// Rank of this process in the row Map's communicator, or -1 when the row Map
// or its communicator is null.
5039 const int myRank = [&]() {
5040 auto comm = rowMap_.is_null() ? Teuchos::null : rowMap_->getComm();
5041 return comm.is_null() ? -1 : comm->getRank();
5043 std::unique_ptr<padding_type> padding(
5044 new padding_type(myRank, numImports));
// The buffers are read on the host below, so sync them to the host if needed.
5046 if (imports.need_sync_host()) {
5047 imports.sync_host();
5049 auto imports_h = imports.view_host();
5050 if (numPacketsPerLID.need_sync_host()) {
5051 numPacketsPerLID.sync_host();
5053 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5055 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
5056 auto importLIDs_h = importLIDs.view_host();
5058 const map_type& tgtRowMap = *rowMap_;
// Incoming indices are always treated as possibly containing duplicates.
5062 constexpr bool src_is_unique =
false;
5063 const bool tgt_is_unique = isMerged();
5065 std::vector<GO> srcGblColIndsScratch;
5066 std::vector<GO> tgtGblColIndsScratch;
5068 execute_sync_host_uvm_access();
5069 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
// Size in bytes of this import's packed data.
5074 const size_t numBytes = numPacketsPerLID_h[whichImport];
5076 std::ostringstream os;
5077 os << *prefix <<
"whichImport=" << whichImport
5078 <<
", numImports=" << numImports
5079 <<
", numBytes=" << numBytes << endl;
5080 std::cerr << os.str();
// Special case for an empty import; its body is not visible here (presumably
// it skips to the next import).
5082 if (numBytes == 0) {
// Unpack the entry count (an LO) found at the start of this import's bytes,
// after checking that it fits both in the import and in the buffer.
5085 LO origSrcNumEnt = 0;
5086 const size_t numEntBeg = offset;
5087 const size_t numEntLen =
5088 PackTraits<LO>::packValueCount(origSrcNumEnt);
5089 TEUCHOS_ASSERT(numBytes >= numEntLen);
5090 TEUCHOS_ASSERT(imports_h.extent(0) >= numEntBeg + numEntLen);
5091 PackTraits<LO>::unpackValue(origSrcNumEnt,
5092 imports_h.data() + numEntBeg);
5094 std::ostringstream os;
5095 os << *prefix <<
"whichImport=" << whichImport
5096 <<
", numImports=" << numImports
5097 <<
", origSrcNumEnt=" << origSrcNumEnt << endl;
5098 std::cerr << os.str();
// The count must be non-negative, and the import must be large enough to hold
// the count plus origSrcNumEnt global indices.
5100 TEUCHOS_ASSERT(origSrcNumEnt >= LO(0));
5101 TEUCHOS_ASSERT(numBytes >=
size_t(numEntLen + origSrcNumEnt *
sizeof(GO)));
// The global indices follow the count. The scratch vector only ever grows, so
// it is reused across imports.
5102 const size_t gidsBeg = numEntBeg + numEntLen;
5103 if (srcGblColIndsScratch.size() <
size_t(origSrcNumEnt)) {
5104 srcGblColIndsScratch.resize(origSrcNumEnt);
5106 GO*
const srcGblColInds = srcGblColIndsScratch.data();
5107 PackTraits<GO>::unpackArray(srcGblColInds,
5108 imports_h.data() + gidsBeg,
// Fetch the target row's current global column indices.
5110 const LO tgtLclRowInd = importLIDs_h[whichImport];
5111 const GO tgtGblRowInd =
5112 tgtRowMap.getGlobalElement(tgtLclRowInd);
5113 auto tgtGblColInds = getRowGraphGlobalRow(
5114 tgtGblColIndsScratch, *
this, tgtGblRowInd);
5115 const size_t origNumTgtEnt(tgtGblColInds.size());
5118 std::ostringstream os;
5119 os << *prefix <<
"whichImport=" << whichImport
5120 <<
", numImports=" << numImports
5121 <<
": Call padding->update_import" << endl;
5122 std::cerr << os.str();
5124 padding->update_import(whichImport, tgtLclRowInd,
5125 tgtGblColInds.getRawPtr(),
5126 origNumTgtEnt, tgt_is_unique,
5128 origSrcNumEnt, src_is_unique);
5133 std::ostringstream os;
5134 os << *prefix <<
"Done" << endl;
5135 std::cerr << os.str();
// Packs the rows of `source` listed in exportLIDs into `exports` and stores
// the number of packets per row in numPacketsPerLID. Three paths are visible:
//   1. `source` is a RowGraph but not a CrsGraph: RowGraph::pack writes into a
//      Teuchos::Array, which is then copied into the host view of `exports`.
//   2. The condition on this graph's column Map and packed row pointers holds:
//      packCrsGraphNew is called on the source CrsGraph.
//   3. Otherwise: packFillActiveNew is called on the source CrsGraph.
// Reports std::runtime_error if exportLIDs and numPacketsPerLID differ in
// length or if this (target) graph is fill complete, and std::invalid_argument
// if `source` is not a RowGraph with the same template parameters.
// NOTE(review): the numPacketsPerLID parameter's closing lines, the verbose
// guards and the closing braces are not visible in this excerpt.
5140template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5141void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5142 packAndPrepare(
const SrcDistObject& source,
5143 const Kokkos::DualView<
const local_ordinal_type*,
5144 buffer_device_type>& exportLIDs,
5145 Kokkos::DualView<packet_type*,
5146 buffer_device_type>& exports,
5147 Kokkos::DualView<
size_t*,
5150 size_t& constantNumPackets) {
5152 using GO = global_ordinal_type;
5154 using crs_graph_type =
5155 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5156 const char tfecfFuncName[] =
"packAndPrepare: ";
5157 ProfilingRegion region_papn(
"Tpetra::CrsGraph::packAndPrepare");
5159 const bool verbose = verbose_;
5160 std::unique_ptr<std::string> prefix;
5162 prefix = this->
createPrefix(
"CrsGraph",
"packAndPrepare");
5163 std::ostringstream os;
5164 os << *prefix <<
"Start" << endl;
5165 std::cerr << os.str();
// One packet count is needed per exported row.
5168 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(exportLIDs.extent(0) != numPacketsPerLID.extent(0),
5170 "exportLIDs.extent(0) = " << exportLIDs.extent(0)
5171 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
// The source must at least be a RowGraph with matching template parameters.
5173 const row_graph_type* srcRowGraphPtr =
5174 dynamic_cast<const row_graph_type*
>(&source);
5175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(srcRowGraphPtr ==
nullptr, std::invalid_argument,
5176 "Source of an Export "
5177 "or Import operation to a CrsGraph must be a RowGraph with the same "
5178 "template parameters.");
// `this` is the target of the Import/Export and must still accept insertions.
5182 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isFillComplete(), std::runtime_error,
5183 "The target graph of an Import or Export must not be fill complete.");
// A null result here means the source is some other RowGraph implementation.
5185 const crs_graph_type* srcCrsGraphPtr =
5186 dynamic_cast<const crs_graph_type*
>(&source);
// Path 1: generic RowGraph source, packed through the ArrayView-based
// RowGraph::pack interface.
5188 if (srcCrsGraphPtr ==
nullptr) {
5189 using Teuchos::ArrayView;
5193 std::ostringstream os;
5194 os << *prefix <<
"Source is a RowGraph but not a CrsGraph"
5196 std::cerr << os.str();
5203 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5204 auto exportLIDs_h = exportLIDs.view_host();
5205 ArrayView<const LO> exportLIDs_av(exportLIDs_h.data(),
5206 exportLIDs_h.extent(0));
5207 Teuchos::Array<GO> exports_a;
// numPacketsPerLID is written on the host by pack(): reset its sync state and
// mark the host copy as modified.
5209 numPacketsPerLID.clear_sync_state();
5210 numPacketsPerLID.modify_host();
5211 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5212 ArrayView<size_t> numPacketsPerLID_av(numPacketsPerLID_h.data(),
5213 numPacketsPerLID_h.extent(0));
5214 srcRowGraphPtr->pack(exportLIDs_av, exports_a, numPacketsPerLID_av,
5215 constantNumPackets);
// Reallocate `exports` only if its length differs from what pack() produced.
5216 const size_t newSize =
static_cast<size_t>(exports_a.size());
5217 if (
static_cast<size_t>(exports.extent(0)) != newSize) {
5218 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5219 exports = exports_dv_type(
"exports", newSize);
// Wrap the Teuchos::Array's memory in an unmanaged host View so it can be
// deep-copied into the host side of `exports`.
5221 Kokkos::View<
const packet_type*, Kokkos::HostSpace,
5222 Kokkos::MemoryUnmanaged>
5223 exports_a_h(exports_a.getRawPtr(), newSize);
5224 exports.clear_sync_state();
5225 exports.modify_host();
5227 Kokkos::deep_copy(exports.view_host(), exports_a_h);
// Path 2: taken when this graph has a column Map and either has packed row
// pointers or owns no rows.
// NOTE(review): the condition queries *this (getColMap, getRowPtrsPackedDevice,
// getRowMap), while the packing below reads *srcCrsGraphPtr. Confirm that
// testing the target rather than the source is intended.
5230 else if (!getColMap().is_null() &&
5231 (this->getRowPtrsPackedDevice().extent(0) != 0 ||
5232 getRowMap()->getLocalNumElements() == 0)) {
5234 std::ostringstream os;
5235 os << *prefix <<
"packCrsGraphNew path" << endl;
5236 std::cerr << os.str();
// An empty (default-constructed) export-PID list is passed to packCrsGraphNew.
5238 using export_pids_type =
5239 Kokkos::DualView<const int*, buffer_device_type>;
5240 export_pids_type exportPIDs;
5242 using NT = node_type;
5244 packCrsGraphNew<LO, GO, NT>(*srcCrsGraphPtr, exportLIDs, exportPIDs,
5245 exports, numPacketsPerLID,
5246 constantNumPackets,
false);
// Path 3: let the source CrsGraph pack itself with packFillActiveNew.
5248 srcCrsGraphPtr->packFillActiveNew(exportLIDs, exports, numPacketsPerLID,
5249 constantNumPackets);
5253 std::ostringstream os;
5254 os << *prefix <<
"Done" << endl;
5255 std::cerr << os.str();
// Packing entry point that writes into a Teuchos::Array of global ordinals.
// NOTE(review): the function name, most of the signature and both branch
// bodies are not visible in this excerpt; presumably this is CrsGraph::pack,
// choosing between two packing implementations. TODO confirm.
5259template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5262 Teuchos::Array<GlobalOrdinal>& exports,
5265 auto col_map = this->getColMap();
// Branch taken when the graph has a column Map and either has packed row
// pointers or owns no rows.
5267 if (!
col_map.is_null() && (
this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() == 0)) {
// Packs the global column indices of the rows listed in exportLIDs into a
// Teuchos::Array (`exports`), in two host-parallel passes:
//   1. a parallel_reduce that stores each row's entry count in
//      numPacketsPerLID and sums the counts into totalNumPackets;
//   2. a parallel_scan that computes each row's offset into `exports` and
//      copies the row's indices there, converting local column indices to
//      global ones through the column Map when the graph is locally indexed.
// Errors inside the kernels are counted in errCount and reported afterwards as
// std::logic_error.
// NOTE(review): the function name and most of the signature are not visible in
// this excerpt; presumably this is CrsGraph::packFillActive. The verbose
// prefix below is created with the label "allocateIndices", which looks like a
// copy-paste leftover. TODO confirm.
5277template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5280 Teuchos::Array<GlobalOrdinal>& exports,
5286 using host_execution_space =
5287 typename Kokkos::View<size_t*, device_type>::
5288 host_mirror_type::execution_space;
5290 const bool verbose = verbose_;
5293 std::unique_ptr<std::string>
prefix;
5295 prefix = this->createPrefix(
"CrsGraph",
"allocateIndices");
5296 std::ostringstream
os;
5298 std::cerr <<
os.str();
// One packet count is needed per exported row.
5300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.size(), std::runtime_error,
5301 "exportLIDs.size() = " << numExportLIDs <<
" != numPacketsPerLID.size()"
5303 << numPacketsPerLID.size() <<
".");
5305 const map_type&
rowMap = *(this->getRowMap());
5306 const map_type*
const colMapPtr = this->colMap_.getRawPtr();
// A locally indexed graph needs its column Map to convert indices to global.
5307 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr ==
nullptr, std::logic_error,
5308 "This graph claims to be locally indexed, but its column Map is nullptr. "
5309 "This should never happen. Please report this bug to the Tpetra "
// The output parameter constantNumPackets is set to zero here.
5313 constantNumPackets = 0;
// Raw pointers are taken so that the lambdas below can capture them by value.
5317 size_t*
const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr();
5318 const LO*
const exportLIDs_raw = exportLIDs.getRawPtr();
5325 Kokkos::RangePolicy<host_execution_space, LO> inputRange(0, numExportLIDs);
5326 size_t totalNumPackets = 0;
5327 size_t errCount = 0;
// errCountView is a View constructed over the address of errCount, so the
// atomic_add calls in the kernels update the same counter that is checked
// after the kernels.
5330 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5332 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5333 constexpr size_t ONE = 1;
5335 execute_sync_host_uvm_access();
// Pass 1: per-row entry counts and their sum. `*this` in the capture list
// captures the graph object by copy.
5336 Kokkos::parallel_reduce(
5337 "Tpetra::CrsGraph::pack: totalNumPackets",
5339 [=, *
this](
const LO& i,
size_t& curTotalNumPackets) {
5340 const GO gblRow =
rowMap.getGlobalElement(exportLIDs_raw[i]);
// A local row that is not in the row Map counts as an error and contributes
// zero packets.
5341 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5342 Kokkos::atomic_add(&errCountView(), ONE);
5343 numPacketsPerLID_raw[i] = 0;
5345 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5346 numPacketsPerLID_raw[i] = numEnt;
5347 curTotalNumPackets += numEnt;
5353 std::ostringstream os;
5354 os << *prefix <<
"totalNumPackets=" << totalNumPackets << endl;
5355 std::cerr << os.str();
5357 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5358 "totalNumPackets count encountered "
5359 "one or more errors! errCount = "
5361 <<
", totalNumPackets = " << totalNumPackets <<
".");
// Size the output for all indices of all exported rows.
5365 exports.resize(totalNumPackets);
5367 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5368 "this->supportsRowViews() returns false; this should never happen. "
5369 "Please report this bug to the Tpetra developers.");
5375 std::ostringstream os;
5376 os << *prefix <<
"Pack into exports" << endl;
5377 std::cerr << os.str();
5382 GO*
const exports_raw = exports.getRawPtr();
// Pass 2: the scan value exportsOffset is the running offset into `exports`.
// `prefix` is captured by reference, everything else by value.
5384 Kokkos::parallel_scan(
"Tpetra::CrsGraph::pack: pack from views",
5385 inputRange, [=, &prefix, *
this](
const LO i,
size_t& exportsOffset,
const bool final) {
5386 const size_t curOffset = exportsOffset;
5387 const GO gblRow =
rowMap.getGlobalElement(exportLIDs_raw[i]);
5388 const RowInfo rowInfo =
5389 this->getRowInfoFromGlobalRowIndex(gblRow);
// Error case: no valid row information for this row.
5391 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5392 if (rowInfo.localRow == TDO::invalid()) {
5394 std::ostringstream os;
5395 os << *prefix <<
": INVALID rowInfo: i=" << i
5396 <<
", lclRow=" << exportLIDs_raw[i] << endl;
5397 std::cerr << os.str();
5399 Kokkos::atomic_add(&errCountView(), ONE);
5400 }
else if (curOffset + rowInfo.numEntries > totalNumPackets) {
// Error case: this row would write past the end of the output.
5402 std::ostringstream os;
5403 os << *prefix <<
": UH OH! For i=" << i <<
", lclRow="
5404 << exportLIDs_raw[i] <<
", gblRow=" << gblRow <<
", curOffset "
5406 << curOffset <<
") + numEnt (= " << rowInfo.numEntries
5407 <<
") > totalNumPackets (= " << totalNumPackets <<
")."
5409 std::cerr << os.str();
5411 Kokkos::atomic_add(&errCountView(), ONE);
5413 const LO numEnt =
static_cast<LO
>(rowInfo.numEntries);
// Locally indexed: convert each local column index to a global one before
// storing it.
5414 if (this->isLocallyIndexed()) {
5415 auto lclColInds = getLocalIndsViewHost(rowInfo);
5417 for (LO k = 0; k < numEnt; ++k) {
5418 const LO lclColInd = lclColInds(k);
5419 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5423 exports_raw[curOffset + k] = gblColInd;
5426 exportsOffset = curOffset + numEnt;
5427 }
else if (this->isGloballyIndexed()) {
// Globally indexed: the indices are stored as they are.
5428 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5430 for (LO k = 0; k < numEnt; ++k) {
5431 const GO gblColInd = gblColInds(k);
5435 exports_raw[curOffset + k] = gblColInd;
5438 exportsOffset = curOffset + numEnt;
// Report any errors counted during the packing pass.
5446 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5447 "Packing encountered "
5448 "one or more errors! errCount = "
5450 <<
", totalNumPackets = " << totalNumPackets <<
".");
5453 std::ostringstream os;
5454 os << *prefix <<
"Done" << endl;
5455 std::cerr << os.str();
// Packs the global column indices of the rows listed in exportLIDs into the
// host side of the `exports` DualView:
//   1. a sequential host loop stores each row's entry count in
//      numPacketsPerLID and sums the counts into totalNumPackets;
//   2. `exports` is reallocated if it is too small;
//   3. a host parallel_scan computes each row's offset and copies the row's
//      indices, converting local column indices to global ones through the
//      column Map when the graph is locally indexed.
// Reports std::runtime_error if exportLIDs and numPacketsPerLID differ in
// length, and std::logic_error for the internal consistency checks below.
// NOTE(review): the numPacketsPerLID parameter's closing lines, the verbose
// guards, some early exits inside the scan body and the end of the function
// are not visible in this excerpt.
5459template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5460void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5461 packFillActiveNew(
const Kokkos::DualView<
const local_ordinal_type*,
5462 buffer_device_type>& exportLIDs,
5463 Kokkos::DualView<packet_type*,
5464 buffer_device_type>& exports,
5465 Kokkos::DualView<
size_t*,
5468 size_t& constantNumPackets)
const {
5471 using GO = global_ordinal_type;
5472 using host_execution_space =
typename Kokkos::View<
size_t*,
5473 device_type>::host_mirror_type::execution_space;
5474 using host_device_type =
5475 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5476 using exports_dv_type =
5477 Kokkos::DualView<packet_type*, buffer_device_type>;
5478 const char tfecfFuncName[] =
"packFillActiveNew: ";
5479 const bool verbose = verbose_;
5481 const auto numExportLIDs = exportLIDs.extent(0);
5482 std::unique_ptr<std::string> prefix;
5484 prefix = this->
createPrefix(
"CrsGraph",
"packFillActiveNew");
5485 std::ostringstream os;
5486 os << *prefix <<
"numExportLIDs: " << numExportLIDs
5487 <<
", numPacketsPerLID.extent(0): "
5488 << numPacketsPerLID.extent(0) << endl;
5489 std::cerr << os.str();
// One packet count is needed per exported row.
5491 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs != numPacketsPerLID.extent(0), std::runtime_error,
5492 "exportLIDs.extent(0) = " << numExportLIDs
5493 <<
" != numPacketsPerLID.extent(0) = "
5494 << numPacketsPerLID.extent(0) <<
".");
// exportLIDs is read on the host and must already be in sync there.
5495 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5496 auto exportLIDs_h = exportLIDs.view_host();
5498 const map_type&
rowMap = *(this->getRowMap());
5499 const map_type*
const colMapPtr = this->colMap_.getRawPtr();
// A locally indexed graph needs its column Map to convert indices to global.
5500 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(this->isLocallyIndexed() && colMapPtr ==
nullptr, std::logic_error,
5501 "This graph claims to be locally indexed, but its column Map is nullptr. "
5502 "This should never happen. Please report this bug to the Tpetra "
// The output parameter constantNumPackets is set to zero here.
5506 constantNumPackets = 0;
// numPacketsPerLID is written on the host below: reset its sync state and mark
// the host copy as modified.
5508 numPacketsPerLID.clear_sync_state();
5509 numPacketsPerLID.modify_host();
5510 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5517 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5518 range_type inputRange(0, numExportLIDs);
5519 size_t totalNumPackets = 0;
5520 size_t errCount = 0;
// errCountView is a View constructed over the address of errCount, so the
// atomic_add calls below update the same counter that is checked later.
5523 Kokkos::View<size_t, host_device_type> errCountView(&errCount);
5524 constexpr size_t ONE = 1;
5527 std::ostringstream os;
5528 os << *prefix <<
"Compute totalNumPackets" << endl;
5529 std::cerr << os.str();
5532 execute_sync_host_uvm_access();
// Step 1: sequential count of entries per exported row.
5533 totalNumPackets = 0;
5534 for (
size_t i = 0; i < numExportLIDs; ++i) {
5535 const LO lclRow = exportLIDs_h[i];
5536 const GO gblRow =
rowMap.getGlobalElement(lclRow);
// A local row that is not in the row Map counts as an error and contributes
// zero packets.
5537 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid()) {
5539 std::ostringstream os;
5540 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5541 <<
" not in row Map on this process" << endl;
5542 std::cerr << os.str();
5544 Kokkos::atomic_add(&errCountView(), ONE);
5545 numPacketsPerLID_h(i) = 0;
5547 const size_t numEnt = this->getNumEntriesInGlobalRow(gblRow);
5548 numPacketsPerLID_h(i) = numEnt;
5549 totalNumPackets += numEnt;
5554 std::ostringstream os;
5555 os << *prefix <<
"totalNumPackets: " << totalNumPackets
5556 <<
", errCount: " << errCount << endl;
5557 std::cerr << os.str();
5559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(errCount != 0, std::logic_error,
5560 "totalNumPackets count encountered "
5561 "one or more errors! totalNumPackets: "
5563 <<
", errCount: " << errCount <<
".");
// Step 2: reallocate `exports` only when it is too small; a larger existing
// buffer is kept.
5566 if (
size_t(exports.extent(0)) < totalNumPackets) {
5568 exports = exports_dv_type(
"exports", totalNumPackets);
5571 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!this->supportsRowViews(), std::logic_error,
5572 "this->supportsRowViews() returns false; this should never happen. "
5573 "Please report this bug to the Tpetra developers.");
5579 std::ostringstream os;
5580 os << *prefix <<
"Pack into exports buffer" << endl;
5581 std::cerr << os.str();
// `exports` is written on the host below.
5584 exports.clear_sync_state();
5585 exports.modify_host();
5586 auto exports_h = exports.view_host();
// Request the host view of whichever index array is in use; the returned view
// is discarded. NOTE(review): presumably this makes the host copy current
// before the parallel kernel reads it. TODO confirm.
5592 if (isLocallyIndexed())
5593 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5594 else if (isGloballyIndexed())
5595 gblInds_wdv.getHostView(Access::ReadOnly);
// Step 3: the scan value exportsOffset is the running offset into exports_h.
// `prefix` is captured by reference and `*this` by copy.
5598 Kokkos::parallel_scan(
"Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5599 inputRange, [=, &prefix, *
this](
const LO i,
size_t& exportsOffset,
const bool final) {
5600 const size_t curOffset = exportsOffset;
5601 const LO lclRow = exportLIDs_h(i);
5602 const GO gblRow =
rowMap.getGlobalElement(lclRow);
// Error case: the local row is not in the row Map.
5603 if (gblRow == Details::OrdinalTraits<GO>::invalid()) {
5605 std::ostringstream os;
5606 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5607 <<
" not in row Map on this process" << endl;
5608 std::cerr << os.str();
5610 Kokkos::atomic_add(&errCountView(), ONE);
// Error case: no valid row information for this row.
5614 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex(gblRow);
5615 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid()) {
5617 std::ostringstream os;
5618 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5619 <<
", gblRow=" << gblRow <<
": invalid rowInfo"
5621 std::cerr << os.str();
5623 Kokkos::atomic_add(&errCountView(), ONE);
// Error case: this row would write past the counted total.
5627 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5629 std::ostringstream os;
5630 os << *prefix <<
"For i=" << i <<
", lclRow=" << lclRow
5631 <<
", gblRow=" << gblRow <<
", curOffset (= "
5632 << curOffset <<
") + numEnt (= " << rowInfo.numEntries
5633 <<
") > totalNumPackets (= " << totalNumPackets
5635 std::cerr << os.str();
5637 Kokkos::atomic_add(&errCountView(), ONE);
5641 const LO numEnt =
static_cast<LO
>(rowInfo.numEntries);
// Locally indexed: convert each local column index to a global one before
// storing it.
5642 if (this->isLocallyIndexed()) {
5643 auto lclColInds = getLocalIndsViewHost(rowInfo);
5645 for (LO k = 0; k < numEnt; ++k) {
5646 const LO lclColInd = lclColInds(k);
5647 const GO gblColInd = colMapPtr->getGlobalElement(lclColInd);
5651 exports_h(curOffset + k) = gblColInd;
5654 exportsOffset = curOffset + numEnt;
5655 }
else if (this->isGloballyIndexed()) {
// Globally indexed: the indices are stored as they are.
5656 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5658 for (LO k = 0; k < numEnt; ++k) {
5659 const GO gblColInd = gblColInds(k);
5663 exports_h(curOffset + k) = gblColInd;
5666 exportsOffset = curOffset + numEnt;
5680 std::ostringstream os;
5681 os << *prefix <<
"errCount=" << errCount <<
"; Done" << endl;
5682 std::cerr << os.str();
// CrsGraph::unpackAndCombine (name taken from the profiling-region label and
// the error-message text below).
// NOTE(review): the embedded source line numbers jump in several places, so
// parts of this definition (including its signature and several conditions)
// are not visible here. The comments describe only what the visible lines show.
5686template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5693 Kokkos::DualView<
size_t*,
// Opens a profiling region labelled with this method's name.
5704 ProfilingRegion
regionCGC(
"Tpetra::CrsGraph::unpackAndCombine");
// Local copy of the verbose_ member; forwarded to applyCrsPadding below.
5705 const bool verbose = verbose_;
// Prefix string that is dereferenced at the front of each debug message.
5707 std::unique_ptr<std::string>
prefix;
// Debug messages are assembled in a std::ostringstream and then written to
// std::cerr with a single insertion of os.str().
5710 std::ostringstream
os;
5712 std::cerr <<
os.str();
// Compute the padding needed for the incoming entries, then apply it.
5715 auto padding = computeCrsPaddingForImports(
5717 applyCrsPadding(*
padding, verbose);
5719 std::ostringstream
os;
5720 os << *
prefix <<
"Done computing & applying padding" <<
endl;
5721 std::cerr <<
os.str();
// Argument check: the message reports a std::runtime_error when
// importLIDs.extent(0) differs from numPacketsPerLID.extent(0).
// NOTE(review): the tested condition itself is not visible here.
5743 std::runtime_error,
": importLIDs.extent(0) = " <<
importLIDs.extent(0) <<
" != numPacketsPerLID.extent(0) = " <<
numPacketsPerLID.extent(0) <<
".");
// Second check's message: a fillComplete target graph is rejected.
5745 ": Import or Export operations are not allowed on a target "
5746 "CrsGraph that is fillComplete.");
// Make the host copy of the imports buffer current before it is read.
5753 if (imports.need_sync_host()) {
5754 imports.sync_host();
// Locally indexed target: the messages below report that a scratch buffer
// for local indices is preallocated, along with its size.
5762 if (isLocallyIndexed()) {
5764 std::ostringstream
os;
5765 os << *
prefix <<
"Preallocate local indices scratch" <<
endl;
5766 std::cerr <<
os.str();
5773 std::ostringstream
os;
5774 os << *
prefix <<
"Local indices scratch size: "
5776 std::cerr <<
os.str();
// Otherwise report which of the two remaining index states the graph is in.
5781 std::ostringstream
os;
5783 if (isGloballyIndexed()) {
5784 os <<
"Graph is globally indexed";
5786 os <<
"Graph is neither locally nor globally indexed";
5789 std::cerr <<
os.str();
// Per-import-row progress message (index i and the number of import LIDs).
5800 std::ostringstream
os;
5801 os << *
prefix <<
"i=" <<
i <<
", numImportLIDs="
5803 std::cerr <<
os.str();
// std::logic_error message for an import LID that is not in the row Map on
// the calling process.
5810 std::logic_error,
"importLIDs[i=" <<
i <<
"]=" <<
lclRow <<
" is not in the row Map on the calling "
// The insertion path depends on whether the graph is locally indexed.
5814 if (!isLocallyIndexed()) {
// Exceptions escaping the insert loop are caught here; the message below
// names this method as the origin.
5826 }
catch (std::exception&
e) {
5828 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
5835 std::ostringstream
os;
5837 std::cerr <<
os.str();
5841template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5844 using Teuchos::Comm;
5845 using Teuchos::null;
5846 using Teuchos::ParameterList;
5860 if (!domainMap_.is_null()) {
5861 if (domainMap_.getRawPtr() == rowMap_.getRawPtr()) {
5871 if (!rangeMap_.is_null()) {
5872 if (rangeMap_.getRawPtr() == rowMap_.getRawPtr()) {
5882 if (!colMap_.is_null()) {
5896 if (!rangeMap_.is_null() &&
5899 if (
params.is_null() || !
params->isSublist(
"Export")) {
5907 if (!domainMap_.is_null() &&
5910 if (
params.is_null() || !
params->isSublist(
"Import")) {
// CrsGraph::getLocalDiagOffsets, overload taking an unmanaged Kokkos::View of
// size_t in device_type's memory space.
// NOTE(review): the embedded source line numbers jump in several places, so
// parts of this definition are not visible here. The comments describe only
// what the visible lines show.
5936template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5938 getLocalDiagOffsets(
const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets)
const {
// Local copy of the verbose_ member.
5943 const bool verbose = verbose_;
// Debug-message prefix, built by createPrefix from the class and method names.
5945 std::unique_ptr<std::string>
prefix;
5947 prefix = this->createPrefix(
"CrsGraph",
"getLocalDiagOffsets");
// Debug message reporting the size of the caller's output view, written to
// std::cerr.
5948 std::ostringstream
os;
5949 os << *
prefix <<
"offsets.extent(0)=" << offsets.extent(0)
5951 std::cerr <<
os.str();
// Number of local rows, narrowed to LO.
5955 const LO
lclNumRows =
static_cast<LO
>(this->getLocalNumRows());
// std::invalid_argument message for an output view shorter than the number of
// local rows. NOTE(review): the tested condition itself is not visible here.
5957 std::invalid_argument,
"offsets.extent(0) = " << offsets.extent(0) <<
" < getLocalNumRows() = " <<
lclNumRows <<
".");
// Record whether the graph's indices are sorted.
5982 const bool sorted = this->isSorted();
// fillComplete graphs fetch the local graph through getLocalGraphDevice().
5983 if (isFillComplete()) {
5984 auto lclGraph = this->getLocalGraphDevice();
// Host mirror of the caller's offsets view.
5992 auto offsets_h = Kokkos::create_mirror_view(offsets);
// A local column index equal to OrdinalTraits<LO>::invalid() is handled
// specially. NOTE(review): the handling itself is not visible here.
6002 if (
lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid()) {
6010 const size_t hint = 0;
// Read-only host view type for a row's local column indices.
6021 typename local_inds_dualv_type::t_host::const_type
lclColInds;
// Debug message that begins a list of wrong offsets.
6056 std::ostringstream
os;
6057 os << *
prefix <<
"Wrong offsets: [";
6066 std::cerr <<
os.str();
// A reduction over the graph's communicator is set up here.
// NOTE(review): the reduceAll call itself is not visible here.
6071 using Teuchos::reduceAll;
6072 Teuchos::RCP<const Teuchos::Comm<int>> comm = this->getComm();
// Build the error report; gblResults[4] is printed as the rank of a process
// on which a problem was noticed.
6095 std::ostringstream
os;
6096 os <<
"Issue(s) that we noticed (on Process " <<
gblResults[4] <<
", "
6097 "possibly among others): "
// One bullet per category of failure.
6100 os <<
" - The column Map does not contain at least one diagonal entry "
6105 os <<
" - On one or more processes, some row does not contain a "
6110 os <<
" - On one or more processes, some offsets are incorrect."
6114 os <<
" - One or more processes had some other error."
// CrsGraph::getLocalOffRankOffsets (name taken from the createPrefix call).
// Hands the caller the k_offRankOffsets_ view, recomputing it when needed.
// NOTE(review): the embedded source line numbers jump in several places, so
// the signature and some control flow (returns, else branches, closing braces)
// are not visible here. The comments describe only what the visible lines show.
6122template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Local copy of the verbose_ member.
6127 const bool verbose = verbose_;
// Debug-message prefix, built by createPrefix from the class and method names.
6129 std::unique_ptr<std::string>
prefix;
6131 prefix = this->createPrefix(
"CrsGraph",
"getLocalOffRankOffsets");
// Debug message reporting the size of `offsets`, written to std::cerr.
6132 std::ostringstream
os;
6133 os << *
prefix <<
"offsets.extent(0)=" << offsets.extent(0)
6135 std::cerr <<
os.str();
6141 const size_t lclNumRows = this->getLocalNumRows();
// Cached result is usable only if the flag is set and the stored view has
// lclNumRows + 1 entries; in that case it is assigned to `offsets`.
6143 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) ==
lclNumRows + 1) {
6144 offsets = k_offRankOffsets_;
// Mark the cached offsets as not available.
6147 haveLocalOffRankOffsets_ =
false;
// For a fillComplete graph: allocate a view of lclNumRows + 1 entries without
// initializing it, fill it through Details::getGraphOffRankOffsets, hand it
// to the caller, and set the flag.
6162 if (isFillComplete()) {
6163 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing(
"offRankOffset"),
lclNumRows + 1);
6164 auto lclGraph = this->getLocalGraphDevice();
6165 ::Tpetra::Details::getGraphOffRankOffsets(k_offRankOffsets_,
6168 offsets = k_offRankOffsets_;
6169 haveLocalOffRankOffsets_ =
true;
// HelpGetLocalDiagOffsets: helper struct template keyed on a device type plus
// a second template argument that is specialized below for true and false.
// The visible default for that argument is
// std::is_same<DeviceType::memory_space, Kokkos::HostSpace>::value.
// NOTE(review): the embedded source line numbers jump in several places, so
// the second parameter's declaration, one typedef name, the return type of
// copyBackIfNeeded and some bodies are not visible here.
6193template <
class DeviceType,
6195 std::is_same<
typename DeviceType::memory_space,
6196 Kokkos::HostSpace>::value>
// The primary template is empty; only the two specializations carry members.
6197struct HelpGetLocalDiagOffsets {};
// Specialization for `true`: both view typedefs visible here are unmanaged
// Kokkos::HostSpace views of size_t.
6199template <
class DeviceType>
6200struct HelpGetLocalDiagOffsets<DeviceType, true> {
6201 typedef DeviceType device_type;
6202 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6203 Kokkos::MemoryUnmanaged>
6204 device_offsets_type;
6205 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6206 Kokkos::MemoryUnmanaged>
// Produces the view to be filled, given the caller's host view.
6209 static device_offsets_type
6210 getDeviceOffsets(
const host_offsets_type& hostOffsets) {
// Both parameters are unnamed in this specialization.
6217 copyBackIfNeeded(
const host_offsets_type& ,
6218 const device_offsets_type& ) {
// Specialization for `false`: device_offsets_type is a Kokkos::View of size_t
// on device_type, distinct from the unmanaged HostSpace view typedef.
6222template <
class DeviceType>
6223struct HelpGetLocalDiagOffsets<DeviceType, false> {
6224 typedef DeviceType device_type;
6228 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6229 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
6230 Kokkos::MemoryUnmanaged>
// Returns a new view labelled "offsets" with the same length as hostOffsets.
6233 static device_offsets_type
6234 getDeviceOffsets(
const host_offsets_type& hostOffsets) {
6237 return device_offsets_type(
"offsets", hostOffsets.extent(0));
// Copies the contents of deviceOffsets into hostOffsets.
6241 copyBackIfNeeded(
const host_offsets_type& hostOffsets,
6242 const device_offsets_type& deviceOffsets) {
6244 Kokkos::deep_copy(hostOffsets, deviceOffsets);
6249template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6255 "The graph does not yet have a column Map.");
6256 const LO
myNumRows =
static_cast<LO
>(this->getLocalNumRows());
6257 if (
static_cast<LO
>(offsets.size()) !=
myNumRows) {
6274 typedef typename helper_type::host_offsets_type host_offsets_type;
6284template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// CrsGraph::transferAndFillComplete (name taken from the `prefix` string).
// Fills `destGraph` from *this using `rowTransfer` and finishes by calling
// destGraph->expertStaticFillComplete(...).
//
// Visible parameters:
//   rowTransfer     transfer object for rows; per the error text it must be an
//                   Import or an Export.
//   domainTransfer  optional (RCP may be null) transfer for the domain Map.
//   domainMap       domain Map for the result.
//   rangeMap        range Map for the result; if null, getRangeMap() is used.
//   params          optional list; keys read here are "Reverse Mode",
//                   "Restrict Communicator", "compute global constants"
//                   (default true) and the "CrsGraph" sublist.
//
// Throws std::invalid_argument / std::logic_error through
// TEUCHOS_TEST_FOR_EXCEPTION with the messages spelled out below.
//
// NOTE(review): the embedded source line numbers jump in several places, so
// parts of this definition (the function-name line, the destGraph parameter,
// several conditions and closing braces) are not visible here. The comments
// describe only what the visible lines show.
6290template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6293 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>&
rowTransfer,
6294 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>>&
domainTransfer,
6295 const Teuchos::RCP<const map_type>& domainMap,
6296 const Teuchos::RCP<const map_type>&
rangeMap,
6297 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
6299 using Teuchos::ArrayRCP;
6300 using Teuchos::ArrayView;
6301 using Teuchos::Comm;
6302 using Teuchos::ParameterList;
6312 using NT = node_type;
// Common prefix for every exception message raised by this method.
6316 const char*
prefix =
"Tpetra::CrsGraph::transferAndFillComplete: ";
// Messages for the checks on the concrete types of the two transfer objects.
6329 prefix <<
"The 'rowTransfer' input argument must be either an Import or "
6330 "an Export, and its template parameters must match the corresponding "
6331 "template parameters of the CrsGraph.");
6346 prefix <<
"The 'domainTransfer' input argument must be either an "
6347 "Import or an Export, and its template parameters must match the "
6348 "corresponding template parameters of the CrsGraph.");
6354 std::invalid_argument,
6355 prefix <<
"The 'rowTransfer' and 'domainTransfer' input arguments "
6356 "must be of the same type (either Import or Export).");
6362 std::invalid_argument,
6363 prefix <<
"The 'rowTransfer' and 'domainTransfer' input arguments "
6364 "must be of the same type (either Import or Export).");
// Communication is needed only when the transfer's source Map is distributed.
6370 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
// Options read from `params`; both default to false when absent.
6376 bool reverseMode =
false;
6377 bool restrictComm =
false;
6378 RCP<ParameterList> graphparams;
6379 if (!params.is_null()) {
6380 reverseMode = params->get(
"Reverse Mode", reverseMode);
6381 restrictComm = params->get(
"Restrict Communicator", restrictComm);
6382 graphparams = sublist(params,
"CrsGraph");
// Row Map of the result: the transfer's source Map in reverse mode, otherwise
// its target Map. The Base* copies keep the Maps as they were at this point.
6387 RCP<const map_type> MyRowMap = reverseMode ? rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6388 RCP<const map_type> MyColMap;
6390 RCP<const map_type> MyRangeMap = !rangeMap.is_null() ? rangeMap : getRangeMap();
6391 RCP<const map_type> BaseRowMap = MyRowMap;
6392 RCP<const map_type> BaseDomainMap = MyDomainMap;
// A caller-supplied destGraph must be empty (neither locally nor globally
// indexed), share the result's row Map, and pass checkSizes(*this).
6400 if (!destGraph.is_null()) {
6411 const bool NewFlag =
6412 !destGraph->isLocallyIndexed() && !destGraph->isGloballyIndexed();
6413 TEUCHOS_TEST_FOR_EXCEPTION(!NewFlag, std::invalid_argument,
6414 prefix <<
"The input argument 'destGraph' is only allowed to be nonnull, "
6415 "if its graph is empty (neither locally nor globally indexed).");
6424 TEUCHOS_TEST_FOR_EXCEPTION(
6425 !destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6426 prefix <<
"The (row) Map of the input argument 'destGraph' is not the "
6427 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6429 TEUCHOS_TEST_FOR_EXCEPTION(
6430 !destGraph->checkSizes(*
this), std::invalid_argument,
6431 prefix <<
"You provided a nonnull destination graph, but checkSizes() "
6432 "indicates that it is not a legal target for redistribution from "
6433 "the source graph (*this). This may mean that they do not have the "
6434 "same dimensions.");
// The transfer must start from this graph's row Map: its source Map in
// forward mode, its target Map in reverse mode.
6448 TEUCHOS_TEST_FOR_EXCEPTION(
6449 !(reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6450 std::invalid_argument, prefix <<
"rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6452 TEUCHOS_TEST_FOR_EXCEPTION(
6453 !(!reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6454 std::invalid_argument, prefix <<
"rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
// A domain transfer, when given, must end (Import) or start (Export) at
// `domainMap`.
6457 TEUCHOS_TEST_FOR_EXCEPTION(
6458 !xferDomainAsImport.is_null() && !xferDomainAsImport->getTargetMap()->isSameAs(*
domainMap),
6459 std::invalid_argument,
6460 prefix <<
"The target map of the 'domainTransfer' input argument must be "
6461 "the same as the rebalanced domain map 'domainMap'");
6463 TEUCHOS_TEST_FOR_EXCEPTION(
6464 !xferDomainAsExport.is_null() && !xferDomainAsExport->getSourceMap()->isSameAs(*
domainMap),
6465 std::invalid_argument,
6466 prefix <<
"The source map of the 'domainTransfer' input argument must be "
6467 "the same as the rebalanced domain map 'domainMap'");
// LID lists taken from the transfer; reverse mode swaps export with remote
// and permute-to with permute-from.
6480 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6481 ArrayView<const LO> ExportLIDs = reverseMode ? rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6482 ArrayView<const LO> RemoteLIDs = reverseMode ? rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6483 ArrayView<const LO> PermuteToLIDs = reverseMode ? rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6484 ArrayView<const LO> PermuteFromLIDs = reverseMode ? rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6485 Distributor& Distor = rowTransfer.getDistributor();
6488 Teuchos::Array<int> SourcePids;
6489 Teuchos::Array<int> TargetPids;
6490 int MyPID = getComm()->getRank();
6493 RCP<const map_type> ReducedRowMap, ReducedColMap,
6494 ReducedDomainMap, ReducedRangeMap;
6495 RCP<const Comm<int>> ReducedComm;
// Create the destination graph on the result's row Map if none was supplied.
6499 if (destGraph.is_null()) {
6500 destGraph = rcp(
new this_CRS_type(MyRowMap, 0, graphparams));
// Build Maps over the communicator left after removeEmptyProcesses() and
// switch the working Maps and MyPID over to them.
// NOTE(review): presumably guarded by `restrictComm`; the guarding condition
// is not visible here.
6507 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6508 ReducedComm = ReducedRowMap.is_null() ? Teuchos::null : ReducedRowMap->getComm();
6509 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6511 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ? ReducedRowMap : MyDomainMap->replaceCommWithSubset(ReducedComm);
6512 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ? ReducedRowMap : MyRangeMap->replaceCommWithSubset(ReducedComm);
6515 MyRowMap = ReducedRowMap;
6516 MyDomainMap = ReducedDomainMap;
6517 MyRangeMap = ReducedRangeMap;
6520 if (!ReducedComm.is_null()) {
6521 MyPID = ReducedComm->getRank();
6526 ReducedComm = MyRowMap->getComm();
// Fill SourcePids with one process rank per local column-Map entry. Which
// branch runs depends on restrictComm, on whether this graph has an Importer,
// and on whether the requested domain Map equals the current one.
6535 RCP<const import_type> MyImporter = getImporter();
6538 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6540 if (!restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6547 Import_Util::getPids(*MyImporter, SourcePids,
false);
6548 }
else if (restrictComm && !MyImporter.is_null() && bSameDomainMap) {
6551 ivector_type SourceDomain_pids(getDomainMap(),
true);
6552 ivector_type SourceCol_pids(getColMap());
6554 SourceDomain_pids.putScalar(MyPID);
6556 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
6557 SourcePids.resize(getColMap()->getLocalNumElements());
6558 SourceCol_pids.get1dCopy(SourcePids());
6559 }
else if (MyImporter.is_null() && bSameDomainMap) {
6561 SourcePids.resize(getColMap()->getLocalNumElements());
6562 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6563 }
else if (!MyImporter.is_null() &&
6564 !domainTransfer.is_null()) {
6571 ivector_type TargetDomain_pids(
domainMap);
6572 TargetDomain_pids.putScalar(MyPID);
6575 ivector_type SourceDomain_pids(getDomainMap());
6578 ivector_type SourceCol_pids(getColMap());
// Send the target-side ranks back to the source domain Map, using doExport or
// doImport according to the transfer's type and the mode.
6580 if (!reverseMode && !xferDomainAsImport.is_null()) {
6581 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
6582 }
else if (reverseMode && !xferDomainAsExport.is_null()) {
6583 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
6584 }
else if (!reverseMode && !xferDomainAsExport.is_null()) {
6585 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
6586 }
else if (reverseMode && !xferDomainAsImport.is_null()) {
6587 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
6589 TEUCHOS_TEST_FOR_EXCEPTION(
6590 true, std::logic_error,
6591 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
6593 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
6594 SourcePids.resize(getColMap()->getLocalNumElements());
6595 SourceCol_pids.get1dCopy(SourcePids());
6596 }
else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6597 getDomainMap()->isSameAs(*getRowMap())) {
// Same idea as above, but through the row transfer, because both the new and
// the current domain Map coincide with the respective row Map.
6600 ivector_type SourceRow_pids(getRowMap());
6601 ivector_type SourceCol_pids(getColMap());
6603 TargetRow_pids.putScalar(MyPID);
6604 if (!reverseMode && xferAsImport !=
nullptr) {
6605 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport,
INSERT);
6606 }
else if (reverseMode && xferAsExport !=
nullptr) {
6607 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport,
INSERT);
6608 }
else if (!reverseMode && xferAsExport !=
nullptr) {
6609 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport,
INSERT);
6610 }
else if (reverseMode && xferAsImport !=
nullptr) {
6611 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport,
INSERT);
6613 TEUCHOS_TEST_FOR_EXCEPTION(
6614 true, std::logic_error,
6615 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
6617 SourceCol_pids.doImport(SourceRow_pids, *MyImporter,
INSERT);
6618 SourcePids.resize(getColMap()->getLocalNumElements());
6619 SourceCol_pids.get1dCopy(SourcePids());
// No supported combination of Maps matched.
6621 TEUCHOS_TEST_FOR_EXCEPTION(
6622 true, std::invalid_argument,
6623 prefix <<
"This method only allows either domainMap == getDomainMap(), "
6624 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
// Size the communication buffers. constantNumPackets == 0 is the case that
// uses the per-LID packet-count arrays.
6628 size_t constantNumPackets = destGraph->constantNumberOfPackets();
6629 if (constantNumPackets == 0) {
6630 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6637 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6638 destGraph->reallocImportsIfNeeded(rbufLen,
false,
nullptr);
// Pack step: per-LID export counts are written on the host.
6643 destGraph->numExportPacketsPerLID_.modify_host();
6644 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6649 numExportPacketsPerLID, ExportLIDs,
6650 SourcePids, constantNumPackets);
// Communication step. The first group uses the Distributor's reverse posts,
// the second its forward posts. With per-LID counts, the counts are exchanged
// first, the imports buffer is resized to their sum, and then the data moves.
6657 if (communication_needed) {
6659 if (constantNumPackets == 0) {
6663 destGraph->numExportPacketsPerLID_.sync_host();
6664 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6666 destGraph->numImportPacketsPerLID_.sync_host();
6667 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
6670 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6671 destGraph->numImportPacketsPerLID_.view_host());
6672 size_t totalImportPackets = 0;
6674 totalImportPackets += numImportPacketsPerLID[i];
6679 destGraph->reallocImportsIfNeeded(totalImportPackets,
false,
nullptr);
6680 destGraph->imports_.modify_host();
6681 auto hostImports = destGraph->imports_.view_host();
6684 destGraph->exports_.sync_host();
6685 auto hostExports = destGraph->exports_.view_host();
6686 Distor.doReversePostsAndWaits(hostExports,
6687 numExportPacketsPerLID,
6689 numImportPacketsPerLID);
6691 destGraph->imports_.modify_host();
6692 auto hostImports = destGraph->imports_.view_host();
6695 destGraph->exports_.sync_host();
6696 auto hostExports = destGraph->exports_.view_host();
6697 Distor.doReversePostsAndWaits(hostExports,
6702 if (constantNumPackets == 0) {
6706 destGraph->numExportPacketsPerLID_.sync_host();
6707 destGraph->numImportPacketsPerLID_.sync_host();
6708 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
6709 destGraph->numImportPacketsPerLID_.view_host());
6711 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6713 size_t totalImportPackets = 0;
6715 totalImportPackets += numImportPacketsPerLID[i];
6720 destGraph->reallocImportsIfNeeded(totalImportPackets,
false,
nullptr);
6721 destGraph->imports_.modify_host();
6722 auto hostImports = destGraph->imports_.view_host();
6725 destGraph->exports_.sync_host();
6726 auto hostExports = destGraph->exports_.view_host();
6727 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
6729 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
6731 destGraph->imports_.modify_host();
6732 auto hostImports = destGraph->imports_.view_host();
6735 destGraph->exports_.sync_host();
6736 auto hostExports = destGraph->exports_.view_host();
6737 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
// Unpack step: read the received counts and data on the host and build CSR
// arrays (row pointers of length N + 1 plus column indices).
6750 destGraph->numImportPacketsPerLID_.sync_host();
6751 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
6753 destGraph->imports_.sync_host();
6754 Teuchos::ArrayView<const packet_type> hostImports =
6758 numImportPacketsPerLID,
6759 constantNumPackets,
INSERT,
6760 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
6761 size_t N = BaseRowMap->getLocalNumElements();
6764 ArrayRCP<size_t> CSR_rowptr(N + 1);
6765 ArrayRCP<GO> CSR_colind_GID;
6766 ArrayRCP<LO> CSR_colind_LID;
6767 CSR_colind_GID.resize(mynnz);
// When LO and GO are the same type, the local-index array reuses the
// global-index storage instead of being allocated separately.
6771 if (
typeid(LO) ==
typeid(GO)) {
6772 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
6774 CSR_colind_LID.resize(mynnz);
6783 numImportPacketsPerLID, constantNumPackets,
6784 INSERT, NumSameIDs, PermuteToLIDs,
6785 PermuteFromLIDs, N, mynnz, MyPID,
6786 CSR_rowptr(), CSR_colind_GID(),
6787 SourcePids(), TargetPids);
// Build the column Map and reindex the CSR data, then install the column Map
// on the destination graph.
6797 Teuchos::Array<int> RemotePids;
6798 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
6802 TargetPids, RemotePids,
6810 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
6811 MyColMap = ReducedColMap;
6815 destGraph->replaceColMap(MyColMap);
6822 if (ReducedComm.is_null()) {
// Import-like transfers only sort each row's entries; Export-like transfers
// sort and merge them, after which the index array is shrunk if the merge
// changed the entry count.
6829 if ((!reverseMode && xferAsImport !=
nullptr) ||
6830 (reverseMode && xferAsExport !=
nullptr)) {
6831 Import_Util::sortCrsEntries(CSR_rowptr(),
6833 }
else if ((!reverseMode && xferAsExport !=
nullptr) ||
6834 (reverseMode && xferAsImport !=
nullptr)) {
6835 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
6837 if (CSR_rowptr[N] != mynnz) {
6838 CSR_colind_LID.resize(CSR_rowptr[N]);
6841 TEUCHOS_TEST_FOR_EXCEPTION(
6842 true, std::logic_error,
6843 prefix <<
"Should never get here! Please report this bug to a Tpetra developer.");
// Hand the finished CSR arrays to the destination graph.
6851 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
// Final step: build the Importer from the domain and column Maps plus
// RemotePids, then complete the fill. esfc_params is a stack object, so it is
// wrapped in a non-owning RCP (second argument false).
6857 Teuchos::ParameterList esfc_params;
6860 RCP<import_type> MyImport = rcp(
new import_type(MyDomainMap, MyColMap, RemotePids));
6864 if (!params.is_null())
6865 esfc_params.set(
"compute global constants", params->get(
"compute global constants",
true));
6867 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
6868 MyImport, Teuchos::null, rcp(&esfc_params,
false));
6871template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6875 const Teuchos::RCP<const map_type>& domainMap,
6876 const Teuchos::RCP<const map_type>&
rangeMap,
6877 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
6881template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6886 const Teuchos::RCP<const map_type>& domainMap,
6887 const Teuchos::RCP<const map_type>&
rangeMap,
6888 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
6892template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6896 const Teuchos::RCP<const map_type>& domainMap,
6897 const Teuchos::RCP<const map_type>&
rangeMap,
6898 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
6902template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6907 const Teuchos::RCP<const map_type>& domainMap,
6908 const Teuchos::RCP<const map_type>&
rangeMap,
6909 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
// Member-wise swap between `graph` and *this: every statement visible below
// is a std::swap of one data member of `graph` with the same member of *this.
// NOTE(review): the embedded source line numbers jump in several places, so
// the signature and any statements between the visible swaps are not visible
// here.
6913template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6916 std::swap(
graph.need_sync_host_uvm_access,
this->need_sync_host_uvm_access);
// Maps.
6918 std::swap(
graph.rowMap_,
this->rowMap_);
6919 std::swap(
graph.colMap_,
this->colMap_);
6920 std::swap(
graph.rangeMap_,
this->rangeMap_);
6921 std::swap(
graph.domainMap_,
this->domainMap_);
// Importer and exporter.
6923 std::swap(
graph.importer_,
this->importer_);
6924 std::swap(
graph.exporter_,
this->exporter_);
// Entry counts and allocation size.
6926 std::swap(
graph.nodeMaxNumRowEntries_,
this->nodeMaxNumRowEntries_);
6928 std::swap(
graph.globalNumEntries_,
this->globalNumEntries_);
6929 std::swap(
graph.globalMaxNumRowEntries_,
this->globalMaxNumRowEntries_);
6931 std::swap(
graph.numAllocForAllRows_,
this->numAllocForAllRows_);
// Row-pointer arrays (packed and unpacked, device and host) and the flag
// recording whether the packed and unpacked versions match.
6933 std::swap(
graph.rowPtrsPacked_dev_,
this->rowPtrsPacked_dev_);
6934 std::swap(
graph.rowPtrsPacked_host_,
this->rowPtrsPacked_host_);
6936 std::swap(
graph.rowPtrsUnpacked_dev_,
this->rowPtrsUnpacked_dev_);
6937 std::swap(
graph.rowPtrsUnpacked_host_,
this->rowPtrsUnpacked_host_);
6938 std::swap(
graph.packedUnpackedRowPtrsMatch_,
this->packedUnpackedRowPtrsMatch_);
6940 std::swap(
graph.k_offRankOffsets_,
this->k_offRankOffsets_);
// Column-index storage (local unpacked, global, local packed).
6942 std::swap(
graph.lclIndsUnpacked_wdv,
this->lclIndsUnpacked_wdv);
6943 std::swap(
graph.gblInds_wdv,
this->gblInds_wdv);
6944 std::swap(
graph.lclIndsPacked_wdv,
this->lclIndsPacked_wdv);
6946 std::swap(
graph.storageStatus_,
this->storageStatus_);
// State flags.
6948 std::swap(
graph.indicesAreAllocated_,
this->indicesAreAllocated_);
6949 std::swap(
graph.indicesAreLocal_,
this->indicesAreLocal_);
6950 std::swap(
graph.indicesAreGlobal_,
this->indicesAreGlobal_);
6951 std::swap(
graph.fillComplete_,
this->fillComplete_);
6952 std::swap(
graph.indicesAreSorted_,
this->indicesAreSorted_);
6953 std::swap(
graph.noRedundancies_,
this->noRedundancies_);
6954 std::swap(
graph.haveLocalConstants_,
this->haveLocalConstants_);
6955 std::swap(
graph.haveGlobalConstants_,
this->haveGlobalConstants_);
6956 std::swap(
graph.haveLocalOffRankOffsets_,
this->haveLocalOffRankOffsets_);
6958 std::swap(
graph.sortGhostsAssociatedWithEachProcessor_,
this->sortGhostsAssociatedWithEachProcessor_);
// Per-row allocation sizes, per-row entry counts, and the nonlocals_ member.
6960 std::swap(
graph.k_numAllocPerRow_,
this->k_numAllocPerRow_);
6961 std::swap(
graph.k_numRowEntries_,
this->k_numRowEntries_);
6962 std::swap(
graph.nonlocals_,
this->nonlocals_);
// Field-by-field comparison of *this against `graph`. The running result
// lives in `output`: each visible check keeps `output` when the two sides
// agree and sets it to false otherwise, so a failure is never undone.
// NOTE(review): the embedded source line numbers jump in several places, so
// the signature (including the function name), the declaration of `output`
// and several loop bodies are not visible here.
6965template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Take copies of the values stored under `key` in m1 and m2 and sort each
// copy before the element-wise loop below.
6975 auto v1 =
m1.find(
key)->second;
6976 auto v2 =
m2.find(
key)->second;
6977 std::sort(
v1.begin(),
v1.end());
6978 std::sort(
v2.begin(),
v2.end());
// The loop stops as soon as `output` becomes false.
6981 for (
size_t i = 0;
output &&
i <
v1.size();
i++) {
// Scalar members and flags.
6996 output = this->nodeMaxNumRowEntries_ ==
graph.nodeMaxNumRowEntries_ ?
output :
false;
6999 output = this->globalMaxNumRowEntries_ ==
graph.globalMaxNumRowEntries_ ?
output :
false;
7001 output = this->numAllocForAllRows_ ==
graph.numAllocForAllRows_ ?
output :
false;
7005 output = this->indicesAreAllocated_ ==
graph.indicesAreAllocated_ ?
output :
false;
7011 output = this->haveLocalConstants_ ==
graph.haveLocalConstants_ ?
output :
false;
7012 output = this->haveGlobalConstants_ ==
graph.haveGlobalConstants_ ?
output :
false;
7013 output = this->haveLocalOffRankOffsets_ ==
graph.haveLocalOffRankOffsets_ ?
output :
false;
7014 output = this->sortGhostsAssociatedWithEachProcessor_ ==
graph.sortGhostsAssociatedWithEachProcessor_ ?
output :
false;
// Array members: compare lengths first, then loop over the entries only if
// the result is still true and the array is non-empty.
7022 output = this->k_numAllocPerRow_.extent(0) ==
graph.k_numAllocPerRow_.extent(0) ?
output :
false;
7023 if (
output && this->k_numAllocPerRow_.extent(0) > 0) {
7024 for (
size_t i = 0;
output &&
i < this->k_numAllocPerRow_.extent(0);
i++)
7030 output = this->k_numRowEntries_.extent(0) ==
graph.k_numRowEntries_.extent(0) ?
output :
false;
7031 if (
output && this->k_numRowEntries_.extent(0) > 0) {
7032 for (
size_t i = 0;
output &&
i < this->k_numRowEntries_.extent(0);
i++)
// Unpacked row pointers are read through their host view.
7038 auto rowPtrsThis = this->getRowPtrsUnpackedHost();
// Local column indices, read through read-only host views.
7046 output = this->lclIndsUnpacked_wdv.extent(0) ==
graph.lclIndsUnpacked_wdv.extent(0) ?
output :
false;
7047 if (
output && this->lclIndsUnpacked_wdv.extent(0) > 0) {
7048 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7049 auto indGraph =
graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
// Global column indices, read through read-only host views.
7055 output = this->gblInds_wdv.extent(0) ==
graph.gblInds_wdv.extent(0) ?
output :
false;
7056 if (
output && this->gblInds_wdv.extent(0) > 0) {
7057 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7058 auto indtGraph =
graph.gblInds_wdv.getHostView(Access::ReadOnly);
// Helper that runs the Kokkos kernel labelled
// "Tpetra_CrsGraph::copyAndPermuteNew". For each (source row, target row)
// pair it inserts the source row's column indices, converted to global
// indices, into the target row's global-index storage when they are not
// already there.
// NOTE(review): the embedded source line numbers jump in several places, so
// the function name, most of the signature, the kernel's lambda header and
// the lines that advance the insertion counters are not visible here.
7125template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7129 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteToLIDs,
7130 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteFromLIDs,
7136 typedef typename Node::execution_space
exec_space;
7137 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
7142 using local_map_type =
typename crs_graph_type::map_type::local_map_type;
// The target graph's per-row entry counts.
7152 typename crs_graph_type::num_row_entries_type::non_const_type
h_numRowEnt =
tgtCrsGraph.k_numRowEntries_;
// Passed to findRelOffset below as its `sorted` argument.
7156 const bool sorted =
false;
// CRSGRAPH_INNER_ABORT(lin): prints "ERROR: Tpetra_CrsGraph_def.hpp:<lin>" and
// calls Kokkos::abort. Any earlier definition is removed first; the SYCL
// variant prints through sycl::ext::oneapi::experimental::printf.
7162#ifdef CRSGRAPH_INNER_ABORT
7163#undef CRSGRAPH_INNER_ABORT
7166#ifdef KOKKOS_ENABLE_SYCL
7167#define CRSGRAPH_INNER_ABORT(lin) \
7169 sycl::ext::oneapi::experimental::printf("ERROR: Tpetra_CrsGraph_def.hpp:%d", lin); \
7170 Kokkos::abort("error"); \
7173#define CRSGRAPH_INNER_ABORT(lin) \
7175 printf("ERROR: Tpetra_CrsGraph_def.hpp:%d", lin); \
7176 Kokkos::abort("error"); \
7180 Kokkos::parallel_for(
7181 "Tpetra_CrsGraph::copyAndPermuteNew",
// Sanity checks: the source row must have a valid global ID, and mapping the
// target row local -> global -> local must give back the same local index.
7190 auto srcGid = srcRowMapLocal.getGlobalElement(srcLid);
7191 if (srcGid == GINV) CRSGRAPH_INNER_ABORT(__LINE__);
7192 auto tgtGid = tgtRowMapLocal.getGlobalElement(tgtLid);
7193 auto tgtLocalRow = tgtRowMapLocal.getLocalElement(tgtGid);
7194 if (tgtLocalRow == LINV) CRSGRAPH_INNER_ABORT(__LINE__);
7195 if (tgtLocalRow != tgtLid) CRSGRAPH_INNER_ABORT(__LINE__);
7196 auto tgtNumEntries = k_numRowEnt(tgtLocalRow);
// [start, end): the source row's span in the source column-index array.
7199 auto start = srcLocalRowPtrsDevice(srcLid);
7200 auto end = srcLocalRowPtrsDevice(srcLid + 1);
7201 auto rowLength = (end - start);
// [tstart, tend): entries currently used in the target row.
// tend1: start of the next target row.
7203 auto tstart = tgtLocalRowPtrsDevice(tgtLocalRow);
7204 auto tend = tstart + tgtNumEntries;
7205 auto tend1 = tgtLocalRowPtrsDevice(tgtLocalRow + 1);
// Free slots left in the target row, clamped at zero.
7207 const size_t num_avail = (tend1 < tend) ?
size_t(0) : tend1 - tend;
7208 size_t num_inserted = 0;
7210 global_inds_device_value_t* tgtGlobalColIndsPtr = tgtGlobalColInds.data();
// For every source entry: convert the local column index to a global one
// (abort if invalid) and look it up among the target row's entries.
7213 for (
size_t j = 0; j < rowLength; j++) {
7214 auto ci = srcLocalColIndsDevice(start + j);
7215 GO gi = srcColMapLocal.getGlobalElement(ci);
7216 if (gi == GINV) CRSGRAPH_INNER_ABORT(__LINE__);
7217 auto numInTgtRow = (tend - tstart);
7219 const size_t offset = KokkosSparse::findRelOffset(
7220 tgtGlobalColIndsPtr + tstart, numInTgtRow, gi, hint, sorted);
// offset == numInTgtRow is the "not found" case: write the index at that
// position, aborting first if no free slot remains.
7222 if (offset == numInTgtRow) {
7223 if (num_inserted >= num_avail) {
7224 Kokkos::abort(
"num_avail");
7226 tgtGlobalColIndsPtr[tstart + offset] = gi;
// Record the new entries in the row's count.
7232 k_numRowEnt(tgtLocalRow) += num_inserted;
// After the kernel: copy the counts back into the target graph and mark it
// as locally modified.
7235 Kokkos::deep_copy(tgtCrsGraph.k_numRowEntries_, k_numRowEnt);
7236 tgtCrsGraph.setLocallyModified();
// Copy graph entries from srcRowGraph into tgtRowGraph for the rows that keep
// the same global row ID and for the rows that are permuted.
//
// srcRowGraph      Source graph. Must actually be a CrsGraph; this is checked
//                  with dynamic_cast and reported as std::runtime_error.
// tgtRowGraph      Target graph. Same CrsGraph requirement and check.
// numSameIDs       Number of rows copied under the same global row ID in both
//                  graphs (the ID is looked up in the source row Map).
// permuteToLIDs    Local row indices, translated through the target row Map.
// permuteFromLIDs  Local row indices, translated through the source row Map.
//                  Must have the same extent(0) as permuteToLIDs.
//
// The target graph is padded first (computeCrsPadding / applyCrsPadding) and
// the entries are inserted afterwards as global column indices.
// NOTE(review): the diagnostic output written to std::cerr below is presumably
// guarded by `verbose`; the guards are not visible here, confirm.
7239template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7240void CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::copyAndPermuteNew(
7241 const row_graph_type& srcRowGraph,
7242 row_graph_type& tgtRowGraph,
7243 const size_t numSameIDs,
7244 const Kokkos::DualView<
const local_ordinal_type*,
7245 buffer_device_type>& permuteToLIDs,
7246 const Kokkos::DualView<
const local_ordinal_type*,
7247 buffer_device_type>& permuteFromLIDs,
7251 using GO = global_ordinal_type;
// Prefix consumed by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC checks below.
7252 const char tfecfFuncName[] =
"copyAndPermuteNew: ";
7253 const bool verbose = verbose_;
7255 Details::ProfilingRegion regionCAP(
"Tpetra::CrsGraph::copyAndPermuteNew");
// Prefix string for the diagnostic messages; it is dereferenced every time a
// message is built.
7256 std::unique_ptr<std::string> prefix;
7258 prefix = this->
createPrefix(
"CrsGraph",
"copyAndPermuteNew");
7259 std::ostringstream os;
7260 os << *prefix << endl;
7261 std::cerr << os.str();
// The two permutation lists are walked in lockstep further down, so their
// lengths have to agree.
7264 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7265 permuteToLIDs.extent(0) != permuteFromLIDs.extent(0),
7267 "permuteToLIDs.extent(0) = " << permuteToLIDs.extent(0) <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0) <<
".");
7270 std::ostringstream os;
7271 os << *prefix <<
"Compute padding" << endl;
7272 std::cerr << os.str();
// Both graphs are handed in through the row-graph interface; the concrete
// CrsGraph type is needed for the padding and insertion calls below.
7275 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
7276 const crs_graph_type* srcCrsGraphPtr =
dynamic_cast<const crs_graph_type*
>(&srcRowGraph);
7277 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7278 !srcCrsGraphPtr, std::runtime_error,
"error srcGraph type= " <<
typeid(srcRowGraph).name());
7279 const crs_graph_type& srcCrsGraph = *srcCrsGraphPtr;
7281 crs_graph_type* tgtCrsGraphPtr =
dynamic_cast<crs_graph_type*
>(&tgtRowGraph);
7282 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7283 !tgtCrsGraphPtr, std::runtime_error,
"error tgtGraph type= " <<
typeid(tgtRowGraph).name());
7285 crs_graph_type& tgtCrsGraph = *tgtCrsGraphPtr;
// Compute the padding for the target rows and apply it before any index is
// inserted.
7286 auto padding = tgtCrsGraph.computeCrsPadding(
7287 srcRowGraph, numSameIDs, permuteToLIDs, permuteFromLIDs, verbose);
7288 tgtCrsGraph.applyCrsPadding(*padding, verbose);
7290 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
7291 const map_type& tgtRowMap = *(tgtRowGraph.getRowMap());
7292 const bool src_filled = srcRowGraph.isFillComplete();
// NOTE(review): row_copy is not referenced in the lines visible here; it may
// be a leftover. Confirm before removing.
7293 nonconst_global_inds_host_view_type row_copy;
7299 LO numSameIDs_as_LID =
static_cast<LO
>(numSameIDs);
// Rows with identical IDs: a fill-complete source goes through
// insertGlobalIndicesDevice with empty permutation lists.
// NOTE(review): srcCrsGraphPtr was already tested for null above, so the
// `== nullptr` operand looks redundant. Confirm.
7301 if (src_filled || srcCrsGraphPtr ==
nullptr) {
7303 std::ostringstream os;
7304 os << *prefix <<
"src_filled || srcCrsGraph == nullptr" << endl;
7305 std::cerr << os.str();
7312 Kokkos::DualView<const local_ordinal_type*, buffer_device_type> noPermute;
7313 insertGlobalIndicesDevice(srcCrsGraph, tgtCrsGraph,
7314 noPermute, noPermute,
// Per the diagnostic message, this is the path for a source that is not fill
// complete: each row is fetched as a global-index view and inserted into the
// target under the same global row ID.
7318 std::ostringstream os;
7319 os << *prefix <<
"! src_filled && srcCrsGraph != nullptr" << endl;
7320 std::cerr << os.str();
7322 for (
size_t i = 0; i < numSameIDs; ++i, ++myid) {
7323 const GO gid = srcRowMap.getGlobalElement(myid);
7324 global_inds_host_view_type row;
7325 srcCrsGraph.getGlobalRowView(gid, row);
7326 tgtCrsGraph.insertGlobalIndices(gid, row.extent(0), row.data());
// Permuted rows. Host and device views of both permutation lists are taken;
// the host views drive the loop below.
7333 auto permuteToLIDs_h = permuteToLIDs.view_host();
7334 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
7335 auto permuteToLIDs_d = permuteToLIDs.view_device();
7336 auto permuteFromLIDs_d = permuteFromLIDs.view_device();
// Same choice as for the identical rows: insertGlobalIndicesDevice when the
// condition holds, otherwise (presumably an else branch, not visible here) the
// per-row loop.
7338 if (src_filled || srcCrsGraphPtr ==
nullptr) {
7339 insertGlobalIndicesDevice(
7344 static_cast<LO
>(permuteToLIDs_h.extent(0)));
// Entry i copies source row permuteFromLIDs_h[i] into target row
// permuteToLIDs_h[i]; both are converted to global row IDs first.
7346 for (LO i = 0; i < static_cast<LO>(permuteToLIDs_h.extent(0)); ++i) {
7347 const GO mygid = tgtRowMap.getGlobalElement(permuteToLIDs_h[i]);
7348 const GO srcgid = srcRowMap.getGlobalElement(permuteFromLIDs_h[i]);
7349 global_inds_host_view_type row;
7350 srcCrsGraph.getGlobalRowView(srcgid, row);
7351 tgtCrsGraph.insertGlobalIndices(mygid, row.extent(0), row.data());
7356 std::ostringstream os;
7357 os << *prefix <<
"Done" << endl;
7358 std::cerr << os.str();
// Expands to a declaration of importAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>. This is the overload taking a single Import object
// (importer) plus the domain Map, the range Map and a ParameterList; it
// returns Teuchos::RCP<CrsGraph<LO, GO, NODE>>. The expansion already ends
// with a semicolon.
// NOTE(review): judging by the INSTANT suffix this is meant for explicit
// template instantiation; confirm where it is invoked.
7370#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7372 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7373 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7374 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7375 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7376 CrsGraph<LO, GO, NODE>::node_type>& importer, \
7377 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7378 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7379 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7380 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7381 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7382 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7383 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of importAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>. This is the overload taking two Import objects
// (rowImporter and domainImporter) plus the domain Map, the range Map and a
// ParameterList; it returns Teuchos::RCP<CrsGraph<LO, GO, NODE>>. The
// expansion already ends with a semicolon.
7385#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7387 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7388 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7389 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7390 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7391 CrsGraph<LO, GO, NODE>::node_type>& rowImporter, \
7392 const Import<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7393 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7394 CrsGraph<LO, GO, NODE>::node_type>& domainImporter, \
7395 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7396 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7397 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7398 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7399 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7400 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7401 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of exportAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>. This is the overload taking a single Export object
// (exporter) plus the domain Map, the range Map and a ParameterList; it
// returns Teuchos::RCP<CrsGraph<LO, GO, NODE>>. The expansion already ends
// with a semicolon.
7403#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7405 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7406 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7407 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7408 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7409 CrsGraph<LO, GO, NODE>::node_type>& exporter, \
7410 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7411 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7412 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7413 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7414 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7415 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7416 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Expands to a declaration of exportAndFillCompleteCrsGraph for
// CrsGraph<LO, GO, NODE>. This is the overload taking two Export objects
// (rowExporter and domainExporter) plus the domain Map, the range Map and a
// ParameterList; it returns Teuchos::RCP<CrsGraph<LO, GO, NODE>>. The
// expansion already ends with a semicolon.
7418#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7420 Teuchos::RCP<CrsGraph<LO, GO, NODE>> \
7421 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO, GO, NODE>>& sourceGraph, \
7422 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7423 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7424 CrsGraph<LO, GO, NODE>::node_type>& rowExporter, \
7425 const Export<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7426 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7427 CrsGraph<LO, GO, NODE>::node_type>& domainExporter, \
7428 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7429 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7430 CrsGraph<LO, GO, NODE>::node_type>>& domainMap, \
7431 const Teuchos::RCP<const Map<CrsGraph<LO, GO, NODE>::local_ordinal_type, \
7432 CrsGraph<LO, GO, NODE>::global_ordinal_type, \
7433 CrsGraph<LO, GO, NODE>::node_type>>& rangeMap, \
7434 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella macro for one (LO, GO, NODE) combination: explicitly instantiates
// the class CrsGraph<LO, GO, NODE> and then expands the four
// import/export-and-fill-complete helper macros for the same types.
// Invoke it without a trailing semicolon requirement in mind: every piece of
// the expansion except the last helper is already terminated, and the helper
// macros themselves end with a semicolon.
7436#define TPETRA_CRSGRAPH_INSTANT(LO, GO, NODE) \
7437 template class CrsGraph<LO, GO, NODE>; \
7438 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7439 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO, GO, NODE) \
7440 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE) \
7441 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO, GO, NODE)