3964 int isLocallyComplete = 1;
3968 std::ostringstream
os;
3970 std::cerr <<
os.str();
3974 isLocallyComplete = 0;
3977 std::ostringstream os;
3978 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
3979 std::cerr << os.str();
3981 this->doExport(*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
3985 std::ostringstream os;
3986 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
3987 std::cerr << os.str();
3994 export_type exportToOneToOne(nonlocalRowMap, oneToOneRowMap);
3995 if (!exportToOneToOne.isLocallyComplete()) {
3996 isLocallyComplete = 0;
4004 std::ostringstream os;
4005 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4007 std::cerr << os.str();
4009 crs_matrix_type oneToOneMatrix(oneToOneRowMap, 0);
4011 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4017 std::ostringstream os;
4018 os << *prefix <<
"Free nonlocalMatrix" << endl;
4019 std::cerr << os.str();
4021 nonlocalMatrix = Teuchos::null;
4025 std::ostringstream os;
4026 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4027 std::cerr << os.str();
4029 import_type importToOrig(oneToOneRowMap,
origRowMap);
4030 this->doImport(oneToOneMatrix, importToOrig,
Tpetra::ADD);
4038 std::ostringstream os;
4039 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4040 std::cerr << os.str();
4042 decltype(nonlocals_) newNonlocals;
4043 std::swap(nonlocals_, newNonlocals);
4052 int isGloballyComplete = 0;
4053 reduceAll<int, int>(*comm, REDUCE_MIN, isLocallyComplete,
4054 outArg(isGloballyComplete));
4055 TEUCHOS_TEST_FOR_EXCEPTION(isGloballyComplete != 1, std::runtime_error,
4056 "On at least one process, "
4057 "you called insertGlobalValues with a global row index which is not in "
4058 "the matrix's row Map on any process in its communicator.");
4061template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4064 if (!isStaticGraph()) {
4065 myGraph_->resumeFill(
params);
4068 applyHelper.reset();
4069 fillComplete_ =
false;
4072template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4075 return getCrsGraphRef().haveGlobalConstants();
4078template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4084 "getCrsGraph() returns null. This should not happen at this point. "
4085 "Please report this bug to the Tpetra developers.");
4088 if (this->isStaticGraph() &&
graph.isFillComplete()) {
4100template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4102 fillComplete(
const Teuchos::RCP<const map_type>& domainMap,
4103 const Teuchos::RCP<const map_type>&
rangeMap,
4104 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
4108 using Teuchos::ArrayRCP;
4113 const bool verbose = Behavior::verbose(
"CrsMatrix");
4114 std::unique_ptr<std::string>
prefix;
4116 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4117 std::ostringstream
os;
4119 std::cerr <<
os.str();
4122 "Tpetra::CrsMatrix::fillCompete",
4126 "Matrix fill state must be active (isFillActive() "
4127 "must be true) before you may call fillComplete().");
4128 const int numProcs = this->getComm()->getSize();
4146 if (
params->isParameter(
"sort column map ghost gids")) {
4148 }
else if (
params->isParameter(
"Sort column Map ghost GIDs")) {
4156 if (!this->myGraph_.is_null()) {
4157 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ =
sortGhosts;
4160 if (!this->getCrsGraphRef().indicesAreAllocated()) {
4161 if (this->hasColMap()) {
4162 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4164 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4170 this->globalAssemble();
4174 "Cannot have nonlocal entries on a serial run. "
4175 "An invalid entry (i.e., with row index not in the row Map) must have "
4176 "been submitted to the CrsMatrix.");
4179 if (this->isStaticGraph()) {
4187#ifdef HAVE_TPETRA_DEBUG
4206 this->staticGraph_->getDomainMap()->isSameAs(*
domainMap);
4208 this->staticGraph_->getRangeMap()->isSameAs(*
rangeMap);
4211 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4212 "The graph cannot be changed because it was given to the CrsMatrix "
4213 "constructor as const. You can fix this by passing in the graph's "
4214 "domain Map and range Map to the matrix's fillComplete call.");
4217 "The CrsMatrix's range Map does not match the graph's range Map. "
4218 "The graph cannot be changed because it was given to the CrsMatrix "
4219 "constructor as const. You can fix this by passing in the graph's "
4220 "domain Map and range Map to the matrix's fillComplete call.");
4225 this->fillLocalMatrix(
params);
4235 Teuchos::Array<int> remotePIDs(0);
4238 this->myGraph_->makeColMap(remotePIDs);
4244 this->myGraph_->makeIndicesLocal(verbose);
4252 const bool sorted = this->myGraph_->isSorted();
4253 const bool merged = this->myGraph_->isMerged();
4263 this->fillLocalGraphAndMatrix(
params);
4266 params->get(
"compute global constants",
true);
4268 this->myGraph_->computeGlobalConstants();
4270 this->myGraph_->computeLocalConstants();
4272 this->myGraph_->fillComplete_ =
true;
4273 this->myGraph_->checkInternalState();
4278 this->fillComplete_ =
true;
4281 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState");
4282 this->checkInternalState();
4286template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4289 const Teuchos::RCP<const map_type>&
rangeMap,
4290 const Teuchos::RCP<const import_type>&
importer,
4291 const Teuchos::RCP<const export_type>&
exporter,
4292 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
4293#ifdef HAVE_TPETRA_MMM_TIMINGS
4296 label =
params->get(
"Timer Label", label);
4297 std::string
prefix = std::string(
"Tpetra ") + label + std::string(
": ");
4298 using Teuchos::TimeMonitor;
4300 Teuchos::TimeMonitor
all(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-all")));
4306 "Matrix fill state must be active (isFillActive() "
4307 "must be true) before calling fillComplete().");
4309 myGraph_.is_null(), std::logic_error,
"myGraph_ is null. This is not allowed.");
4312#ifdef HAVE_TPETRA_MMM_TIMINGS
4313 Teuchos::TimeMonitor
graph(*TimeMonitor::getNewTimer(
prefix + std::string(
"eSFC-M-Graph")));
4320#ifdef HAVE_TPETRA_MMM_TIMINGS
4324 fillLocalGraphAndMatrix(
params);
4329 fillComplete_ =
true;
4332#ifdef HAVE_TPETRA_DEBUG
4334 ": We're at the end of fillComplete(), but isFillActive() is true. "
4335 "Please report this bug to the Tpetra developers.");
4337 ": We're at the end of fillComplete(), but isFillActive() is true. "
4338 "Please report this bug to the Tpetra developers.");
4341#ifdef HAVE_TPETRA_MMM_TIMINGS
4342 Teuchos::TimeMonitor
cIS(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-M-cIS")));
4345 checkInternalState();
4349template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4382template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4385 using ::Tpetra::Details::ProfilingRegion;
4387 typedef typename Kokkos::View<LO*, device_type>::host_mirror_type::execution_space
4388 host_execution_space;
4389 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4390 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4391 ProfilingRegion
regionSAM(
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4395 "Cannot sort or merge with "
4396 "\"static\" (const) graph, since the matrix does not own the graph.");
4398 "myGraph_ is null, but "
4399 "this matrix claims ! isStaticGraph(). "
4400 "Please report this bug to the Tpetra developers.");
4402 "It is invalid to call "
4403 "this method if the graph's storage has already been optimized. "
4404 "Please report this bug to the Tpetra developers.");
4407 const LO
lclNumRows =
static_cast<LO
>(this->getLocalNumRows());
4413 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
4414 auto cols_ =
graph.lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4415 Kokkos::parallel_reduce(
4416 "sortAndMergeIndicesAndValues", range_type(0,
lclNumRows),
4434 graph.indicesAreSorted_ =
true;
4437 graph.noRedundancies_ =
true;
4442template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4450 using Teuchos::rcp_const_cast;
4451 using Teuchos::rcpFromRef;
4453 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
4454 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
4488 (!
Y_in.isDistributed() && this->getComm()->getSize() != 1);
4505 if (!
X_in.isConstantStride()) {
4521 ProfilingRegion
regionImport(
"Tpetra::CrsMatrix::apply: Import");
4548 ProfilingRegion
regionExport(
"Tpetra::CrsMatrix::apply: Export");
4597 ProfilingRegion
regionReduce(
"Tpetra::CrsMatrix::apply: Reduce Y");
4602template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4606 const Teuchos::ETransp
mode,
4609 using Teuchos::null;
4612 using Teuchos::rcp_const_cast;
4613 using Teuchos::rcpFromRef;
4615 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
4661 if (importMV_ != Teuchos::null && importMV_->getNumVectors() !=
numVectors) {
4664 if (importMV_ ==
null) {
4669 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() !=
numVectors) {
4672 if (exportMV_ ==
null) {
4680 ProfilingRegion
regionImport(
"Tpetra::CrsMatrix::apply (transpose): Import");
4689 ProfilingRegion
regionExport(
"Tpetra::CrsMatrix::apply (transpose): Export");
4696 importMV_->putScalar(
ZERO);
4715 if (!
Y_in.isConstantStride() ||
X.getRawPtr() == &
Y_in) {
4729 ProfilingRegion
regionReduce(
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
4734template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4738 const Teuchos::ETransp
mode,
4741 using Teuchos::NO_TRANS;
4745 auto X_lcl =
X.getLocalViewDevice(Access::ReadOnly);
4746 auto Y_lcl =
Y.getLocalViewDevice(Access::ReadWrite);
4752 "X.getNumVectors() = " <<
X.getNumVectors() <<
" != "
4753 "Y.getNumVectors() = "
4754 <<
Y.getNumVectors() <<
".");
4757 getColMap()->getLocalNumElements(),
4759 "NO_TRANS case: X has the wrong number of local rows. "
4760 "X.getLocalLength() = "
4761 <<
X.getLocalLength() <<
" != "
4762 "getColMap()->getLocalNumElements() = "
4763 << getColMap()->getLocalNumElements() <<
".");
4765 getRowMap()->getLocalNumElements(),
4767 "NO_TRANS case: Y has the wrong number of local rows. "
4768 "Y.getLocalLength() = "
4769 <<
Y.getLocalLength() <<
" != "
4770 "getRowMap()->getLocalNumElements() = "
4771 << getRowMap()->getLocalNumElements() <<
".");
4773 getRowMap()->getLocalNumElements(),
4775 "TRANS or CONJ_TRANS case: X has the wrong number of local "
4776 "rows. X.getLocalLength() = "
4777 <<
X.getLocalLength()
4778 <<
" != getRowMap()->getLocalNumElements() = "
4779 << getRowMap()->getLocalNumElements() <<
".");
4781 getColMap()->getLocalNumElements(),
4783 "TRANS or CONJ_TRANS case: X has the wrong number of local "
4784 "rows. Y.getLocalLength() = "
4785 <<
Y.getLocalLength()
4786 <<
" != getColMap()->getLocalNumElements() = "
4787 << getColMap()->getLocalNumElements() <<
".");
4789 "The matrix is not "
4790 "fill complete. You must call fillComplete() (possibly with "
4791 "domain and range Map arguments) without an intervening "
4792 "resumeFill() call before you may call this method.");
4794 std::runtime_error,
"X and Y must be constant stride.");
4800 std::runtime_error,
"X and Y may not alias one another.");
4803 auto A_lcl = getLocalMatrixDevice();
4805 if (!applyHelper.get()) {
4809#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
4815 if constexpr (std::is_same_v<execution_space, Kokkos::Cuda>) {
4819 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
4825 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
4826 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
4830 const char*
modeKK =
nullptr;
4832 case Teuchos::NO_TRANS:
4833 modeKK = KokkosSparse::NoTranspose;
4835 case Teuchos::TRANS:
4836 modeKK = KokkosSparse::Transpose;
4838 case Teuchos::CONJ_TRANS:
4839 modeKK = KokkosSparse::ConjugateTranspose;
4842 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
4845 if (applyHelper->shouldUseIntRowptrs()) {
4848 &applyHelper->handle_int,
modeKK,
4852 &applyHelper->handle,
modeKK,
4857template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4861 Teuchos::ETransp
mode,
4865 const char fnName[] =
"Tpetra::CrsMatrix::apply";
4868 fnName <<
": Cannot call apply() until fillComplete() "
4869 "has been called.");
4871 if (
mode == Teuchos::NO_TRANS) {
4875 ProfilingRegion
regionTranspose(
"Tpetra::CrsMatrix::apply (transpose)");
4880template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4882Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node>>
4890 "This matrix (the source "
4891 "of the conversion) is not fill complete. You must first call "
4892 "fillComplete() (possibly with the domain and range Map) without an "
4893 "intervening call to resumeFill(), before you may call this method.");
4898 using ::Tpetra::Details::copyConvert;
4899 copyConvert(
newMatrix->getLocalMatrixDevice().values,
4900 this->getLocalMatrixDevice().values);
4904 newMatrix->fillComplete(this->getDomainMap(), this->getRangeMap());
4909template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4916 "Internal state is not consistent. "
4917 "Please report this bug to the Tpetra developers.";
4926 std::logic_error,
err);
4929 std::logic_error,
err <<
" Specifically, the matrix is fill complete, "
4930 "but its graph is NOT fill complete.");
4934 staticGraph_->getLocalAllocationSize() > 0 &&
4935 staticGraph_->getLocalNumRows() > 0 &&
4936 valuesUnpacked_wdv.extent(0) == 0,
4937 std::logic_error,
err);
4941template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4945 std::ostringstream
os;
4947 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
4951 if (isFillComplete()) {
4952 os <<
"isFillComplete: true"
4953 <<
", global dimensions: [" << getGlobalNumRows() <<
", "
4954 << getGlobalNumCols() <<
"]"
4955 <<
", global number of entries: " << getGlobalNumEntries()
4958 os <<
"isFillComplete: false"
4959 <<
", global dimensions: [" << getGlobalNumRows() <<
", "
4960 << getGlobalNumCols() <<
"]}";
4965template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4968 const Teuchos::EVerbosityLevel
verbLevel)
const {
4971 using Teuchos::ArrayView;
4972 using Teuchos::Comm;
4974 using Teuchos::TypeNameTraits;
4975 using Teuchos::VERB_DEFAULT;
4976 using Teuchos::VERB_EXTREME;
4977 using Teuchos::VERB_HIGH;
4978 using Teuchos::VERB_LOW;
4979 using Teuchos::VERB_MEDIUM;
4980 using Teuchos::VERB_NONE;
4992 const int myRank = comm->getRank();
4993 const int numProcs = comm->getSize();
4995 for (
size_t dec = 10;
dec < getGlobalNumRows();
dec *= 10) {
4998 width = std::max<size_t>(
width,
static_cast<size_t>(11)) + 2;
5008 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" <<
endl;
5017 out <<
"Template parameters:" <<
endl;
5024 if (isFillComplete()) {
5025 out <<
"isFillComplete: true" <<
endl
5026 <<
"Global dimensions: [" << getGlobalNumRows() <<
", "
5027 << getGlobalNumCols() <<
"]" <<
endl
5028 <<
"Global number of entries: " << getGlobalNumEntries() <<
endl
5030 <<
"Global max number of entries in a row: "
5031 << getGlobalMaxNumRowEntries() <<
endl;
5033 out <<
"isFillComplete: false" <<
endl
5034 <<
"Global dimensions: [" << getGlobalNumRows() <<
", "
5035 << getGlobalNumCols() <<
"]" <<
endl;
5046 <<
"Row Map:" <<
endl;
5056 getRowMap()->describe(
out,
vl);
5061 out <<
"Column Map: ";
5067 }
else if (getColMap() == getRowMap()) {
5069 out <<
"same as row Map" <<
endl;
5075 getColMap()->describe(
out,
vl);
5080 out <<
"Domain Map: ";
5082 if (getDomainMap().
is_null()) {
5086 }
else if (getDomainMap() == getRowMap()) {
5088 out <<
"same as row Map" <<
endl;
5090 }
else if (getDomainMap() == getColMap()) {
5092 out <<
"same as column Map" <<
endl;
5098 getDomainMap()->describe(
out,
vl);
5103 out <<
"Range Map: ";
5105 if (getRangeMap().
is_null()) {
5109 }
else if (getRangeMap() == getDomainMap()) {
5111 out <<
"same as domain Map" <<
endl;
5113 }
else if (getRangeMap() == getRowMap()) {
5115 out <<
"same as row Map" <<
endl;
5121 getRangeMap()->describe(
out,
vl);
5129 if (!staticGraph_->indicesAreAllocated()) {
5130 out <<
"Graph indices not allocated" <<
endl;
5132 out <<
"Number of allocated entries: "
5133 << staticGraph_->getLocalAllocationSize() <<
endl;
5135 out <<
"Number of entries: " << getLocalNumEntries() <<
endl
5136 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries()
5152 out << std::setw(
width) <<
"Proc Rank"
5153 << std::setw(
width) <<
"Global Row"
5154 << std::setw(
width) <<
"Num Entries";
5156 out << std::setw(
width) <<
"(Index,Value)";
5159 for (
size_t r = 0;
r < getLocalNumRows(); ++
r) {
5160 const size_t nE = getNumEntriesInLocalRow(
r);
5166 if (isGloballyIndexed()) {
5167 global_inds_host_view_type
rowinds;
5168 values_host_view_type
rowvals;
5170 for (
size_t j = 0;
j <
nE; ++
j) {
5175 }
else if (isLocallyIndexed()) {
5176 local_inds_host_view_type
rowinds;
5177 values_host_view_type
rowvals;
5179 for (
size_t j = 0;
j <
nE; ++
j) {
5180 out <<
" (" << getColMap()->getGlobalElement(
rowinds[
j])
5197template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5212template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5215 const typename crs_graph_type::padding_type&
padding,
5216 const bool verbose) {
5220 using LO = local_ordinal_type;
5221 using row_ptrs_type =
5222 typename local_graph_device_type::row_map_type::non_const_type;
5224 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5227 ". Please report this bug to the Tpetra developers.";
5228 ProfilingRegion
regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5230 std::unique_ptr<std::string>
prefix;
5233 std::ostringstream
os;
5237 std::cerr <<
os.str();
5239 const int myRank = !verbose ? -1 : [&]() {
5240 auto map = this->getMap();
5241 if (map.is_null()) {
5244 auto comm = map->getComm();
5245 if (comm.is_null()) {
5248 return comm->getRank();
5252 if (!myGraph_->indicesAreAllocated()) {
5254 std::ostringstream os;
5255 os << *prefix <<
"Call allocateIndices" << endl;
5256 std::cerr << os.str();
5258 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5270 std::ostringstream os;
5271 os << *prefix <<
"Allocate row_ptrs_beg: "
5272 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5273 std::cerr << os.str();
5275 using Kokkos::view_alloc;
5276 using Kokkos::WithoutInitializing;
5277 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5278 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5280 Kokkos::deep_copy(execution_space(), row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
5282 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) : size_t(row_ptr_beg.extent(0) - 1);
5284 std::ostringstream os;
5285 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5286 std::cerr << os.str();
5288 row_ptrs_type row_ptr_end(
5289 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5291 row_ptrs_type num_row_entries_d;
5293 const bool refill_num_row_entries =
5294 myGraph_->k_numRowEntries_.extent(0) != 0;
5296 if (refill_num_row_entries) {
5299 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5300 myGraph_->k_numRowEntries_);
5301 Kokkos::parallel_for(
5302 "Fill end row pointers", range_policy(0, N),
5303 KOKKOS_LAMBDA(
const size_t i) {
5304 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
5310 Kokkos::parallel_for(
5311 "Fill end row pointers", range_policy(0, N),
5312 KOKKOS_LAMBDA(
const size_t i) {
5313 row_ptr_end(i) = row_ptr_beg(i + 1);
5317 if (myGraph_->isGloballyIndexed()) {
5319 myGraph_->gblInds_wdv,
5320 valuesUnpacked_wdv, padding, myRank, verbose);
5321 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5322 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(newValuesLen != newColIndsLen, std::logic_error,
5324 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5325 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5329 myGraph_->lclIndsUnpacked_wdv,
5330 valuesUnpacked_wdv, padding, myRank, verbose);
5331 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5332 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5333 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(newValuesLen != newColIndsLen, std::logic_error,
5334 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5335 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
5339 if (refill_num_row_entries) {
5340 Kokkos::parallel_for(
5341 "Fill num entries", range_policy(0, N),
5342 KOKKOS_LAMBDA(
const size_t i) {
5343 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5345 Kokkos::deep_copy(myGraph_->k_numRowEntries_, num_row_entries_d);
5349 std::ostringstream os;
5350 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5351 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5352 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5353 std::cerr << os.str();
5354 TEUCHOS_ASSERT(myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5355 row_ptr_beg.extent(0));
5357 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
5360template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5361void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5362 copyAndPermuteStaticGraph(
5363 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5364 const size_t numSameIDs,
5365 const LocalOrdinal permuteToLIDs[],
5366 const LocalOrdinal permuteFromLIDs[],
5367 const size_t numPermutes) {
5368 using Details::ProfilingRegion;
5370 using Teuchos::Array;
5371 using Teuchos::ArrayView;
5372 using LO = LocalOrdinal;
5373 using GO = GlobalOrdinal;
5374 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5375 const char suffix[] =
5376 " Please report this bug to the Tpetra developers.";
5377 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5381 std::unique_ptr<std::string> prefix;
5383 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5384 std::ostringstream os;
5385 os << *prefix <<
"Start" << endl;
5387 const char*
const prefix_raw =
5388 verbose ? prefix.get()->c_str() :
nullptr;
5390 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed();
5395 const map_type& srcRowMap = *(srcMat.getRowMap());
5396 nonconst_global_inds_host_view_type rowInds;
5397 nonconst_values_host_view_type rowVals;
5398 const LO numSameIDs_as_LID =
static_cast<LO
>(numSameIDs);
5399 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5403 const GO sourceGID = srcRowMap.getGlobalElement(sourceLID);
5404 const GO targetGID = sourceGID;
5406 ArrayView<const GO> rowIndsConstView;
5407 ArrayView<const Scalar> rowValsConstView;
5409 if (sourceIsLocallyIndexed) {
5410 const size_t rowLength = srcMat.getNumEntriesInGlobalRow(sourceGID);
5411 if (rowLength >
static_cast<size_t>(rowInds.size())) {
5412 Kokkos::resize(rowInds, rowLength);
5413 Kokkos::resize(rowVals, rowLength);
5417 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds, std::make_pair((
size_t)0, rowLength));
5418 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals, std::make_pair((
size_t)0, rowLength));
5423 size_t checkRowLength = 0;
5424 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5425 rowValsView, checkRowLength);
5427 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, std::logic_error,
5430 << sourceGID <<
", the source "
5431 "matrix's getNumEntriesInGlobalRow returns a row length "
5433 << rowLength <<
", but getGlobalRowCopy reports "
5435 << checkRowLength <<
"." << suffix);
5442 rowIndsConstView = Teuchos::ArrayView<const GO>(
5443 rowIndsView.data(), rowIndsView.extent(0),
5444 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5445 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5446 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5447 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5451 global_inds_host_view_type rowIndsView;
5452 values_host_view_type rowValsView;
5453 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5458 rowIndsConstView = Teuchos::ArrayView<const GO>(
5459 rowIndsView.data(), rowIndsView.extent(0),
5460 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5461 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5462 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5463 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5470 combineGlobalValues(targetGID, rowIndsConstView,
5472 prefix_raw, debug, verbose);
5476 std::ostringstream os;
5477 os << *prefix <<
"Do permutes" << endl;
5480 const map_type& tgtRowMap = *(this->getRowMap());
5481 for (
size_t p = 0; p < numPermutes; ++p) {
5482 const GO sourceGID = srcRowMap.getGlobalElement(permuteFromLIDs[p]);
5483 const GO targetGID = tgtRowMap.getGlobalElement(permuteToLIDs[p]);
5485 ArrayView<const GO> rowIndsConstView;
5486 ArrayView<const Scalar> rowValsConstView;
5488 if (sourceIsLocallyIndexed) {
5489 const size_t rowLength = srcMat.getNumEntriesInGlobalRow(sourceGID);
5490 if (rowLength >
static_cast<size_t>(rowInds.size())) {
5491 Kokkos::resize(rowInds, rowLength);
5492 Kokkos::resize(rowVals, rowLength);
5496 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds, std::make_pair((
size_t)0, rowLength));
5497 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals, std::make_pair((
size_t)0, rowLength));
5502 size_t checkRowLength = 0;
5503 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5504 rowValsView, checkRowLength);
5506 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, std::logic_error,
5508 "source matrix global row index "
5509 << sourceGID <<
", "
5510 "getNumEntriesInGlobalRow returns a row length of "
5511 << rowLength <<
", but getGlobalRowCopy a row length of "
5512 << checkRowLength <<
"." << suffix);
5519 rowIndsConstView = Teuchos::ArrayView<const GO>(
5520 rowIndsView.data(), rowIndsView.extent(0),
5521 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5522 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5523 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5524 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5528 global_inds_host_view_type rowIndsView;
5529 values_host_view_type rowValsView;
5530 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5535 rowIndsConstView = Teuchos::ArrayView<const GO>(
5536 rowIndsView.data(), rowIndsView.extent(0),
5537 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5538 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5539 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5540 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5545 combineGlobalValues(targetGID, rowIndsConstView,
5547 prefix_raw, debug, verbose);
5551 std::ostringstream os;
5552 os << *prefix <<
"Done" << endl;
5556template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5557void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5558 copyAndPermuteNonStaticGraph(
5559 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5560 const size_t numSameIDs,
5561 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5562 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5563 const size_t numPermutes) {
5564 using Details::ProfilingRegion;
5566 using Teuchos::Array;
5567 using Teuchos::ArrayView;
5568 using LO = LocalOrdinal;
5569 using GO = GlobalOrdinal;
5570 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5571 const char suffix[] =
5572 " Please report this bug to the Tpetra developers.";
5573 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5577 std::unique_ptr<std::string> prefix;
5579 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5580 std::ostringstream os;
5581 os << *prefix <<
"Start" << endl;
5583 const char*
const prefix_raw =
5584 verbose ? prefix.get()->c_str() :
nullptr;
5587 using row_graph_type = RowGraph<LO, GO, Node>;
5588 const row_graph_type& srcGraph = *(srcMat.getGraph());
5590 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5591 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5592 applyCrsPadding(*padding, verbose);
5594 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed();
5599 const map_type& srcRowMap = *(srcMat.getRowMap());
5600 const LO numSameIDs_as_LID =
static_cast<LO
>(numSameIDs);
5601 using gids_type = nonconst_global_inds_host_view_type;
5602 using vals_type = nonconst_values_host_view_type;
5605 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5609 const GO sourceGID = srcRowMap.getGlobalElement(sourceLID);
5610 const GO targetGID = sourceGID;
5612 ArrayView<const GO> rowIndsConstView;
5613 ArrayView<const Scalar> rowValsConstView;
5615 if (sourceIsLocallyIndexed) {
5616 const size_t rowLength = srcMat.getNumEntriesInGlobalRow(sourceGID);
5617 if (rowLength >
static_cast<size_t>(rowInds.extent(0))) {
5618 Kokkos::resize(rowInds, rowLength);
5619 Kokkos::resize(rowVals, rowLength);
5623 gids_type rowIndsView = Kokkos::subview(rowInds, std::make_pair((
size_t)0, rowLength));
5624 vals_type rowValsView = Kokkos::subview(rowVals, std::make_pair((
size_t)0, rowLength));
5629 size_t checkRowLength = 0;
5630 srcMat.getGlobalRowCopy(sourceGID, rowIndsView, rowValsView,
5633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, std::logic_error,
5636 << sourceGID <<
", the source "
5637 "matrix's getNumEntriesInGlobalRow returns a row length "
5639 << rowLength <<
", but getGlobalRowCopy reports "
5641 << checkRowLength <<
"." << suffix);
5643 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5644 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar*
>(rowValsView.data()), rowLength);
5646 global_inds_host_view_type rowIndsView;
5647 values_host_view_type rowValsView;
5648 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5654 rowIndsConstView = Teuchos::ArrayView<const GO>(
5655 rowIndsView.data(), rowIndsView.extent(0),
5656 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5657 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5658 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5659 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5665 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5666 rowValsConstView, prefix_raw, debug, verbose);
5670 std::ostringstream os;
5671 os << *prefix <<
"Do permutes" << endl;
5673 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5674 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5676 const map_type& tgtRowMap = *(this->getRowMap());
5677 for (
size_t p = 0; p < numPermutes; ++p) {
5678 const GO sourceGID = srcRowMap.getGlobalElement(permuteFromLIDs[p]);
5679 const GO targetGID = tgtRowMap.getGlobalElement(permuteToLIDs[p]);
5681 ArrayView<const GO> rowIndsConstView;
5682 ArrayView<const Scalar> rowValsConstView;
5684 if (sourceIsLocallyIndexed) {
5685 const size_t rowLength = srcMat.getNumEntriesInGlobalRow(sourceGID);
5686 if (rowLength >
static_cast<size_t>(rowInds.extent(0))) {
5687 Kokkos::resize(rowInds, rowLength);
5688 Kokkos::resize(rowVals, rowLength);
5692 gids_type rowIndsView = Kokkos::subview(rowInds, std::make_pair((
size_t)0, rowLength));
5693 vals_type rowValsView = Kokkos::subview(rowVals, std::make_pair((
size_t)0, rowLength));
5698 size_t checkRowLength = 0;
5699 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5700 rowValsView, checkRowLength);
5702 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, std::logic_error,
5704 "source matrix global row index "
5705 << sourceGID <<
", "
5706 "getNumEntriesInGlobalRow returns a row length of "
5707 << rowLength <<
", but getGlobalRowCopy a row length of "
5708 << checkRowLength <<
"." << suffix);
5710 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5711 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar*
>(rowValsView.data()), rowLength);
5713 global_inds_host_view_type rowIndsView;
5714 values_host_view_type rowValsView;
5715 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5721 rowIndsConstView = Teuchos::ArrayView<const GO>(
5722 rowIndsView.data(), rowIndsView.extent(0),
5723 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5724 rowValsConstView = Teuchos::ArrayView<const Scalar>(
5725 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5726 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5732 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5733 rowValsConstView, prefix_raw, debug, verbose);
5737 std::ostringstream os;
5738 os << *prefix <<
"Done" << endl;
5742template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5746 const size_t numSameIDs,
5747 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteToLIDs,
5748 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteFromLIDs,
5757 ProfilingRegion
regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
5759 const bool verbose = Behavior::verbose(
"CrsMatrix");
5760 std::unique_ptr<std::string>
prefix;
5762 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
5763 std::ostringstream
os;
5765 << *
prefix <<
" numSameIDs: " << numSameIDs <<
endl
5775 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
5777 std::cerr <<
os.str();
5782 std::invalid_argument,
"permuteToLIDs.extent(0) = " <<
numPermute <<
"!= permuteFromLIDs.extent(0) = " <<
permuteFromLIDs.extent(0) <<
".");
5788 if (isStaticGraph()) {
5794 copyAndPermuteStaticGraph(
srcMat, numSameIDs,
5804 std::ostringstream
os;
5806 std::cerr <<
os.str();
5810template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5813 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
exportLIDs,
5814 Kokkos::DualView<char*, buffer_device_type>& exports,
5821 using Teuchos::outArg;
5822 using Teuchos::REDUCE_MAX;
5823 using Teuchos::reduceAll;
5827 ProfilingRegion
regionPAP(
"Tpetra::CrsMatrix::packAndPrepare");
5829 const bool debug = Behavior::debug(
"CrsMatrix");
5830 const bool verbose = Behavior::verbose(
"CrsMatrix");
5833 Teuchos::RCP<const Teuchos::Comm<int>>
pComm = this->getComm();
5834 if (
pComm.is_null()) {
5837 const Teuchos::Comm<int>& comm = *pComm;
5838 const int myRank = comm.getSize();
5840 std::unique_ptr<std::string> prefix;
5842 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
5843 std::ostringstream os;
5844 os << *prefix <<
"Start" << endl
5846 << dualViewStatusToString(exportLIDs,
"exportLIDs")
5849 << dualViewStatusToString(exports,
"exports")
5852 << dualViewStatusToString(numPacketsPerLID,
"numPacketsPerLID")
5854 std::cerr << os.str();
5877 std::ostringstream msg;
5880 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
5881 const crs_matrix_type* srcCrsMat =
5882 dynamic_cast<const crs_matrix_type*
>(&source);
5883 if (srcCrsMat !=
nullptr) {
5885 std::ostringstream os;
5886 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
5889 std::cerr << os.str();
5892 srcCrsMat->packNew(exportLIDs, exports, numPacketsPerLID,
5893 constantNumPackets);
5894 }
catch (std::exception& e) {
5896 msg <<
"Proc " << myRank <<
": " << e.what() << std::endl;
5899 using Kokkos::HostSpace;
5900 using Kokkos::subview;
5901 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
5902 using range_type = Kokkos::pair<size_t, size_t>;
5905 std::ostringstream os;
5906 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
5908 std::cerr << os.str();
5911 const row_matrix_type* srcRowMat =
5912 dynamic_cast<const row_matrix_type*
>(&source);
5913 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(srcRowMat ==
nullptr, std::invalid_argument,
5914 "The source object of the Import or Export operation is neither a "
5915 "CrsMatrix (with the same template parameters as the target object), "
5916 "nor a RowMatrix (with the same first four template parameters as the "
5927 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
5928 auto exportLIDs_h = exportLIDs.view_host();
5929 Teuchos::ArrayView<const LO> exportLIDs_av(exportLIDs_h.data(),
5930 exportLIDs_h.size());
5934 Teuchos::Array<char> exports_a;
5940 numPacketsPerLID.clear_sync_state();
5941 numPacketsPerLID.modify_host();
5942 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5943 Teuchos::ArrayView<size_t> numPacketsPerLID_av(numPacketsPerLID_h.data(),
5944 numPacketsPerLID_h.size());
5949 srcRowMat->pack(exportLIDs_av, exports_a, numPacketsPerLID_av,
5950 constantNumPackets);
5951 }
catch (std::exception& e) {
5953 msg <<
"Proc " << myRank <<
": " << e.what() << std::endl;
5957 const size_t newAllocSize =
static_cast<size_t>(exports_a.size());
5958 if (
static_cast<size_t>(exports.extent(0)) < newAllocSize) {
5959 const std::string oldLabel = exports.view_device().label();
5960 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
5961 exports = exports_type(newLabel, newAllocSize);
5966 exports.modify_host();
5968 auto exports_h = exports.view_host();
5969 auto exports_h_sub = subview(exports_h, range_type(0, newAllocSize));
5973 typedef typename exports_type::t_host::execution_space HES;
5974 typedef Kokkos::Device<HES, HostSpace> host_device_type;
5975 Kokkos::View<const char*, host_device_type>
5976 exports_a_kv(exports_a.getRawPtr(), newAllocSize);
5978 Kokkos::deep_copy(exports_h_sub, exports_a_kv);
5983 reduceAll<int, int>(comm, REDUCE_MAX, lclBad, outArg(gblBad));
5986 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
5987 "packNew() or pack() threw an exception on "
5988 "one or more participating processes.");
5991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lclBad != 0, std::logic_error,
5992 "packNew threw an exception on one "
5993 "or more participating processes. Here is this process' error "
5999 std::ostringstream os;
6000 os << *prefix <<
"packAndPrepare: Done!" << endl
6010 std::cerr << os.str();
6014template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6016CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6017 packRow(
char exports[],
6018 const size_t offset,
6019 const size_t numEnt,
6020 const GlobalOrdinal gidsIn[],
6021 const impl_scalar_type valsIn[],
6022 const size_t numBytesPerValue)
const {
6023 using Kokkos::subview;
6026 typedef LocalOrdinal LO;
6027 typedef GlobalOrdinal GO;
6028 typedef impl_scalar_type ST;
6036 const LO numEntLO =
static_cast<size_t>(numEnt);
6038 const size_t numEntBeg = offset;
6039 const size_t numEntLen = PackTraits<LO>::packValueCount(numEntLO);
6040 const size_t gidsBeg = numEntBeg + numEntLen;
6041 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
6042 const size_t valsBeg = gidsBeg + gidsLen;
6043 const size_t valsLen = numEnt * numBytesPerValue;
6045 char*
const numEntOut = exports + numEntBeg;
6046 char*
const gidsOut = exports + gidsBeg;
6047 char*
const valsOut = exports + valsBeg;
6049 size_t numBytesOut = 0;
6051 numBytesOut += PackTraits<LO>::packValue(numEntOut, numEntLO);
6054 Kokkos::pair<int, size_t> p;
6055 p = PackTraits<GO>::packArray(gidsOut, gidsIn, numEnt);
6056 errorCode += p.first;
6057 numBytesOut += p.second;
6059 p = PackTraits<ST>::packArray(valsOut, valsIn, numEnt);
6060 errorCode += p.first;
6061 numBytesOut += p.second;
6064 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6065 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
6068 << numBytesOut <<
" != expectedNumBytes = "
6069 << expectedNumBytes <<
".");
6070 TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
6072 "PackTraits::packArray returned a nonzero error code");
6077template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6079CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6080 unpackRow(GlobalOrdinal gidsOut[],
6081 impl_scalar_type valsOut[],
6082 const char imports[],
6083 const size_t offset,
6084 const size_t numBytes,
6085 const size_t numEnt,
6086 const size_t numBytesPerValue) {
6087 using Kokkos::subview;
6090 typedef LocalOrdinal LO;
6091 typedef GlobalOrdinal GO;
6092 typedef impl_scalar_type ST;
6094 Details::ProfilingRegion region_upack_row(
6095 "Tpetra::CrsMatrix::unpackRow",
6098 if (numBytes == 0) {
6101 const int myRank = this->getMap()->getComm()->getRank();
6102 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6103 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6104 "number of entries to unpack (as reported by numPacketsPerLID) "
6105 "for this row numEnt="
6106 << numEnt <<
" != 0.");
6111 if (numEnt == 0 && numBytes != 0) {
6112 const int myRank = this->getMap()->getComm()->getRank();
6113 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6114 "unpackRow: The number of entries to unpack (as reported by "
6115 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6117 << numBytes <<
" != 0.");
6123 const size_t numEntBeg = offset;
6124 const size_t numEntLen = PackTraits<LO>::packValueCount(lid);
6125 const size_t gidsBeg = numEntBeg + numEntLen;
6126 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
6127 const size_t valsBeg = gidsBeg + gidsLen;
6128 const size_t valsLen = numEnt * numBytesPerValue;
6130 const char*
const numEntIn = imports + numEntBeg;
6131 const char*
const gidsIn = imports + gidsBeg;
6132 const char*
const valsIn = imports + valsBeg;
6134 size_t numBytesOut = 0;
6137 numBytesOut += PackTraits<LO>::unpackValue(numEntOut, numEntIn);
6138 if (
static_cast<size_t>(numEntOut) != numEnt ||
6139 numEntOut ==
static_cast<LO
>(0)) {
6140 const int myRank = this->getMap()->getComm()->getRank();
6141 std::ostringstream os;
6142 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6143 bool firstErrorCondition =
false;
6144 if (
static_cast<size_t>(numEntOut) != numEnt) {
6145 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6146 <<
" does not equal number of entries unpacked from imports "
6148 << numEntOut <<
".";
6149 firstErrorCondition =
true;
6151 if (numEntOut ==
static_cast<LO
>(0)) {
6152 if (firstErrorCondition) {
6155 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6156 "but number of bytes to unpack for this row numBytes="
6158 <<
" != 0. This should never happen, since packRow should only "
6159 "ever pack rows with a nonzero number of entries. In this case, "
6160 "the number of entries from numPacketsPerLID is numEnt="
6164 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str());
6168 Kokkos::pair<int, size_t> p;
6169 p = PackTraits<GO>::unpackArray(gidsOut, gidsIn, numEnt);
6170 errorCode += p.first;
6171 numBytesOut += p.second;
6173 p = PackTraits<ST>::unpackArray(valsOut, valsIn, numEnt);
6174 errorCode += p.first;
6175 numBytesOut += p.second;
6178 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6180 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6181 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
6184 << numBytesOut <<
" != expectedNumBytes = "
6185 << expectedNumBytes <<
".");
6187 TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
6189 "PackTraits::unpackArray returned a nonzero error code");
6194template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6195void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6196 allocatePackSpaceNew(Kokkos::DualView<char*, buffer_device_type>& exports,
6197 size_t& totalNumEntries,
6198 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const {
6199 using Details::Behavior;
6202 typedef impl_scalar_type IST;
6203 typedef LocalOrdinal LO;
6204 typedef GlobalOrdinal GO;
6210 const bool verbose = Behavior::verbose(
"CrsMatrix");
6211 std::unique_ptr<std::string> prefix;
6213 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6214 std::ostringstream os;
6215 os << *prefix <<
"Before:"
6223 std::cerr << os.str();
6228 const LO numExportLIDs =
static_cast<LO
>(exportLIDs.extent(0));
6230 TEUCHOS_ASSERT(!exportLIDs.need_sync_host());
6231 auto exportLIDs_h = exportLIDs.view_host();
6234 totalNumEntries = 0;
6235 for (LO i = 0; i < numExportLIDs; ++i) {
6236 const LO lclRow = exportLIDs_h[i];
6237 size_t curNumEntries = this->getNumEntriesInLocalRow(lclRow);
6240 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid()) {
6243 totalNumEntries += curNumEntries;
6254 const size_t allocSize =
6255 static_cast<size_t>(numExportLIDs) *
sizeof(LO) +
6256 totalNumEntries * (
sizeof(IST) +
sizeof(GO));
6257 if (
static_cast<size_t>(exports.extent(0)) < allocSize) {
6258 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6260 const std::string oldLabel = exports.view_device().label();
6261 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6262 exports = exports_type(newLabel, allocSize);
6266 std::ostringstream os;
6267 os << *prefix <<
"After:"
6275 std::cerr << os.str();
6279template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6281 packNew(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
exportLIDs,
6282 Kokkos::DualView<char*, buffer_device_type>& exports,
6287 if (this->isStaticGraph()) {
6288 using ::Tpetra::Details::packCrsMatrixNew;
6297template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6300 Kokkos::DualView<char*, buffer_device_type>& exports,
6311 using ST = impl_scalar_type;
6314 const bool verbose = Behavior::verbose(
"CrsMatrix");
6315 std::unique_ptr<std::string>
prefix;
6317 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6318 std::ostringstream
os;
6320 std::cerr <<
os.str();
6323 const size_t numExportLIDs =
static_cast<size_t>(exportLIDs.extent(0));
6324 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numExportLIDs !=
static_cast<size_t>(numPacketsPerLID.extent(0)),
6325 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent(0) <<
".");
6330 constantNumPackets = 0;
6335 size_t totalNumEntries = 0;
6336 this->allocatePackSpaceNew(exports, totalNumEntries, exportLIDs);
6337 const size_t bufSize =
static_cast<size_t>(exports.extent(0));
6340 exports.clear_sync_state();
6341 exports.modify_host();
6342 auto exports_h = exports.view_host();
6344 std::ostringstream os;
6345 os << *prefix <<
"After marking exports as modified on host, "
6346 << dualViewStatusToString(exports,
"exports") << endl;
6347 std::cerr << os.str();
6351 auto exportLIDs_h = exportLIDs.view_host();
6354 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6355 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6356 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6361 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
6364 typename global_inds_host_view_type::non_const_type gidsIn_k;
6365 if (this->isLocallyIndexed()) {
6367 typename global_inds_host_view_type::non_const_type(
"packGids",
6372 for (
size_t i = 0; i < numExportLIDs; ++i) {
6373 const LO lclRow = exportLIDs_h[i];
6375 size_t numBytes = 0;
6376 size_t numEnt = this->getNumEntriesInLocalRow(lclRow);
6383 numPacketsPerLID_h[i] = 0;
6387 if (this->isLocallyIndexed()) {
6388 typename global_inds_host_view_type::non_const_type gidsIn;
6389 values_host_view_type valsIn;
6393 local_inds_host_view_type lidsIn;
6394 this->getLocalRowView(lclRow, lidsIn, valsIn);
6395 const map_type&
colMap = *(this->getColMap());
6396 for (
size_t k = 0; k < numEnt; ++k) {
6397 gidsIn_k[k] =
colMap.getGlobalElement(lidsIn[k]);
6399 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0), GO(numEnt)));
6401 const size_t numBytesPerValue =
6402 PackTraits<ST>::packValueCount(valsIn[0]);
6403 numBytes = this->
packRow(exports_h.data(), offset, numEnt,
6404 gidsIn.data(), valsIn.data(),
6406 }
else if (this->isGloballyIndexed()) {
6407 global_inds_host_view_type gidsIn;
6408 values_host_view_type valsIn;
6414 const map_type&
rowMap = *(this->getRowMap());
6415 const GO gblRow =
rowMap.getGlobalElement(lclRow);
6416 this->getGlobalRowView(gblRow, gidsIn, valsIn);
6418 const size_t numBytesPerValue =
6419 PackTraits<ST>::packValueCount(valsIn[0]);
6420 numBytes = this->
packRow(exports_h.data(), offset, numEnt,
6421 gidsIn.data(), valsIn.data(),
6428 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6429 "First invalid offset into 'exports' pack buffer at index i = " << i
6430 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " << bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
".");
6434 numPacketsPerLID_h[i] = numBytes;
6439 std::ostringstream os;
6440 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6447 std::cerr << os.str();
6451template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6453CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6454 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6455 const LocalOrdinal numEnt,
6456 const impl_scalar_type vals[],
6457 const GlobalOrdinal cols[],
6459 const char*
const prefix,
6461 const bool verbose) {
6462 using GO = GlobalOrdinal;
6466 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
6467 Teuchos::ArrayView<const GO> cols_av(numEnt == 0 ?
nullptr : cols, numEnt);
6468 Teuchos::ArrayView<const Scalar> vals_av(numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*>(vals), numEnt);
6473 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6474 prefix, debug, verbose);
6478template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6479void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6480 combineGlobalValues(
6481 const GlobalOrdinal globalRowIndex,
6482 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6483 const Teuchos::ArrayView<const Scalar>& values,
6485 const char*
const prefix,
6487 const bool verbose) {
6488 const char tfecfFuncName[] =
"combineGlobalValues: ";
6490 if (isStaticGraph()) {
6494 if (combineMode ==
ADD) {
6495 sumIntoGlobalValues(globalRowIndex, columnIndices, values);
6496 }
else if (combineMode ==
REPLACE) {
6497 replaceGlobalValues(globalRowIndex, columnIndices, values);
6498 }
else if (combineMode ==
ABSMAX) {
6499 using ::Tpetra::Details::AbsMax;
6501 this->
template transformGlobalValues<AbsMax<Scalar>>(globalRowIndex,
6504 }
else if (combineMode ==
INSERT) {
6505 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph() && combineMode ==
INSERT,
6506 std::invalid_argument,
6507 "INSERT combine mode is forbidden "
6508 "if the matrix has a static (const) graph (i.e., was "
6509 "constructed with the CrsMatrix constructor that takes a "
6510 "const CrsGraph pointer).");
6512 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::logic_error,
6513 "Invalid combine mode; should "
6515 "Please report this bug to the Tpetra developers.");
6518 if (combineMode ==
ADD || combineMode ==
INSERT) {
6525 insertGlobalValuesFilteredChecked(globalRowIndex,
6526 columnIndices, values, prefix, debug, verbose);
6537 else if (combineMode ==
ABSMAX) {
6538 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6539 !isStaticGraph() && combineMode ==
ABSMAX, std::logic_error,
6540 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6542 }
else if (combineMode ==
REPLACE) {
6543 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6544 !isStaticGraph() && combineMode ==
REPLACE, std::logic_error,
6545 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6548 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6549 true, std::logic_error,
6550 "Should never get here! Please report this "
6551 "bug to the Tpetra developers.");
6556template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6559 Kokkos::DualView<char*, buffer_device_type> imports,
6568 ProfilingRegion
regionUAC(
"Tpetra::CrsMatrix::unpackAndCombine");
6570 const bool debug = Behavior::debug(
"CrsMatrix");
6571 const bool verbose = Behavior::verbose(
"CrsMatrix");
6576 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6578 std::unique_ptr<std::string>
prefix;
6580 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6581 std::ostringstream
os;
6584 << dualViewStatusToString(
importLIDs,
"importLIDs")
6587 << dualViewStatusToString(imports,
"imports")
6596 std::cerr <<
os.str();
6602 std::ostringstream
os;
6603 os <<
"Invalid combine mode. Valid modes are {";
6614 std::invalid_argument,
"importLIDs.extent(0)=" <<
importLIDs.extent(0) <<
" != numPacketsPerLID.extent(0)=" <<
numPacketsPerLID.extent(0) <<
".");
6622 using Teuchos::reduceAll;
6623 std::unique_ptr<std::ostringstream>
msg(
new std::ostringstream());
6629 }
catch (std::exception&
e) {
6634 const Teuchos::Comm<int>& comm = *(this->getComm());
6642 std::ostringstream
os;
6643 os <<
"Proc " << comm.getRank() <<
": " <<
msg->str() <<
endl;
6644 msg = std::unique_ptr<std::ostringstream>(
new std::ostringstream());
6647 <<
"unpackAndCombineImpl "
6648 "threw an exception on one or more participating processes: "
6659 std::ostringstream
os;
6662 << dualViewStatusToString(
importLIDs,
"importLIDs")
6665 << dualViewStatusToString(imports,
"imports")
6670 std::cerr <<
os.str();
6674template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6677 const Kokkos::DualView<
const local_ordinal_type*,
6679 Kokkos::DualView<char*, buffer_device_type> imports,
6683 const bool verbose) {
6685 "Tpetra::CrsMatrix::unpackAndCombineImpl",
6689 std::unique_ptr<std::string>
prefix;
6692 std::ostringstream
os;
6693 os << *
prefix <<
"isStaticGraph(): "
6694 << (isStaticGraph() ?
"true" :
"false")
6695 <<
", importLIDs.extent(0): "
6697 <<
", imports.extent(0): "
6698 << imports.extent(0)
6699 <<
", numPacketsPerLID.extent(0): "
6702 std::cerr <<
os.str();
6705 if (isStaticGraph()) {
6706 using Details::unpackCrsMatrixAndCombineNew;
6707 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
6708 importLIDs, constantNumPackets,
6712 using padding_type =
typename crs_graph_type::padding_type;
6713 std::unique_ptr<padding_type> padding;
6715 padding = myGraph_->computePaddingForCrsMatrixUnpack(
6716 importLIDs, imports, numPacketsPerLID, verbose);
6717 }
catch (std::exception& e) {
6718 const auto rowMap = getRowMap();
6719 const auto comm =
rowMap.is_null() ? Teuchos::null :
rowMap->getComm();
6720 const int myRank = comm.is_null() ? -1 : comm->getRank();
6721 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"Proc " << myRank <<
": "
6722 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
6723 "threw an exception: "
6727 std::ostringstream os;
6728 os << *prefix <<
"Call applyCrsPadding" << endl;
6729 std::cerr << os.str();
6731 applyCrsPadding(*padding, verbose);
6734 std::ostringstream os;
6735 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
6736 std::cerr << os.str();
6738 unpackAndCombineImplNonStatic(importLIDs, imports,
6745 std::ostringstream os;
6746 os << *prefix <<
"Done" << endl;
6747 std::cerr << os.str();
6751template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6752void CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6753 unpackAndCombineImplNonStatic(
6754 const Kokkos::DualView<
const local_ordinal_type*,
6755 buffer_device_type>& importLIDs,
6756 Kokkos::DualView<char*, buffer_device_type> imports,
6757 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6758 const size_t constantNumPackets,
6760 using Details::Behavior;
6763 using Details::PackTraits;
6764 using Details::ScalarViewTraits;
6765 using Kokkos::MemoryUnmanaged;
6766 using Kokkos::subview;
6769 using LO = LocalOrdinal;
6770 using GO = GlobalOrdinal;
6771 using ST = impl_scalar_type;
6772 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
6774 typename View<int*, device_type>::host_mirror_type::execution_space;
6775 using pair_type = std::pair<typename View<int*, HES>::size_type,
6776 typename View<int*, HES>::size_type>;
6777 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
6778 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
6779 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
6781 const bool debug = Behavior::debug(
"CrsMatrix");
6782 const bool verbose = Behavior::verbose(
"CrsMatrix");
6783 std::unique_ptr<std::string> prefix;
6785 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
6786 std::ostringstream os;
6787 os << *prefix << endl;
6788 std::cerr << os.str();
6790 const char*
const prefix_raw =
6791 verbose ? prefix.get()->c_str() :
nullptr;
6793 const size_type numImportLIDs = importLIDs.extent(0);
6794 if (combineMode ==
ZERO || numImportLIDs == 0) {
6798 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
6799 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
6803 if (imports.need_sync_host()) {
6804 imports.sync_host();
6806 auto imports_h = imports.view_host();
6809 if (numPacketsPerLID.need_sync_host()) {
6810 numPacketsPerLID.sync_host();
6812 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6814 TEUCHOS_ASSERT(!importLIDs.need_sync_host());
6815 auto importLIDs_h = importLIDs.view_host();
6817 size_t numBytesPerValue;
6828 numBytesPerValue = PackTraits<ST>::packValueCount(val);
6833 size_t maxRowNumEnt = 0;
6834 for (size_type i = 0; i < numImportLIDs; ++i) {
6835 const size_t numBytes = numPacketsPerLID_h[i];
6836 if (numBytes == 0) {
6841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(offset + numBytes >
size_t(imports_h.extent(0)),
6842 std::logic_error,
": At local row index importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset <<
") + numBytes (=" << numBytes <<
") > "
6843 "imports_h.extent(0)="
6844 << imports_h.extent(0) <<
".");
6849 const size_t theNumBytes =
6850 PackTraits<LO>::packValueCount(numEntLO);
6851 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(theNumBytes > numBytes, std::logic_error,
": theNumBytes=" << theNumBytes <<
" > numBytes = " << numBytes <<
".");
6853 const char*
const inBuf = imports_h.data() + offset;
6854 const size_t actualNumBytes =
6855 PackTraits<LO>::unpackValue(numEntLO, inBuf);
6858 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(actualNumBytes > numBytes, std::logic_error,
": At i=" << i <<
", actualNumBytes=" << actualNumBytes <<
" > numBytes=" << numBytes <<
".");
6859 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numEntLO == 0, std::logic_error,
6860 ": At local row index "
6862 << i <<
"]=" << importLIDs_h[i] <<
", "
6863 "the number of entries read from the packed data is "
6865 << numEntLO <<
", but numBytes=" << numBytes
6869 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
6877 View<GO*, HES> gblColInds;
6878 View<LO*, HES> lclColInds;
6879 View<ST*, HES> vals;
6892 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
6893 gid, maxRowNumEnt,
"gids");
6894 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
6895 lid, maxRowNumEnt,
"lids");
6896 vals = ScalarViewTraits<ST, HES>::allocateArray(
6897 val, maxRowNumEnt,
"vals");
6901 for (size_type i = 0; i < numImportLIDs; ++i) {
6902 const size_t numBytes = numPacketsPerLID_h[i];
6903 if (numBytes == 0) {
6907 const char*
const inBuf = imports_h.data() + offset;
6908 (void)PackTraits<LO>::unpackValue(numEntLO, inBuf);
6910 const size_t numEnt =
static_cast<size_t>(numEntLO);
6912 const LO lclRow = importLIDs_h[i];
6914 gids_out_type gidsOut = subview(gblColInds, pair_type(0, numEnt));
6915 vals_out_type valsOut = subview(vals, pair_type(0, numEnt));
6917 const size_t numBytesOut =
6918 unpackRow(gidsOut.data(), valsOut.data(), imports_h.data(),
6919 offset, numBytes, numEnt, numBytesPerValue);
6920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(numBytes != numBytesOut, std::logic_error,
": At i=" << i <<
", numBytes=" << numBytes <<
" != numBytesOut=" << numBytesOut <<
".");
6922 const ST*
const valsRaw =
const_cast<const ST*
>(valsOut.data());
6923 const GO*
const gidsRaw =
const_cast<const GO*
>(gidsOut.data());
6924 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
6925 combineMode, prefix_raw, debug, verbose);
6931 std::ostringstream os;
6932 os << *prefix <<
"Done" << endl;
6933 std::cerr << os.str();
6937template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6938Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
6941 const bool force)
const {
6942 using Teuchos::null;
6947 !this->hasColMap(), std::runtime_error,
6948 "Tpetra::CrsMatrix::getColumn"
6949 "MapMultiVector: You may only call this method if the matrix has a "
6950 "column Map. If the matrix does not yet have a column Map, you should "
6951 "first call fillComplete (with domain and range Map if necessary).");
6956 !this->getGraph()->isFillComplete(), std::runtime_error,
6958 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
6959 "this matrix's graph is fill complete.");
6977 if (importMV_.is_null() || importMV_->getNumVectors() !=
numVecs) {
6994template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6995Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
6998 const bool force)
const {
6999 using Teuchos::null;
7006 !this->getGraph()->isFillComplete(), std::runtime_error,
7008 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7009 "matrix's graph is fill complete.");
7029 if (exportMV_.is_null() || exportMV_->getNumVectors() !=
numVecs) {
7039template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7043 myGraph_.is_null(), std::logic_error,
7044 "Tpetra::CrsMatrix::"
7045 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7046 "was created with a constant graph (that is, when it was created using "
7047 "the version of its constructor that takes an RCP<const CrsGraph>). "
7048 "This is because the matrix is not allowed to modify the graph in that "
7049 "case, but removing empty processes requires modifying the graph.");
7050 myGraph_->removeEmptyProcessesInPlace(
newMap);
7054 this->map_ = this->getRowMap();
7058 staticGraph_ = Teuchos::rcp_const_cast<const Graph>(myGraph_);
7061template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7062Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
7067 const Teuchos::RCP<const map_type>& domainMap,
7068 const Teuchos::RCP<const map_type>&
rangeMap,
7069 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
7071 using Teuchos::Array;
7072 using Teuchos::ArrayView;
7073 using Teuchos::ParameterList;
7076 using Teuchos::rcp_implicit_cast;
7077 using Teuchos::sublist;
7080 using crs_matrix_type =
7082 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7086 std::unique_ptr<std::string>
prefix;
7088 prefix = this->createPrefix(
"CrsMatrix",
"add");
7089 std::ostringstream
os;
7091 std::cerr <<
os.str();
7094 const crs_matrix_type&
B = *
this;
7095 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7096 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7114 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7115 "then you must supply a nonnull domain Map to this method.");
7125 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7126 "then you must supply a nonnull range Map to this method.");
7140 std::invalid_argument,
7141 errPfx <<
"The input RowMatrix A must have a domain Map "
7142 "which is the same as (isSameAs) this RowMatrix's "
7145 errPfx <<
"The input RowMatrix A must have a range Map "
7146 "which is the same as (isSameAs) this RowMatrix's range "
7150 std::invalid_argument,
7151 errPfx <<
"The input domain Map must be the same as "
7152 "(isSameAs) this RowMatrix's domain Map.");
7155 std::invalid_argument,
7156 errPfx <<
"The input range Map must be the same as "
7157 "(isSameAs) this RowMatrix's range Map.");
7162 std::invalid_argument,
7163 errPfx <<
"The input domain Map must be the same as "
7164 "(isSameAs) this RowMatrix's domain Map.");
7166 std::invalid_argument,
7167 errPfx <<
"The input range Map must be the same as "
7168 "(isSameAs) this RowMatrix's range Map.");
7171 std::invalid_argument,
errPfx <<
"If neither A nor B "
7172 "have a domain and range Map, then you must supply a "
7173 "nonnull domain and range Map to this method.");
7205 for (LO localRow = 0; localRow <
localNumRows; ++localRow) {
7206 const size_t A_numEntries =
A.getNumEntriesInLocalRow(localRow);
7212 for (LO localRow = 0; localRow <
localNumRows; ++localRow) {
7213 const size_t B_numEntries =
B.getNumEntriesInLocalRow(localRow);
7233 "be the same for statically allocated matrices, to ensure "
7234 "that there is sufficient space to do the addition.");
7238 errPfx <<
"C should not be null at this point. "
7239 "Please report this bug to the Tpetra developers.");
7242 std::ostringstream
os;
7243 os << *
prefix <<
"Compute C = alpha*A + beta*B" <<
endl;
7244 std::cerr <<
os.str();
7246 using gids_type = nonconst_global_inds_host_view_type;
7247 using vals_type = nonconst_values_host_view_type;
7301 std::ostringstream
os;
7303 std::cerr <<
os.str();
7310 }
else if (verbose) {
7311 std::ostringstream
os;
7312 os << *
prefix <<
"Do NOT call fillComplete on C" <<
endl;
7313 std::cerr <<
os.str();
7317 std::ostringstream
os;
7319 std::cerr <<
os.str();
7324template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7327 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>&
rowTransfer,
7328 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>>&
domainTransfer,
7329 const Teuchos::RCP<const map_type>& domainMap,
7330 const Teuchos::RCP<const map_type>&
rangeMap,
7331 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
7338 using Teuchos::ArrayRCP;
7339 using Teuchos::ArrayView;
7340 using Teuchos::Comm;
7341 using Teuchos::ParameterList;
7345 typedef node_type
NT;
7350 const bool debug = Behavior::debug(
"CrsMatrix");
7351 const bool verbose = Behavior::verbose(
"CrsMatrix");
7352 int MyPID = getComm()->getRank();
7357 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7358 std::ostringstream
os;
7360 std::cerr <<
os.str();
7367 bool reverseMode =
false;
7368 bool restrictComm =
false;
7370 int mm_optimization_core_count =
7371 Behavior::TAFC_OptimizationCoreCount();
7372 RCP<ParameterList> matrixparams;
7373 bool overrideAllreduce =
false;
7374 bool useKokkosPath =
false;
7375 if (!params.is_null()) {
7376 matrixparams = sublist(params,
"CrsMatrix");
7377 reverseMode = params->get(
"Reverse Mode", reverseMode);
7378 useKokkosPath = params->get(
"TAFC: use kokkos path", useKokkosPath);
7379 restrictComm = params->get(
"Restrict Communicator", restrictComm);
7380 auto& slist = params->sublist(
"matrixmatrix: kernel params",
false);
7381 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7382 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
7384 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7385 if (getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7386 if (reverseMode) isMM =
false;
7390 std::shared_ptr<::Tpetra::Details::CommRequest> iallreduceRequest;
7392 int reduced_mismatch = 0;
7393 if (isMM && !overrideAllreduce) {
7395 const bool source_vals = !getGraph()->getImporter().is_null();
7396 const bool target_vals = !(rowTransfer.getExportLIDs().size() == 0 ||
7397 rowTransfer.getRemoteLIDs().size() == 0);
7398 mismatch = (source_vals != target_vals) ? 1 : 0;
7400 ::Tpetra::Details::iallreduce(mismatch, reduced_mismatch,
7401 Teuchos::REDUCE_MAX, *(getComm()));
7404#ifdef HAVE_TPETRA_MMM_TIMINGS
7405 using Teuchos::TimeMonitor;
7407 if (!params.is_null())
7408 label = params->get(
"Timer Label", label);
7409 std::string prefix = std::string(
"Tpetra ") + label + std::string(
": ");
7412 std::ostringstream os;
7420 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") + tlstr));
7428 const import_type* xferAsImport =
dynamic_cast<const import_type*
>(&rowTransfer);
7429 const export_type* xferAsExport =
dynamic_cast<const export_type*
>(&rowTransfer);
7430 TEUCHOS_TEST_FOR_EXCEPTION(
7431 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7432 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7433 "argument must be either an Import or an Export, and its template "
7434 "parameters must match the corresponding template parameters of the "
7442 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
7443 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
7445 if (!domainTransfer.is_null()) {
7446 TEUCHOS_TEST_FOR_EXCEPTION(
7447 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7448 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7449 "argument must be either an Import or an Export, and its template "
7450 "parameters must match the corresponding template parameters of the "
7453 TEUCHOS_TEST_FOR_EXCEPTION(
7454 (xferAsImport !=
nullptr || !xferDomainAsImport.is_null()) &&
7455 ((xferAsImport !=
nullptr && xferDomainAsImport.is_null()) ||
7456 (xferAsImport ==
nullptr && !xferDomainAsImport.is_null())),
7457 std::invalid_argument,
7458 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7459 "arguments must be of the same type (either Import or Export).");
7461 TEUCHOS_TEST_FOR_EXCEPTION(
7462 (xferAsExport !=
nullptr || !xferDomainAsExport.is_null()) &&
7463 ((xferAsExport !=
nullptr && xferDomainAsExport.is_null()) ||
7464 (xferAsExport ==
nullptr && !xferDomainAsExport.is_null())),
7465 std::invalid_argument,
7466 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7467 "arguments must be of the same type (either Import or Export).");
7472 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
7476 RCP<const map_type> MyRowMap = reverseMode ? rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
7477 RCP<const map_type> MyColMap;
7479 RCP<const map_type> MyRangeMap = !rangeMap.is_null() ? rangeMap : getRangeMap();
7480 RCP<const map_type> BaseRowMap = MyRowMap;
7481 RCP<const map_type> BaseDomainMap = MyDomainMap;
7489 if (!destMat.is_null()) {
7500 const bool NewFlag = !destMat->getGraph()->isLocallyIndexed() &&
7501 !destMat->getGraph()->isGloballyIndexed();
7502 TEUCHOS_TEST_FOR_EXCEPTION(
7503 !NewFlag, std::invalid_argument,
7504 "Tpetra::CrsMatrix::"
7505 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7506 "to be nonnull, if its graph is empty (neither locally nor globally "
7515 TEUCHOS_TEST_FOR_EXCEPTION(
7516 !destMat->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
7517 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7518 "input argument 'destMat' is not the same as the (row) Map specified "
7519 "by the input argument 'rowTransfer'.");
7520 TEUCHOS_TEST_FOR_EXCEPTION(
7521 !destMat->checkSizes(*
this), std::invalid_argument,
7522 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7523 "destination matrix, but checkSizes() indicates that it is not a legal "
7524 "legal target for redistribution from the source matrix (*this). This "
7525 "may mean that they do not have the same dimensions.");
7539 TEUCHOS_TEST_FOR_EXCEPTION(
7540 !(reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
7541 std::invalid_argument,
7542 "Tpetra::CrsMatrix::transferAndFillComplete: "
7543 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7544 TEUCHOS_TEST_FOR_EXCEPTION(
7545 !(!reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
7546 std::invalid_argument,
7547 "Tpetra::CrsMatrix::transferAndFillComplete: "
7548 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7551 TEUCHOS_TEST_FOR_EXCEPTION(
7552 !xferDomainAsImport.is_null() && !xferDomainAsImport->getTargetMap()->isSameAs(*
domainMap),
7553 std::invalid_argument,
7554 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7555 "argument must be the same as the rebalanced domain map 'domainMap'");
7557 TEUCHOS_TEST_FOR_EXCEPTION(
7558 !xferDomainAsExport.is_null() && !xferDomainAsExport->getSourceMap()->isSameAs(*
domainMap),
7559 std::invalid_argument,
7560 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7561 "argument must be the same as the rebalanced domain map 'domainMap'");
7574 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7575 ArrayView<const LO> ExportLIDs = reverseMode ? rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7576 auto RemoteLIDs = reverseMode ? rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7577 auto PermuteToLIDs = reverseMode ? rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7578 auto PermuteFromLIDs = reverseMode ? rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7579 Distributor& Distor = rowTransfer.getDistributor();
7582 Teuchos::Array<int> SourcePids;
7585 RCP<const map_type> ReducedRowMap, ReducedColMap,
7586 ReducedDomainMap, ReducedRangeMap;
7587 RCP<const Comm<int>> ReducedComm;
7591 if (destMat.is_null()) {
7592 destMat = rcp(
new this_CRS_type(MyRowMap, 0, matrixparams));
7599#ifdef HAVE_TPETRA_MMM_TIMINGS
7600 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7602 ReducedRowMap = MyRowMap->removeEmptyProcesses();
7603 ReducedComm = ReducedRowMap.is_null() ? Teuchos::null : ReducedRowMap->getComm();
7604 destMat->removeEmptyProcessesInPlace(ReducedRowMap);
7606 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ? ReducedRowMap : MyDomainMap->replaceCommWithSubset(ReducedComm);
7607 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ? ReducedRowMap : MyRangeMap->replaceCommWithSubset(ReducedComm);
7610 MyRowMap = ReducedRowMap;
7611 MyDomainMap = ReducedDomainMap;
7612 MyRangeMap = ReducedRangeMap;
7615 if (!ReducedComm.is_null()) {
7616 MyPID = ReducedComm->getRank();
7621 ReducedComm = MyRowMap->getComm();
7628 RCP<const import_type> MyImporter = getGraph()->getImporter();
7631 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7633 if (!restrictComm && !MyImporter.is_null() && bSameDomainMap) {
7634#ifdef HAVE_TPETRA_MMM_TIMINGS
7635 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
7643 Import_Util::getPids(*MyImporter, SourcePids,
false);
7644 }
else if (restrictComm && !MyImporter.is_null() && bSameDomainMap) {
7647#ifdef HAVE_TPETRA_MMM_TIMINGS
7648 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
7650 IntVectorType SourceDomain_pids(getDomainMap(),
true);
7651 IntVectorType SourceCol_pids(getColMap());
7653 SourceDomain_pids.putScalar(MyPID);
7655 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
7656 SourcePids.resize(getColMap()->getLocalNumElements());
7657 SourceCol_pids.get1dCopy(SourcePids());
7658 }
else if (MyImporter.is_null()) {
7660#ifdef HAVE_TPETRA_MMM_TIMINGS
7661 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
7663 SourcePids.resize(getColMap()->getLocalNumElements());
7664 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
7665 }
else if (!MyImporter.is_null() &&
7666 !domainTransfer.is_null()) {
7671#ifdef HAVE_TPETRA_MMM_TIMINGS
7672 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
7676 IntVectorType TargetDomain_pids(
domainMap);
7677 TargetDomain_pids.putScalar(MyPID);
7680 IntVectorType SourceDomain_pids(getDomainMap());
7683 IntVectorType SourceCol_pids(getColMap());
7685 if (!reverseMode && !xferDomainAsImport.is_null()) {
7686 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
7687 }
else if (reverseMode && !xferDomainAsExport.is_null()) {
7688 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
7689 }
else if (!reverseMode && !xferDomainAsExport.is_null()) {
7690 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport,
INSERT);
7691 }
else if (reverseMode && !xferDomainAsImport.is_null()) {
7692 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport,
INSERT);
7694 TEUCHOS_TEST_FOR_EXCEPTION(
7695 true, std::logic_error,
7696 "Tpetra::CrsMatrix::"
7697 "transferAndFillComplete: Should never get here! "
7698 "Please report this bug to a Tpetra developer.");
7700 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter,
INSERT);
7701 SourcePids.resize(getColMap()->getLocalNumElements());
7702 SourceCol_pids.get1dCopy(SourcePids());
7703 }
else if (!MyImporter.is_null() &&
7704 BaseDomainMap->isSameAs(*BaseRowMap) &&
7705 getDomainMap()->isSameAs(*getRowMap())) {
7707#ifdef HAVE_TPETRA_MMM_TIMINGS
7708 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
7711 IntVectorType TargetRow_pids(
domainMap);
7712 IntVectorType SourceRow_pids(getRowMap());
7713 IntVectorType SourceCol_pids(getColMap());
7715 TargetRow_pids.putScalar(MyPID);
7716 if (!reverseMode && xferAsImport !=
nullptr) {
7717 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport,
INSERT);
7718 }
else if (reverseMode && xferAsExport !=
nullptr) {
7719 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport,
INSERT);
7720 }
else if (!reverseMode && xferAsExport !=
nullptr) {
7721 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport,
INSERT);
7722 }
else if (reverseMode && xferAsImport !=
nullptr) {
7723 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport,
INSERT);
7725 TEUCHOS_TEST_FOR_EXCEPTION(
7726 true, std::logic_error,
7727 "Tpetra::CrsMatrix::"
7728 "transferAndFillComplete: Should never get here! "
7729 "Please report this bug to a Tpetra developer.");
7732 SourceCol_pids.doImport(SourceRow_pids, *MyImporter,
INSERT);
7733 SourcePids.resize(getColMap()->getLocalNumElements());
7734 SourceCol_pids.get1dCopy(SourcePids());
7736 TEUCHOS_TEST_FOR_EXCEPTION(
7737 true, std::invalid_argument,
7738 "Tpetra::CrsMatrix::"
7739 "transferAndFillComplete: This method only allows either domainMap == "
7740 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
7741 "getDomainMap () == getRowMap ()).");
7745 size_t constantNumPackets = destMat->constantNumberOfPackets();
7747#ifdef HAVE_TPETRA_MMM_TIMINGS
7748 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
7750 if (constantNumPackets == 0) {
7751 destMat->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7752 RemoteLIDs.view_host().size());
7758 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
7759 destMat->reallocImportsIfNeeded(rbufLen,
false,
nullptr);
7765#ifdef HAVE_TPETRA_MMM_TIMINGS
7766 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
7771 using Teuchos::outArg;
7772 using Teuchos::REDUCE_MAX;
7773 using Teuchos::reduceAll;
7774 RCP<const Teuchos::Comm<int>> comm = this->getComm();
7775 const int myRank = comm->getRank();
7777 std::ostringstream errStrm;
7781 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
7784 destMat->numExportPacketsPerLID_.modify_host();
7785 numExportPacketsPerLID =
7787 }
catch (std::exception& e) {
7788 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
7789 << e.what() << std::endl;
7792 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
7793 "an exception not a subclass of std::exception"
7798 if (!comm.is_null()) {
7799 reduceAll<int, int>(*comm, REDUCE_MAX, lclErr, outArg(gblErr));
7803 TEUCHOS_TEST_FOR_EXCEPTION(
7804 true, std::runtime_error,
7805 "getArrayViewFromDualView threw an "
7806 "exception on at least one process.");
7810 std::ostringstream os;
7811 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
7813 std::cerr << os.str();
7818 numExportPacketsPerLID,
7821 constantNumPackets);
7822 }
catch (std::exception& e) {
7823 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
7824 << e.what() << std::endl;
7827 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
7828 "an exception not a subclass of std::exception"
7834 std::ostringstream os;
7835 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
7837 std::cerr << os.str();
7840 if (!comm.is_null()) {
7841 reduceAll<int, int>(*comm, REDUCE_MAX, lclErr, outArg(gblErr));
7845 TEUCHOS_TEST_FOR_EXCEPTION(
7846 true, std::runtime_error,
7847 "packCrsMatrixWithOwningPIDs threw an "
7848 "exception on at least one process.");
7852 destMat->numExportPacketsPerLID_.modify_host();
7853 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7856 std::ostringstream os;
7857 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
7859 std::cerr << os.str();
7863 numExportPacketsPerLID,
7866 constantNumPackets);
7868 std::ostringstream os;
7869 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
7871 std::cerr << os.str();
7878#ifdef HAVE_TPETRA_MMM_TIMINGS
7879 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
7881 if (!communication_needed) {
7883 std::ostringstream os;
7884 os << *verbosePrefix <<
"Communication not needed" << std::endl;
7885 std::cerr << os.str();
7889 if (constantNumPackets == 0) {
7891 std::ostringstream os;
7892 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
7894 std::cerr << os.str();
7899 destMat->numExportPacketsPerLID_.sync_host();
7900 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7902 destMat->numImportPacketsPerLID_.sync_host();
7903 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7907 std::ostringstream os;
7908 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
7910 std::cerr << os.str();
7912 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
7913 destMat->numImportPacketsPerLID_.view_host());
7915 std::ostringstream os;
7916 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
7918 std::cerr << os.str();
7921 size_t totalImportPackets = 0;
7923 totalImportPackets += numImportPacketsPerLID[i];
7928 destMat->reallocImportsIfNeeded(totalImportPackets, verbose,
7929 verbosePrefix.get());
7930 destMat->imports_.modify_host();
7931 auto hostImports = destMat->imports_.view_host();
7934 destMat->exports_.sync_host();
7935 auto hostExports = destMat->exports_.view_host();
7937 std::ostringstream os;
7938 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
7940 std::cerr << os.str();
7942 Distor.doReversePostsAndWaits(hostExports,
7943 numExportPacketsPerLID,
7945 numImportPacketsPerLID);
7947 std::ostringstream os;
7948 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
7950 std::cerr << os.str();
7954 std::ostringstream os;
7955 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
7957 std::cerr << os.str();
7959 destMat->imports_.modify_host();
7960 auto hostImports = destMat->imports_.view_host();
7963 destMat->exports_.sync_host();
7964 auto hostExports = destMat->exports_.view_host();
7966 std::ostringstream os;
7967 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
7969 std::cerr << os.str();
7971 Distor.doReversePostsAndWaits(hostExports,
7975 std::ostringstream os;
7976 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
7978 std::cerr << os.str();
7982 if (constantNumPackets == 0) {
7984 std::ostringstream os;
7985 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
7987 std::cerr << os.str();
7992 destMat->numExportPacketsPerLID_.sync_host();
7993 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7995 destMat->numImportPacketsPerLID_.sync_host();
7996 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7999 std::ostringstream os;
8000 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8002 std::cerr << os.str();
8004 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8005 destMat->numImportPacketsPerLID_.view_host());
8007 std::ostringstream os;
8008 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8010 std::cerr << os.str();
8013 size_t totalImportPackets = 0;
8015 totalImportPackets += numImportPacketsPerLID[i];
8020 destMat->reallocImportsIfNeeded(totalImportPackets, verbose,
8021 verbosePrefix.get());
8022 destMat->imports_.modify_host();
8023 auto hostImports = destMat->imports_.view_host();
8026 destMat->exports_.sync_host();
8027 auto hostExports = destMat->exports_.view_host();
8029 std::ostringstream os;
8030 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8032 std::cerr << os.str();
8034 Distor.doPostsAndWaits(hostExports,
8035 numExportPacketsPerLID,
8037 numImportPacketsPerLID);
8039 std::ostringstream os;
8040 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8042 std::cerr << os.str();
8046 std::ostringstream os;
8047 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8049 std::cerr << os.str();
8051 destMat->imports_.modify_host();
8052 auto hostImports = destMat->imports_.view_host();
8055 destMat->exports_.sync_host();
8056 auto hostExports = destMat->exports_.view_host();
8058 std::ostringstream os;
8059 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8061 std::cerr << os.str();
8063 Distor.doPostsAndWaits(hostExports,
8067 std::ostringstream os;
8068 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8070 std::cerr << os.str();
8081 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8083 Teuchos::Array<int> RemotePids;
8085 Teuchos::Array<int> TargetPids;
8091 destMat->numImportPacketsPerLID_.modify_host();
8093#ifdef HAVE_TPETRA_MMM_TIMINGS
8094 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8096 ArrayRCP<size_t> CSR_rowptr;
8097 ArrayRCP<GO> CSR_colind_GID;
8098 ArrayRCP<LO> CSR_colind_LID;
8099 ArrayRCP<Scalar> CSR_vals;
8101 destMat->imports_.sync_device();
8102 destMat->numImportPacketsPerLID_.sync_device();
8104 size_t N = BaseRowMap->getLocalNumElements();
8106 auto RemoteLIDs_d = RemoteLIDs.view_device();
8107 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8108 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8113 destMat->imports_.view_device(),
8114 destMat->numImportPacketsPerLID_.view_device(),
8128 if (
typeid(LO) ==
typeid(GO)) {
8129 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
8131 CSR_colind_LID.resize(CSR_colind_GID.size());
8133 CSR_colind_LID.resize(CSR_colind_GID.size());
8138 for (
size_t i = 0; i < static_cast<size_t>(TargetPids.size()); i++) {
8139 if (TargetPids[i] == -1) TargetPids[i] = MyPID;
8141#ifdef HAVE_TPETRA_MMM_TIMINGS
8142 tmCopySPRdata = Teuchos::null;
8151 std::ostringstream os;
8152 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8154 std::cerr << os.str();
8157#ifdef HAVE_TPETRA_MMM_TIMINGS
8158 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8160 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr(),
8170 std::ostringstream os;
8171 os << *verbosePrefix <<
"restrictComm="
8172 << (restrictComm ?
"true" :
"false") << std::endl;
8173 std::cerr << os.str();
8180#ifdef HAVE_TPETRA_MMM_TIMINGS
8181 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8184 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
8185 MyColMap = ReducedColMap;
8190 std::ostringstream os;
8191 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8192 std::cerr << os.str();
8194 destMat->replaceColMap(MyColMap);
8201 if (ReducedComm.is_null()) {
8203 std::ostringstream os;
8204 os << *verbosePrefix <<
"I am no longer in the communicator; "
8207 std::cerr << os.str();
8216 if ((!reverseMode && xferAsImport !=
nullptr) ||
8217 (reverseMode && xferAsExport !=
nullptr)) {
8219 std::ostringstream os;
8220 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8221 std::cerr << os.str();
8223#ifdef HAVE_TPETRA_MMM_TIMINGS
8224 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8226 Import_Util::sortCrsEntries(CSR_rowptr(),
8229 }
else if ((!reverseMode && xferAsExport !=
nullptr) ||
8230 (reverseMode && xferAsImport !=
nullptr)) {
8232 std::ostringstream os;
8233 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8235 std::cerr << os.str();
8237#ifdef HAVE_TPETRA_MMM_TIMINGS
8238 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8240 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
8243 if (CSR_rowptr[N] !=
static_cast<size_t>(CSR_vals.size())) {
8244 CSR_colind_LID.resize(CSR_rowptr[N]);
8245 CSR_vals.resize(CSR_rowptr[N]);
8248 TEUCHOS_TEST_FOR_EXCEPTION(
8249 true, std::logic_error,
8250 "Tpetra::CrsMatrix::"
8251 "transferAndFillComplete: Should never get here! "
8252 "Please report this bug to a Tpetra developer.");
8259 std::ostringstream os;
8260 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8261 std::cerr << os.str();
8270#ifdef HAVE_TPETRA_MMM_TIMINGS
8271 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8273 destMat->setAllValues(CSR_rowptr, CSR_colind_LID, CSR_vals);
8284 destMat->numImportPacketsPerLID_.modify_host();
8286#ifdef HAVE_TPETRA_MMM_TIMINGS
8287 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8289 ArrayRCP<size_t> CSR_rowptr;
8290 ArrayRCP<GO> CSR_colind_GID;
8291 ArrayRCP<LO> CSR_colind_LID;
8292 ArrayRCP<Scalar> CSR_vals;
8294 destMat->imports_.sync_device();
8295 destMat->numImportPacketsPerLID_.sync_device();
8297 size_t N = BaseRowMap->getLocalNumElements();
8299 auto RemoteLIDs_d = RemoteLIDs.view_device();
8300 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8301 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8303 Kokkos::View<size_t*, device_type> CSR_rowptr_d;
8304 Kokkos::View<GO*, device_type> CSR_colind_GID_d;
8305 Kokkos::View<LO*, device_type> CSR_colind_LID_d;
8306 Kokkos::View<impl_scalar_type*, device_type> CSR_vals_d;
8307 Kokkos::View<int*, device_type> TargetPids_d;
8312 destMat->imports_.view_device(),
8313 destMat->numImportPacketsPerLID_.view_device(),
8325 Kokkos::resize(CSR_colind_LID_d, CSR_colind_GID_d.size());
8327#ifdef HAVE_TPETRA_MMM_TIMINGS
8328 tmCopySPRdata = Teuchos::null;
8337 std::ostringstream os;
8338 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8340 std::cerr << os.str();
8343#ifdef HAVE_TPETRA_MMM_TIMINGS
8344 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8346 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8356 std::ostringstream os;
8357 os << *verbosePrefix <<
"restrictComm="
8358 << (restrictComm ?
"true" :
"false") << std::endl;
8359 std::cerr << os.str();
8366#ifdef HAVE_TPETRA_MMM_TIMINGS
8367 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8370 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ? ReducedRowMap : MyColMap->replaceCommWithSubset(ReducedComm);
8371 MyColMap = ReducedColMap;
8376 std::ostringstream os;
8377 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8378 std::cerr << os.str();
8380 destMat->replaceColMap(MyColMap);
8387 if (ReducedComm.is_null()) {
8389 std::ostringstream os;
8390 os << *verbosePrefix <<
"I am no longer in the communicator; "
8393 std::cerr << os.str();
8403 if ((!reverseMode && xferAsImport !=
nullptr) ||
8404 (reverseMode && xferAsExport !=
nullptr)) {
8406 std::ostringstream os;
8407 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8408 std::cerr << os.str();
8410#ifdef HAVE_TPETRA_MMM_TIMINGS
8411 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8413 Import_Util::sortCrsEntries(CSR_rowptr_d,
8416 }
else if ((!reverseMode && xferAsExport !=
nullptr) ||
8417 (reverseMode && xferAsImport !=
nullptr)) {
8419 std::ostringstream os;
8420 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8422 std::cerr << os.str();
8424#ifdef HAVE_TPETRA_MMM_TIMINGS
8425 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8427 Import_Util::sortAndMergeCrsEntries(CSR_rowptr_d,
8431 TEUCHOS_TEST_FOR_EXCEPTION(
8432 true, std::logic_error,
8433 "Tpetra::CrsMatrix::"
8434 "transferAndFillComplete: Should never get here! "
8435 "Please report this bug to a Tpetra developer.");
8443 std::ostringstream os;
8444 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8445 std::cerr << os.str();
8449#ifdef HAVE_TPETRA_MMM_TIMINGS
8450 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8452 destMat->setAllValues(CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8460#ifdef HAVE_TPETRA_MMM_TIMINGS
8461 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8464 Teuchos::ParameterList esfc_params;
8466 RCP<import_type> MyImport;
8469 if (iallreduceRequest.get() !=
nullptr) {
8471 std::ostringstream os;
8472 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8474 std::cerr << os.str();
8476 iallreduceRequest->wait();
8477 if (reduced_mismatch != 0) {
8483#ifdef HAVE_TPETRA_MMM_TIMINGS
8484 Teuchos::TimeMonitor MMisMM(*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8489 std::ostringstream os;
8490 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8491 std::cerr << os.str();
8494 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8495 Teuchos::ArrayRCP<int> type3PIDs;
8496 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8497 auto colind = getCrsGraph()->getLocalIndicesHost();
8500 std::ostringstream os;
8501 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8502 std::cerr << os.str();
8506#ifdef HAVE_TPETRA_MMM_TIMINGS
8507 TimeMonitor tm_rnd(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8509 Import_Util::reverseNeighborDiscovery(*
this,
8521 std::ostringstream os;
8522 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8523 std::cerr << os.str();
8526 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8527 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8529 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8530 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8532 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8534 std::vector<bool> IsOwned(numCols,
true);
8535 std::vector<int> SentTo(numCols, -1);
8536 if (!MyImporter.is_null()) {
8537 for (
auto&& rlid : MyImporter->getRemoteLIDs()) {
8538 IsOwned[rlid] =
false;
8542 std::vector<std::pair<int, GO>> usrtg;
8543 usrtg.reserve(TEPID2.size());
8546 const auto&
colMap = *(this->getColMap());
8548 const LO row = TELID2[i];
8549 const int pid = TEPID2[i];
8550 for (
auto j = rowptr[row]; j < rowptr[row + 1]; ++j) {
8551 const int col = colind[j];
8552 if (IsOwned[col] && SentTo[col] != pid) {
8554 GO gid =
colMap.getGlobalElement(col);
8555 usrtg.push_back(std::pair<int, GO>(pid, gid));
8562 std::sort(usrtg.begin(), usrtg.end());
8563 auto eopg = std ::unique(usrtg.begin(), usrtg.end());
8565 usrtg.erase(eopg, usrtg.end());
8568 Teuchos::ArrayRCP<int> EPID2 = Teuchos::arcp(
new int[type2_us_size], 0, type2_us_size,
true);
8569 Teuchos::ArrayRCP<LO> ELID2 = Teuchos::arcp(
new LO[type2_us_size], 0, type2_us_size,
true);
8572 for (
auto&& p : usrtg) {
8573 EPID2[pos] = p.first;
8574 ELID2[pos] = this->getDomainMap()->getLocalElement(p.second);
8578 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8579 Teuchos::ArrayView<LO> ELID3 = type3LIDs();
8580 GO InfGID = std::numeric_limits<GO>::max();
8581 int InfPID = INT_MAX;
8585#define TPETRA_MIN3(x, y, z) ((x) < (y) ? (std::min(x, z)) : (std::min(y, z)))
8586 int i1 = 0, i2 = 0, i3 = 0;
8587 int Len1 = EPID1.size();
8588 int Len2 = EPID2.size();
8589 int Len3 = EPID3.size();
8591 int MyLen = Len1 + Len2 + Len3;
8592 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen], 0, MyLen,
true);
8593 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen], 0, MyLen,
true);
8596 while (i1 < Len1 || i2 < Len2 || i3 < Len3) {
8597 int PID1 = (i1 < Len1) ? (EPID1[i1]) : InfPID;
8598 int PID2 = (i2 < Len2) ? (EPID2[i2]) : InfPID;
8599 int PID3 = (i3 < Len3) ? (EPID3[i3]) : InfPID;
8601 GO GID1 = (i1 < Len1) ? getDomainMap()->getGlobalElement(ELID1[i1]) : InfGID;
8602 GO GID2 = (i2 < Len2) ? getDomainMap()->getGlobalElement(ELID2[i2]) : InfGID;
8603 GO GID3 = (i3 < Len3) ? getDomainMap()->getGlobalElement(ELID3[i3]) : InfGID;
8605 int MIN_PID = TPETRA_MIN3(PID1, PID2, PID3);
8606 GO MIN_GID = TPETRA_MIN3(((PID1 == MIN_PID) ? GID1 : InfGID), ((PID2 == MIN_PID) ? GID2 : InfGID), ((PID3 == MIN_PID) ? GID3 : InfGID));
8610 bool added_entry =
false;
8612 if (PID1 == MIN_PID && GID1 == MIN_GID) {
8613 userExportLIDs[iloc] = ELID1[i1];
8614 userExportPIDs[iloc] = EPID1[i1];
8619 if (PID2 == MIN_PID && GID2 == MIN_GID) {
8621 userExportLIDs[iloc] = ELID2[i2];
8622 userExportPIDs[iloc] = EPID2[i2];
8628 if (PID3 == MIN_PID && GID3 == MIN_GID) {
8630 userExportLIDs[iloc] = ELID3[i3];
8631 userExportPIDs[iloc] = EPID3[i3];
8639 std::ostringstream os;
8640 os << *verbosePrefix <<
"Create Import" << std::endl;
8641 std::cerr << os.str();
8644#ifdef HAVE_TPETRA_MMM_TIMINGS
8645 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
8647 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
8649 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
8650 MyImport = rcp(
new import_type(MyDomainMap,
8653 userExportLIDs.view(0, iloc).getConst(),
8654 userExportPIDs.view(0, iloc).getConst(),
8658 std::ostringstream os;
8659 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
8660 std::cerr << os.str();
8664#ifdef HAVE_TPETRA_MMM_TIMINGS
8665 TimeMonitor esfc(*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
8666 esfc_params.set(
"Timer Label", label + std::string(
"isMM eSFC"));
8668 if (!params.is_null())
8669 esfc_params.set(
"compute global constants", params->get(
"compute global constants",
true));
8670 destMat->expertStaticFillComplete(MyDomainMap, MyRangeMap, MyImport, Teuchos::null, rcp(
new Teuchos::ParameterList(esfc_params)));
8675#ifdef HAVE_TPETRA_MMM_TIMINGS
8676 TimeMonitor MMnotMMblock(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
8679 std::ostringstream os;
8680 os << *verbosePrefix <<
"Create Import" << std::endl;
8681 std::cerr << os.str();
8684#ifdef HAVE_TPETRA_MMM_TIMINGS
8685 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
8687 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
8688 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
8689 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
8690 MyImport = rcp(
new import_type(MyDomainMap, MyColMap, RemotePids, mypars));
8693 std::ostringstream os;
8694 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
8695 std::cerr << os.str();
8698#ifdef HAVE_TPETRA_MMM_TIMINGS
8699 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
8700 esfc_params.set(
"Timer Label", prefix + std::string(
"notMM eSFC"));
8702 esfc_params.set(
"Timer Label", std::string(
"notMM eSFC"));
8705 if (!params.is_null()) {
8706 esfc_params.set(
"compute global constants",
8707 params->get(
"compute global constants",
true));
8709 destMat->expertStaticFillComplete(MyDomainMap, MyRangeMap,
8710 MyImport, Teuchos::null,
8711 rcp(
new Teuchos::ParameterList(esfc_params)));
8714#ifdef HAVE_TPETRA_MMM_TIMINGS
8715 tmIESFC = Teuchos::null;
8719 std::ostringstream os;
8720 os << *verbosePrefix <<
"Done" << endl;
8721 std::cerr << os.str();
8725template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8729 const Teuchos::RCP<const map_type>& domainMap,
8730 const Teuchos::RCP<const map_type>&
rangeMap,
8731 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
8735template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8740 const Teuchos::RCP<const map_type>& domainMap,
8741 const Teuchos::RCP<const map_type>&
rangeMap,
8742 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
8746template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8750 const Teuchos::RCP<const map_type>& domainMap,
8751 const Teuchos::RCP<const map_type>&
rangeMap,
8752 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
8756template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8761 const Teuchos::RCP<const map_type>& domainMap,
8762 const Teuchos::RCP<const map_type>&
rangeMap,
8763 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const {
8775#define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
8777 template class CrsMatrix<SCALAR, LO, GO, NODE>;
8779#define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO, SI, LO, GO, NODE) \
8781 template Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE>> \
8782 CrsMatrix<SI, LO, GO, NODE>::convert<SO>() const;
8784#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8786 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
8787 importAndFillCompleteCrsMatrix(const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE>>& sourceMatrix, \
8788 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8789 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8790 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
8791 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8792 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8793 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& domainMap, \
8794 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8795 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8796 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& rangeMap, \
8797 const Teuchos::RCP<Teuchos::ParameterList>& params);
8799#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8801 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
8802 importAndFillCompleteCrsMatrix(const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE>>& sourceMatrix, \
8803 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8804 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8805 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
8806 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8807 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8808 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
8809 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8810 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8811 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& domainMap, \
8812 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8813 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8814 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& rangeMap, \
8815 const Teuchos::RCP<Teuchos::ParameterList>& params);
8817#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8819 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
8820 exportAndFillCompleteCrsMatrix(const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE>>& sourceMatrix, \
8821 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8822 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8823 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
8824 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8825 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8826 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& domainMap, \
8827 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8828 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8829 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& rangeMap, \
8830 const Teuchos::RCP<Teuchos::ParameterList>& params);
8832#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8834 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
8835 exportAndFillCompleteCrsMatrix(const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE>>& sourceMatrix, \
8836 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8837 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8838 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
8839 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8840 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8841 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
8842 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8843 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8844 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& domainMap, \
8845 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8846 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8847 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>>& rangeMap, \
8848 const Teuchos::RCP<Teuchos::ParameterList>& params);
8850#define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO, NODE) \
8851 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
8852 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8853 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8854 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8855 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)