1008 this->exports_.sync_host();
1010 this->exports_.sync_device();
1014 std::ostringstream os;
1015 os << *prefix <<
"5.1. After packAndPrepare, "
1018 std::cerr << os.str();
1023 if (constantNumPackets != 0) {
1024 ProfilingRegion region_reallocImportsIfNeeded(
"Tpetra::DistObject::beginTransfer::reallocImportsIfNeeded");
1030 const size_t rbufLen = remoteLIDs.extent(0) * constantNumPackets;
1031 reallocImportsIfNeeded(rbufLen, verbose, prefix.get(), canTryAliasing, CM);
1040 std::ostringstream os;
1041 os << *prefix <<
"7.0. "
1042 << (revOp == DoReverse ?
"Reverse" :
"Forward")
1044 std::cerr << os.str();
1047 doPostRecvs(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1052 doPostSends(distributorPlan, constantNumPackets, commOnHost, prefix);
1059 if (!needCommunication) {
1061 std::ostringstream os;
1062 os << *prefix <<
"Comm not needed; skipping" << endl;
1063 std::cerr << os.str();
1071 if (constantNumPackets != 0) {
1072 ProfilingRegion region_reallocImportsIfNeeded(
"Tpetra::DistObject::beginTransfer::reallocImportsIfNeeded");
1078 const size_t rbufLen = remoteLIDs.extent(0) * constantNumPackets;
1079 reallocImportsIfNeeded(rbufLen, verbose, prefix.get(), canTryAliasing, CM);
1085 std::ostringstream os;
1086 os << *prefix <<
"7.0. "
1087 << (revOp == DoReverse ?
"Reverse" :
"Forward")
1089 std::cerr << os.str();
1092 doPostRecvs(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1097 if (constantNumPackets == 0) {
1099 std::ostringstream os;
1100 os << *prefix <<
"3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1102 std::cerr << os.str();
1106 this->reallocArraysForNumPacketsPerLid(exportLIDs.extent(0),
1107 remoteLIDs.extent(0));
1111 std::ostringstream os;
1112 os << *prefix <<
"4. packAndPrepare: before, "
1115 std::cerr << os.str();
1118 doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1121 this->exports_.sync_host();
1123 this->exports_.sync_device();
1127 std::ostringstream os;
1128 os << *prefix <<
"5.1. After packAndPrepare, "
1131 std::cerr << os.str();
1137 doPostSends(distributorPlan, constantNumPackets, commOnHost, prefix);
1148 if (needCopyAndPermute) {
1151 std::ostringstream os;
1152 os << *prefix <<
"2. copyAndPermute" << endl;
1153 std::cerr << os.str();
1157 ProfilingRegion region_cp(
"Tpetra::DistObject::beginTransfer::copyAndPermute");
1159 this->copyAndPermute(src, numSameIDs, permuteToLIDs, permuteFromLIDs, CM);
1163 std::ostringstream os;
1164 os << *prefix <<
"After copyAndPermute:" << endl
1171 std::cerr << os.str();
// endTransfer: finishing half of a DistObject transfer, as far as this listing
// shows. The visible steps are:
//   1. check that this object's Map (and the source's Map, when `src` is a
//      DistObject of this same type) is compatible with `transfer`;
//   2. choose the forward or reverse distributor plan and the LID list to
//      unpack into;
//   3. unless communication is unnecessary, wait on receives, call
//      doUnpackAndCombine, then wait on sends.
//
// Visible parameters:
//   src            - source object of the transfer.
//   transfer       - Import/Export-like object supplying Maps, LID lists and
//                    the Distributor.
//   modeString     - text spliced into exception messages.
//   revOp          - DoForward or DoReverse; selects which Map/plan/LIDs apply.
//   restrictedMode - switches the target-Map check from isSameAs to
//                    isLocallyFitted and forbids permutes (see below).
//
// NOTE(review): this listing omits some original lines. `CM` is used below but
// its declaration is not visible (presumably a parameter on an omitted line),
// and the conditions guarding the logging blocks and some closing braces are
// likewise not shown. Comments here describe only what is visible.
1177template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1178void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1179 endTransfer(
const SrcDistObject& src,
1180 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1181 const char modeString[],
1182 const ReverseOption revOp,
1184 bool restrictedMode) {
1185 using Details::Behavior;
1187 using Details::ProfilingRegion;
1188 using Kokkos::Compat::create_const_view;
1189 using Kokkos::Compat::getArrayView;
1190 using Kokkos::Compat::getConstArrayView;
1191 using Kokkos::Compat::getKokkosViewDeepCopy;
1193 using ::Tpetra::Details::dualViewStatusToString;
1194 using ::Tpetra::Details::getArrayViewFromDualView;
// Communicate from host buffers unless MPI is assumed to be GPU-aware; the
// flag also picks the profiling-region label.
1196 const bool commOnHost = !Behavior::assumeMpiIsGPUAware();
1197 const char funcNameHost[] =
"Tpetra::DistObject::endTransfer[Host]";
1198 const char funcNameDevice[] =
"Tpetra::DistObject::endTransfer[Device]";
1199 const char* funcName = commOnHost ? funcNameHost : funcNameDevice;
1200 ProfilingRegion region_doTransfer(funcName);
1201 const bool verbose = Behavior::verbose(
"DistObject");
1202 std::shared_ptr<std::string> prefix;
// Build the log prefix and report the source/target types on std::cerr.
// NOTE(review): the prefix is created with "doTransfer" although this is
// endTransfer -- confirm whether that label is intentional.
1204 std::ostringstream os;
1205 prefix = this->
createPrefix(
"DistObject",
"doTransfer");
1206 os << *prefix <<
"Source type: " << Teuchos::typeName(src)
1207 <<
", Target type: " << Teuchos::typeName(*
this) << endl;
1208 std::cerr << os.str();
1221 const bool debug = Behavior::debug(
"DistObject");
// Map compatibility checks; each failure throws std::invalid_argument.
// NOTE(review): presumably these run only when `debug` is set, via a guard on
// an omitted line -- confirm.
1223 if (!restrictedMode && revOp == DoForward) {
// Forward, unrestricted: this object's Map must be the same as the
// transfer's target Map.
1224 const bool myMapSameAsTransferTgtMap =
1225 this->getMap()->isSameAs(*(transfer.getTargetMap()));
1226 TEUCHOS_TEST_FOR_EXCEPTION(!myMapSameAsTransferTgtMap, std::invalid_argument,
1227 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1228 "communication, the target DistObject's Map must be the same "
1229 "(in the sense of Tpetra::Map::isSameAs) as the input "
1230 "Export/Import object's target Map.");
1231 }
else if (!restrictedMode && revOp == DoReverse) {
// Reverse, unrestricted: this object's Map must be the same as the
// transfer's source Map.
1232 const bool myMapSameAsTransferSrcMap =
1233 this->getMap()->isSameAs(*(transfer.getSourceMap()));
1234 TEUCHOS_TEST_FOR_EXCEPTION(!myMapSameAsTransferSrcMap, std::invalid_argument,
1235 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1236 "communication, the target DistObject's Map must be the same "
1237 "(in the sense of Tpetra::Map::isSameAs) as the input "
1238 "Export/Import object's source Map.");
1239 }
else if (restrictedMode && revOp == DoForward) {
// Forward, restricted: checked with isLocallyFitted against the transfer's
// target Map instead of isSameAs.
1240 const bool myMapLocallyFittedTransferTgtMap =
1241 this->getMap()->isLocallyFitted(*(transfer.getTargetMap()));
1242 TEUCHOS_TEST_FOR_EXCEPTION(!myMapLocallyFittedTransferTgtMap, std::invalid_argument,
1243 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1244 "communication using restricted mode, Export/Import object's "
1245 "target Map must be locally fitted (in the sense of "
1246 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
// Remaining case (reverse + restricted, judging by the message below; the
// branch header is not visible): isLocallyFitted against the source Map.
1248 const bool myMapLocallyFittedTransferSrcMap =
1249 this->getMap()->isLocallyFitted(*(transfer.getSourceMap()));
1250 TEUCHOS_TEST_FOR_EXCEPTION(!myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1251 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1252 "communication using restricted mode, Export/Import object's "
1253 "source Map must be locally fitted (in the sense of "
1254 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
// The source's Map can only be checked when `src` is a DistObject of this
// exact type; otherwise the cast yields nullptr and the check is skipped.
1260 const this_type* srcDistObj =
dynamic_cast<const this_type*
>(&src);
1261 if (srcDistObj !=
nullptr) {
1262 if (revOp == DoForward) {
// Forward: source object's Map must match the transfer's source Map.
1263 const bool srcMapSameAsImportSrcMap =
1264 srcDistObj->getMap()->isSameAs(*(transfer.getSourceMap()));
1265 TEUCHOS_TEST_FOR_EXCEPTION(!srcMapSameAsImportSrcMap, std::invalid_argument,
1266 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1267 "communication, the source DistObject's Map must be the same "
1268 "as the input Export/Import object's source Map.");
// Reverse (per the message below): source object's Map must match the
// transfer's target Map.
1270 const bool srcMapSameAsImportTgtMap =
1271 srcDistObj->getMap()->isSameAs(*(transfer.getTargetMap()));
1272 TEUCHOS_TEST_FOR_EXCEPTION(!srcMapSameAsImportTgtMap, std::invalid_argument,
1273 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1274 "communication, the source DistObject's Map must be the same "
1275 "as the input Export/Import object's target Map.");
// Forward transfers use the distributor's own plan; anything else uses its
// reverse plan.
1280 Distributor& distor = transfer.getDistributor();
1281 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
// In debug builds, restricted mode rejects transfers that carry any permutes.
1283 TEUCHOS_TEST_FOR_EXCEPTION(debug && restrictedMode &&
1284 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1285 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1286 std::invalid_argument,
1287 "Tpetra::DistObject::" << modeString <<
": Transfer object "
1288 "cannot have permutes in restricted mode.");
1292 std::ostringstream os;
1293 os << *prefix <<
"doTransfer: Use new interface; "
1295 << (commOnHost ?
"true" :
"false") << endl;
1296 std::cerr << os.str();
1299 using const_lo_dv_type =
1300 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
// LIDs to unpack into: the transfer's remote LIDs when going forward, its
// export LIDs otherwise.
1301 const_lo_dv_type remoteLIDs = (revOp == DoForward) ? transfer.getRemoteLIDs_dv() : transfer.getExportLIDs_dv();
1303 size_t constantNumPackets = this->constantNumberOfPackets();
// Communication is skipped when the relevant side is not distributed:
// this object for a reverse transfer, or a same-typed source for a forward one.
1308 bool needCommunication =
true;
1311 const this_type* srcDistObj =
dynamic_cast<const this_type*
>(&src);
1313 if (revOp == DoReverse && !this->isDistributed()) {
1314 needCommunication =
false;
// NOTE(review): NULL is used here while the earlier check on srcDistObj uses
// nullptr; consider nullptr for consistency.
1323 else if (revOp == DoForward && srcDistObj != NULL &&
1324 !srcDistObj->isDistributed()) {
1325 needCommunication =
false;
1328 if (!needCommunication) {
1330 std::ostringstream os;
1331 os << *prefix <<
"Comm not needed; skipping" << endl;
1332 std::cerr << os.str();
// Communication path (NOTE(review): the `else` introducing it is not visible
// here): wait for receives, unpack and combine into this object, then wait
// for sends to complete.
1335 distributorActor_.doWaitsRecv(distributorPlan);
1338 std::ostringstream os;
1339 os << *prefix <<
"8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) <<
", constantNumPackets " << constantNumPackets << endl;
1340 std::cerr << os.str();
1342 doUnpackAndCombine(remoteLIDs, constantNumPackets, CM, execution_space());
1344 distributorActor_.doWaitsSend(distributorPlan);
1349 std::ostringstream os;
1350 os << *prefix <<
"9. Done!" << endl;
1351 std::cerr << os.str();
1355 std::ostringstream os;
1356 os << *prefix <<
"Tpetra::DistObject::doTransfer: Done!" << endl;
1357 std::cerr << os.str();
// doPostRecvs: posts the receives for a transfer through distributorActor_.
//
// Two cases are visible, keyed on constantNumPackets:
//   == 0 : the per-LID packet counts are exchanged first (doPostsAndWaits),
//          the total import packet count is computed, the import buffer is
//          (re)allocated if needed, and only then are the data receives
//          posted with the per-LID import counts.
//   else : receives are posted directly into the import buffer.
// Each case has a host-view and a device-view variant.
//
// NOTE(review): this listing omits some original lines. `commOnHost`,
// `verbose` and `CM` are used below but not declared in the visible lines
// (presumably parameters/locals on omitted lines), and the conditions that
// pick between the host and device variants are not shown -- presumably
// `commOnHost`; confirm against the full file.
1361template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1362void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1363 doPostRecvs(
const Details::DistributorPlan& distributorPlan,
1364 size_t constantNumPackets,
1366 std::shared_ptr<std::string> prefix,
1367 const bool canTryAliasing,
1369 using Details::ProfilingRegion;
1370 using Kokkos::Compat::create_const_view;
1372 using ::Tpetra::Details::dualViewStatusToString;
1373 using ::Tpetra::Details::getArrayViewFromDualView;
// Profiling label reflects whether communication uses host or device buffers.
1375 const char funcNameHost[] =
"Tpetra::DistObject::doPostRecvs[Host]";
1376 const char funcNameDevice[] =
"Tpetra::DistObject::doPostRecvs[Device]";
1377 const char* funcName = commOnHost ? funcNameHost : funcNameDevice;
1378 ProfilingRegion region_dpr(funcName);
// Variable number of packets per LID: counts must be communicated before the
// import buffer can be sized.
1382 if (constantNumPackets == 0) {
1384 std::ostringstream os;
1385 os << *prefix <<
"7.1. Variable # packets / LID: first comm "
1386 <<
"(commOnHost = " << (commOnHost ?
"true" :
"false") <<
")"
1388 std::cerr << os.str();
1390 size_t totalImportPackets = 0;
// Host variant: bring both count arrays up to date on host (only if they
// need it), then mark the import counts as modified on host because the
// exchange below writes into them.
1392 if (this->numExportPacketsPerLID_.need_sync_host()) {
1393 this->numExportPacketsPerLID_.sync_host();
1395 if (this->numImportPacketsPerLID_.need_sync_host()) {
1396 this->numImportPacketsPerLID_.sync_host();
1398 this->numImportPacketsPerLID_.modify_host();
1400 create_const_view(this->numExportPacketsPerLID_.view_host());
1401 auto numImp_h = this->numImportPacketsPerLID_.view_host();
1405 std::ostringstream os;
1406 os << *prefix <<
"Call doPostsAndWaits"
1408 std::cerr << os.str();
// Exchange export counts for import counts; the literal 1 is the third
// argument (presumably one count per LID -- confirm against the
// doPostsAndWaits signature).
1410 distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1413 std::ostringstream os;
1414 os << *prefix <<
"Count totalImportPackets" << std::endl;
1415 std::cerr << os.str();
1417 using the_dev_type =
typename decltype(numImp_h)::device_type;
1418 totalImportPackets = countTotalImportPackets<the_dev_type>(numImp_h);
// Device variant of the same count exchange, using device views.
1420 this->numExportPacketsPerLID_.sync_device();
1421 this->numImportPacketsPerLID_.sync_device();
1422 this->numImportPacketsPerLID_.modify_device();
1423 auto numExp_d = create_const_view(this->numExportPacketsPerLID_.view_device());
1424 auto numImp_d = this->numImportPacketsPerLID_.view_device();
1428 std::ostringstream os;
1429 os << *prefix <<
"Call doPostsAndWaits"
1431 std::cerr << os.str();
1434 distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1437 std::ostringstream os;
1438 os << *prefix <<
"Count totalImportPackets" << std::endl;
1439 std::cerr << os.str();
1441 using the_dev_type =
typename decltype(numImp_d)::device_type;
1442 totalImportPackets = countTotalImportPackets<the_dev_type>(numImp_d);
1446 std::ostringstream os;
1447 os << *prefix <<
"totalImportPackets=" << totalImportPackets << endl;
1448 std::cerr << os.str();
// Size the import buffer for the total just computed.
1450 this->reallocImportsIfNeeded(totalImportPackets, verbose,
1451 prefix.get(), canTryAliasing, CM);
1453 std::ostringstream os;
1454 os << *prefix <<
"7.3. Second comm" << std::endl;
1455 std::cerr << os.str();
// The import counts are synced to host before being wrapped for the second
// communication (the right-hand side of the declaration below is on an
// omitted line).
1461 this->numImportPacketsPerLID_.sync_host();
1470 auto numImportPacketsPerLID_av =
// Reset the import buffer's DualView sync flags before marking one side
// modified below.
1478 this->imports_.clear_sync_state();
1481 std::ostringstream os;
1482 os << *prefix <<
"Comm on "
1483 << (commOnHost ?
"host" :
"device")
1484 <<
"; call doPostRecvs" << endl;
1485 std::cerr << os.str();
// Host variant: receive into the host view of imports_.
1489 this->imports_.modify_host();
1490 distributorActor_.doPostRecvs(distributorPlan,
1491 this->imports_.view_host(),
1492 numImportPacketsPerLID_av);
// Device variant: receive into the device view of imports_.
1494 this->imports_.modify_device();
1495 distributorActor_.doPostRecvs(distributorPlan,
1496 this->imports_.view_device(),
1497 numImportPacketsPerLID_av);
// Constant number of packets per LID (per the log text below): no count
// exchange is needed, so receives are posted straight away.
1501 std::ostringstream os;
1502 os << *prefix <<
"7.1. Const # packets per LID: " << endl
1509 std::cerr << os.str();
1516 this->imports_.clear_sync_state();
1519 std::ostringstream os;
1520 os << *prefix <<
"7.2. Comm on "
1521 << (commOnHost ?
"host" :
"device")
1522 <<
"; call doPostRecvs" << endl;
1523 std::cerr << os.str();
// NOTE(review): one argument line of each call below is omitted from this
// listing (presumably the packet count) -- confirm.
1526 this->imports_.modify_host();
1527 distributorActor_.doPostRecvs(distributorPlan,
1529 this->imports_.view_host());
1531 this->imports_.modify_device();
1532 distributorActor_.doPostRecvs(distributorPlan,
1534 this->imports_.view_device());
// doPostSends: posts the sends for a transfer through distributorActor_,
// reading from exports_ and naming imports_ as the receive-side buffer.
//
// As in doPostRecvs, constantNumPackets == 0 selects the variable-size path,
// which also passes per-LID export/import counts; the other path passes only
// the buffers. Each path has a host-view and a device-view variant; the
// device variants are preceded by a global Kokkos::fence.
//
// NOTE(review): this listing omits some original lines. `commOnHost` and
// `verbose` are used but not declared in the visible lines, the host/device
// branch conditions are not shown (presumably `commOnHost`), and one argument
// line of each constant-size call is missing -- confirm against the full file.
1539template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1540void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1541 doPostSends(
const Details::DistributorPlan& distributorPlan,
1542 size_t constantNumPackets,
1544 std::shared_ptr<std::string> prefix) {
1545 using Details::ProfilingRegion;
1546 using Kokkos::Compat::create_const_view;
1548 using ::Tpetra::Details::getArrayViewFromDualView;
// Profiling label reflects whether communication uses host or device buffers.
1550 const char funcNameHost[] =
"Tpetra::DistObject::doPostSends[Host]";
1551 const char funcNameDevice[] =
"Tpetra::DistObject::doPostSends[Device]";
1552 const char* funcName = commOnHost ? funcNameHost : funcNameDevice;
1553 ProfilingRegion region_dps(funcName);
1557 std::ostringstream os;
1558 os << *prefix <<
"Comm on "
1559 << (commOnHost ?
"host" :
"device")
1560 <<
"; call doPostSends" << endl;
1561 std::cerr << os.str();
// Variable number of packets per LID.
1564 if (constantNumPackets == 0) {
// Both count arrays are synced to host before being wrapped as array views
// (the right-hand sides of the two declarations below are on omitted lines).
1568 this->numExportPacketsPerLID_.sync_host();
1569 this->numImportPacketsPerLID_.sync_host();
1578 auto numExportPacketsPerLID_av =
1580 auto numImportPacketsPerLID_av =
// Host variant.
1584 distributorActor_.doPostSends(distributorPlan,
1585 create_const_view(this->exports_.view_host()),
1586 numExportPacketsPerLID_av,
1587 this->imports_.view_host(),
1588 numImportPacketsPerLID_av);
// Device variant: fence first so outstanding device work has finished before
// the device buffers are handed to the distributor.
1591 Kokkos::fence(
"DistObject::doPostSends-1");
1592 distributorActor_.doPostSends(distributorPlan,
1593 create_const_view(this->exports_.view_device()),
1594 numExportPacketsPerLID_av,
1595 this->imports_.view_device(),
1596 numImportPacketsPerLID_av);
// Constant number of packets per LID: host variant.
1600 distributorActor_.doPostSends(distributorPlan,
1601 create_const_view(this->exports_.view_host()),
1603 this->imports_.view_host());
// Constant number of packets per LID: device variant, again after a fence.
1606 Kokkos::fence(
"DistObject::doPostSends-2");
1607 distributorActor_.doPostSends(distributorPlan,
1608 create_const_view(this->exports_.view_device()),
1610 this->imports_.view_device());
// doPackAndPrepare: wrapper around this->packAndPrepare that packs data from
// `src` for `exportLIDs` into exports_ / numExportPacketsPerLID_.
//
// Parameters:
//   src                - source object to pack from.
//   exportLIDs         - local IDs whose data is packed.
//   constantNumPackets - in/out; forwarded by reference to packAndPrepare.
//   space              - execution space instance forwarded to packAndPrepare.
//
// Two call paths are visible: one that captures exceptions into a local error
// stream and then runs Details::checkGlobalError over the Map's communicator,
// and one that calls packAndPrepare directly.
// NOTE(review): the switch between the two paths, the `try`, and the
// `catch (...)` are on lines omitted from this listing; presumably the
// checked path is the debug-mode one -- confirm.
1615template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1616void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1617 doPackAndPrepare(
const SrcDistObject& src,
1618 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1619 size_t& constantNumPackets,
1620 const execution_space& space) {
1621 using Details::ProfilingRegion;
1625 ProfilingRegion region_pp(
"Tpetra::DistObject::doPackAndPrepare");
// Checked path: collect any local failure text, starting from "not yet
// successful" (presumably flipped to true after the call on an omitted line).
1644 std::ostringstream lclErrStrm;
1645 bool lclSuccess =
false;
1647 this->packAndPrepare(src, exportLIDs, this->exports_,
1648 this->numExportPacketsPerLID_,
1649 constantNumPackets, space);
1651 }
catch (std::exception& e) {
1652 lclErrStrm <<
"packAndPrepare threw an exception: "
// Message used for exceptions that do not derive from std::exception.
1656 lclErrStrm <<
"packAndPrepare threw an exception "
1657 "not a subclass of std::exception.";
// Hand the local outcome and message to checkGlobalError together with the
// Map's communicator, so a failure on any process is reported.
1659 const char gblErrMsgHeader[] =
1660 "Tpetra::DistObject "
1661 "threw an exception in packAndPrepare on "
1662 "one or more processes in the DistObject's communicator.";
1663 auto comm = getMap()->getComm();
1664 Details::checkGlobalError(std::cerr, lclSuccess,
1665 lclErrStrm.str().c_str(),
1666 gblErrMsgHeader, *comm);
// Unchecked path: call packAndPrepare directly and let exceptions propagate.
1668 this->packAndPrepare(src, exportLIDs, this->exports_,
1669 this->numExportPacketsPerLID_,
1670 constantNumPackets, space);
// doUnpackAndCombine: counterpart of doPackAndPrepare for the receive side.
// It forwards numImportPacketsPerLID_, constantNumPackets, CM and `space` to
// an unpack call whose first line is not visible in this listing (presumably
// this->unpackAndCombine with remoteLIDs and imports_ -- confirm).
//
// Visible parameters:
//   remoteLIDs         - local IDs that receive the unpacked data.
//   constantNumPackets - packets per LID, forwarded unchanged.
//   space              - execution space instance, forwarded unchanged.
// `CM` is used but declared on an omitted line (presumably a combine-mode
// parameter).
//
// As in doPackAndPrepare, two call paths are visible: one that records
// exceptions and runs Details::checkGlobalError, and one direct call.
// NOTE(review): the switch between them, the `try`, and the `catch (...)` are
// not shown here.
1674template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1675void DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1676 doUnpackAndCombine(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1677 size_t constantNumPackets,
1679 const execution_space& space) {
1680 using Details::ProfilingRegion;
1684 ProfilingRegion region_uc(
"Tpetra::DistObject::doUnpackAndCombine");
// Checked path: collect any local failure text, starting from "not yet
// successful" (presumably flipped to true after the call on an omitted line).
1687 std::ostringstream lclErrStrm;
1688 bool lclSuccess =
false;
1691 this->numImportPacketsPerLID_,
1692 constantNumPackets, CM, space);
1694 }
catch (std::exception& e) {
1695 lclErrStrm <<
"doUnpackAndCombine threw an exception: "
// Message used for exceptions that do not derive from std::exception.
1699 lclErrStrm <<
"doUnpackAndCombine threw an exception "
1700 "not a subclass of std::exception.";
// Hand the local outcome and message to checkGlobalError together with the
// Map's communicator, so a failure on any process is reported.
1702 const char gblErrMsgHeader[] =
1703 "Tpetra::DistObject "
1704 "threw an exception in unpackAndCombine on "
1705 "one or more processes in the DistObject's communicator.";
1706 auto comm = getMap()->getComm();
1707 Details::checkGlobalError(std::cerr, lclSuccess,
1708 lclErrStrm.str().c_str(),
1709 gblErrMsgHeader, *comm);
// Unchecked path: same arguments, exceptions propagate to the caller.
1712 this->numImportPacketsPerLID_,
1713 constantNumPackets, CM, space);
1717template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1721 const Kokkos::DualView<
1724 const Kokkos::DualView<
1729template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1732 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteToLIDs,
1733 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteFromLIDs,
1745 space.fence(
"Tpetra::DistObject::copyAndPermute-1");
1752template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1755 const Kokkos::DualView<
1766template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1769 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
exportLIDs,
1770 Kokkos::DualView<packet_type*, buffer_device_type>& exports,
1786 space.fence(
"Tpetra::DistObject::packAndPrepare-1");
1797template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1811template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1813 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
importLIDs,
1814 Kokkos::DualView<packet_type*, buffer_device_type> imports,
1817 const execution_space& space) {
1819 space.fence(
"Tpetra::DistObject::unpackAndCombine-1");
1825 execution_space().fence(
"Tpetra::DistObject::unpackAndCombine-2");
1829template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1831 std::ostream&
os)
const {
1833 using Teuchos::FancyOStream;
1834 using Teuchos::getFancyOStream;
1836 using Teuchos::rcpFromRef;
1839 this->describe(*
out, Teuchos::VERB_DEFAULT);
1842template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1843std::unique_ptr<std::string>
1846 auto map = this->getMap();
1847 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
1851template <
class DistObjectType>
1853 Teuchos::RCP<DistObjectType>&
input,
1854 const Teuchos::RCP<
const Map<
typename DistObjectType::local_ordinal_type,
1855 typename DistObjectType::global_ordinal_type,
1856 typename DistObjectType::node_type>>&
newMap) {
1859 input = Teuchos::null;
1863template <
class DistObjectType>
1865 auto newMap =
input->getMap()->removeEmptyProcesses();
// Expands to an explicit instantiation of the DistObject class template for
// the given packet type (SCALAR), local ordinal (LO), global ordinal (GO) and
// node (NODE) types.
1870#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1871 template class DistObject<SCALAR, LO, GO, NODE>;
// Expands to an explicit instantiation of DistObject with `char` as the
// packet type, for the given local ordinal (LO), global ordinal (GO) and
// node (NODE) types.
1875#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1876 template class DistObject<char, LO, GO, NODE>;