10#ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
11#define TPETRA_BLOCKCRSMATRIX_DEF_HPP
19#include "Tpetra_BlockMultiVector.hpp"
22#include "Teuchos_TimeMonitor.hpp"
23#ifdef HAVE_TPETRA_DEBUG
27#include "KokkosSparse_spmv.hpp"
34struct BlockCrsRowStruct {
35 T totalNumEntries, totalNumBytes, maxRowLength;
36 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
37 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct& b) =
default;
38 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
39 : totalNumEntries(numEnt)
40 , totalNumBytes(numBytes)
41 , maxRowLength(rowLength) {}
45static KOKKOS_INLINE_FUNCTION
void operator+=(BlockCrsRowStruct<T>& a,
const BlockCrsRowStruct<T>& b) {
46 a.totalNumEntries += b.totalNumEntries;
47 a.totalNumBytes += b.totalNumBytes;
48 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
51template <
typename T,
typename ExecSpace>
52struct BlockCrsReducer {
53 typedef BlockCrsReducer reducer;
55 typedef Kokkos::View<value_type, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
58 KOKKOS_INLINE_FUNCTION
59 BlockCrsReducer(value_type& val)
62 KOKKOS_INLINE_FUNCTION
void join(value_type& dst, value_type& src)
const { dst += src; }
63 KOKKOS_INLINE_FUNCTION
void join(value_type& dst,
const value_type& src)
const { dst += src; }
64 KOKKOS_INLINE_FUNCTION
void init(value_type& val)
const { val = value_type(); }
65 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
66 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
75template <
class Scalar,
class LO,
class GO,
class Node>
76class GetLocalDiagCopy {
78 typedef typename Node::device_type device_type;
79 typedef size_t diag_offset_type;
80 typedef Kokkos::View<
const size_t*, device_type,
81 Kokkos::MemoryUnmanaged>
83 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
84 typedef typename global_graph_type::local_graph_device_type local_graph_device_type;
85 typedef typename local_graph_device_type::row_map_type row_offsets_type;
86 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
87 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
88 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
91 GetLocalDiagCopy(
const diag_type& diag,
92 const values_type& val,
93 const diag_offsets_type& diagOffsets,
94 const row_offsets_type& ptr,
97 , diagOffsets_(diagOffsets)
99 , blockSize_(blockSize)
100 , offsetPerBlock_(blockSize_ * blockSize_)
103 KOKKOS_INLINE_FUNCTION
void
104 operator()(
const LO& lclRowInd)
const {
108 const size_t absOffset = ptr_[lclRowInd];
111 const size_t relOffset = diagOffsets_[lclRowInd];
114 const size_t pointOffset = (absOffset + relOffset) * offsetPerBlock_;
119 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
120 const_little_block_type;
121 const_little_block_type D_in(val_.data() + pointOffset,
122 blockSize_, blockSize_);
123 auto D_out = Kokkos::subview(diag_, lclRowInd, ALL(), ALL());
129 diag_offsets_type diagOffsets_;
130 row_offsets_type ptr_;
137template <
class Scalar,
class LO,
class GO,
class Node>
139BlockCrsMatrix<Scalar, LO, GO, Node>::
140 markLocalErrorAndGetStream() {
141 *(this->localError_) =
true;
142 if ((*errs_).is_null()) {
143 *errs_ = Teuchos::rcp(
new std::ostringstream());
148template <
class Scalar,
class LO,
class GO,
class Node>
167template <
class Scalar,
class LO,
class GO,
class Node>
173 , rowMeshMap_(*(
graph.getRowMap()))
186 !graph_.
isSorted(), std::invalid_argument,
188 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
189 "rows (isSorted() is false). This class assumes sorted rows.");
191 graphRCP_ = Teuchos::rcpFromRef(graph_);
201 "BlockCrsMatrix constructor: The input blockSize = "
202 <<
blockSize <<
" <= 0. The block size must be positive.");
216 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
217 Kokkos::deep_copy(ptrHost_,
ptr_h);
220 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
222 Kokkos::deep_copy(indHost_,
ind_h);
225 val_ =
decltype(val_)(impl_scalar_type_dualview(
"val",
numValEnt));
229template <
class Scalar,
class LO,
class GO,
class Node>
232 const typename local_matrix_device_type::values_type&
values,
236 , rowMeshMap_(*(
graph.getRowMap()))
249 !graph_.
isSorted(), std::invalid_argument,
251 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
252 "rows (isSorted() is false). This class assumes sorted rows.");
254 graphRCP_ = Teuchos::rcpFromRef(graph_);
264 "BlockCrsMatrix constructor: The input blockSize = "
265 <<
blockSize <<
" <= 0. The block size must be positive.");
279 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
280 Kokkos::deep_copy(ptrHost_,
ptr_h);
283 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
284 Kokkos::deep_copy(indHost_,
ind_h);
288 val_ =
decltype(val_)(
values);
292template <
class Scalar,
class LO,
class GO,
class Node>
300 , rowMeshMap_(*(
graph.getRowMap()))
314 !graph_.
isSorted(), std::invalid_argument,
316 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
317 "rows (isSorted() is false). This class assumes sorted rows.");
319 graphRCP_ = Teuchos::rcpFromRef(graph_);
329 "BlockCrsMatrix constructor: The input blockSize = "
330 <<
blockSize <<
" <= 0. The block size must be positive.");
340 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
342 Kokkos::deep_copy(ptrHost_,
ptr_h);
345 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
347 Kokkos::deep_copy(indHost_,
ind_h);
350 val_ =
decltype(val_)(impl_scalar_type_dualview(
"val",
numValEnt));
354template <
class Scalar,
class LO,
class GO,
class Node>
355Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
359 return Teuchos::rcp(
new map_type(domainPointMap_));
362template <
class Scalar,
class LO,
class GO,
class Node>
363Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
367 return Teuchos::rcp(
new map_type(rangePointMap_));
370template <
class Scalar,
class LO,
class GO,
class Node>
371Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
374 return graph_.getRowMap();
377template <
class Scalar,
class LO,
class GO,
class Node>
378Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
381 return graph_.getColMap();
384template <
class Scalar,
class LO,
class GO,
class Node>
388 return graph_.getGlobalNumRows();
391template <
class Scalar,
class LO,
class GO,
class Node>
395 return graph_.getLocalNumRows();
398template <
class Scalar,
class LO,
class GO,
class Node>
402 return graph_.getLocalMaxNumRowEntries();
405template <
class Scalar,
class LO,
class GO,
class Node>
409 Teuchos::ETransp
mode,
414 mode != Teuchos::NO_TRANS &&
mode != Teuchos::TRANS &&
mode != Teuchos::CONJ_TRANS,
415 std::invalid_argument,
416 "Tpetra::BlockCrsMatrix::apply: "
417 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
418 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
421 !
X.isConstantStride() || !
Y.isConstantStride(),
422 std::invalid_argument,
423 "Tpetra::BlockCrsMatrix::apply: "
424 "X and Y must both be constant stride");
432 }
catch (std::invalid_argument&
e) {
434 true, std::invalid_argument,
435 "Tpetra::BlockCrsMatrix::"
436 "apply: Either the input MultiVector X or the output MultiVector Y "
437 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
438 "graph. BlockMultiVector's constructor threw the following exception: "
448 }
catch (std::invalid_argument&
e) {
450 true, std::invalid_argument,
451 "Tpetra::BlockCrsMatrix::"
452 "apply: The implementation method applyBlock complained about having "
453 "an invalid argument. It reported the following: "
455 }
catch (std::logic_error&
e) {
457 true, std::invalid_argument,
458 "Tpetra::BlockCrsMatrix::"
459 "apply: The implementation method applyBlock complained about a "
460 "possible bug in its implementation. It reported the following: "
462 }
catch (std::exception&
e) {
464 true, std::invalid_argument,
465 "Tpetra::BlockCrsMatrix::"
466 "apply: The implementation method applyBlock threw an exception which "
467 "is neither std::invalid_argument nor std::logic_error, but is a "
468 "subclass of std::exception. It reported the following: "
472 true, std::logic_error,
473 "Tpetra::BlockCrsMatrix::"
474 "apply: The implementation method applyBlock threw an exception which "
475 "is not an instance of a subclass of std::exception. This probably "
476 "indicates a bug. Please report this to the Tpetra developers.");
480template <
class Scalar,
class LO,
class GO,
class Node>
484 Teuchos::ETransp
mode,
488 X.getBlockSize() !=
Y.getBlockSize(), std::invalid_argument,
489 "Tpetra::BlockCrsMatrix::applyBlock: "
490 "X and Y have different block sizes. X.getBlockSize() = "
491 <<
X.getBlockSize() <<
" != Y.getBlockSize() = "
492 <<
Y.getBlockSize() <<
".");
494 if (
mode == Teuchos::NO_TRANS) {
496 }
else if (
mode == Teuchos::TRANS ||
mode == Teuchos::CONJ_TRANS) {
500 true, std::invalid_argument,
501 "Tpetra::BlockCrsMatrix::"
502 "applyBlock: Invalid 'mode' argument. Valid values are "
503 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
507template <
class Scalar,
class LO,
class GO,
class Node>
517 "destMatrix is required to be null.");
531template <
class Scalar,
class LO,
class GO,
class Node>
541 "destMatrix is required to be null.");
555template <
class Scalar,
class LO,
class GO,
class Node>
558 auto val_d = val_.getDeviceView(Access::OverwriteAll);
562template <
class Scalar,
class LO,
class GO,
class Node>
574template <
class Scalar,
class LO,
class GO,
class Node>
577 Kokkos::MemoryUnmanaged>& offsets)
const {
578 graph_.getLocalDiagOffsets(offsets);
581template <
class Scalar,
class LO,
class GO,
class Node>
584 Kokkos::MemoryUnmanaged>& diag,
586 Kokkos::MemoryUnmanaged>& offsets)
const {
587 using Kokkos::parallel_for;
588 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
590 const LO
lclNumMeshRows =
static_cast<LO
>(rowMeshMap_.getLocalNumElements());
591 const LO
blockSize = this->getBlockSize();
593 static_cast<LO
>(diag.extent(1)) <
blockSize ||
594 static_cast<LO
>(diag.extent(2)) <
blockSize,
595 std::invalid_argument,
prefix <<
"The input Kokkos::View is not big enough to hold all the data.");
597 prefix <<
"offsets.size() = " << offsets.size() <<
" < local number of "
601 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
608 auto val_d = val_.getDeviceView(Access::ReadOnly);
611 graph_.getLocalGraphDevice().row_map, blockSize_));
614template <
class Scalar,
class LO,
class GO,
class Node>
626template <
class Scalar,
class LO,
class GO,
class Node>
637template <
class Scalar,
class LO,
class GO,
class Node>
640 nonconst_local_inds_host_view_type&
Indices,
641 nonconst_values_host_view_type&
Values,
647 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
654 for (LO
i = 0;
i <
numInds * blockSize_ * blockSize_; ++
i) {
660template <
class Scalar,
class LO,
class GO,
class Node>
666 if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
671 return static_cast<LO
>(0);
674 const LO
LINV = Teuchos::OrdinalTraits<LO>::invalid();
690template <
class Scalar,
class LO,
class GO,
class Node>
696 if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
701 return static_cast<LO
>(0);
708 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
709 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid();
715 if (offsets[
k] !=
STINV) {
725template <
class Scalar,
class LO,
class GO,
class Node>
731 if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
736 return static_cast<LO
>(0);
738 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
>(vals);
739 auto val_out = getValuesHost(localRowInd);
740 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
742 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
743 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
744 size_t pointOffset = 0;
747 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
748 const size_t blockOffset = offsets[k] * perBlockSize;
749 if (blockOffset != STINV) {
750 little_block_type A_old = getNonConstLocalBlockFromInput(vOut, blockOffset);
751 const_little_block_type A_new = getConstLocalBlockFromInput(vIn, pointOffset);
759template <
class Scalar,
class LO,
class GO,
class Node>
765 if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
770 return static_cast<LO
>(0);
778 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
779 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
795template <
class Scalar,
class LO,
class GO,
class Node>
797 impl_scalar_type_dualview::t_host::const_type
800 return val_.getHostView(Access::ReadOnly);
803template <
class Scalar,
class LO,
class GO,
class Node>
804typename BlockCrsMatrix<Scalar, LO, GO, Node>::
805 impl_scalar_type_dualview::t_dev::const_type
806 BlockCrsMatrix<Scalar, LO, GO, Node>::
807 getValuesDevice()
const {
808 return val_.getDeviceView(Access::ReadOnly);
811template <
class Scalar,
class LO,
class GO,
class Node>
812typename BlockCrsMatrix<Scalar, LO, GO, Node>::
813 impl_scalar_type_dualview::t_host
816 return val_.getHostView(Access::ReadWrite);
819template <
class Scalar,
class LO,
class GO,
class Node>
821 impl_scalar_type_dualview::t_dev
824 return val_.getDeviceView(Access::ReadWrite);
827template <
class Scalar,
class LO,
class GO,
class Node>
828typename BlockCrsMatrix<Scalar, LO, GO, Node>::
829 impl_scalar_type_dualview::t_host::const_type
832 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
833 auto val = val_.getHostView(Access::ReadOnly);
838template <
class Scalar,
class LO,
class GO,
class Node>
840 impl_scalar_type_dualview::t_dev::const_type
843 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
844 auto val = val_.getDeviceView(Access::ReadOnly);
849template <
class Scalar,
class LO,
class GO,
class Node>
853 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
854 auto val = val_.getHostView(Access::ReadWrite);
859template <
class Scalar,
class LO,
class GO,
class Node>
863 const size_t perBlockSize =
static_cast<LO
>(offsetPerBlock());
864 auto val = val_.getDeviceView(Access::ReadWrite);
869template <
class Scalar,
class LO,
class GO,
class Node>
874 if (
numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid()) {
875 return static_cast<LO
>(0);
881template <
class Scalar,
class LO,
class GO,
class Node>
882typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
885 auto numCols = this->graph_.getColMap()->getLocalNumElements();
886 auto val = val_.getDeviceView(Access::ReadWrite);
887 const LO
blockSize = this->getBlockSize();
888 const auto graph = this->graph_.getLocalGraphDevice();
890 return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
897template <
class Scalar,
class LO,
class GO,
class Node>
901 const Teuchos::ETransp
mode,
911 true, std::logic_error,
912 "Tpetra::BlockCrsMatrix::apply: "
913 "transpose and conjugate transpose modes are not yet implemented.");
916template <
class Scalar,
class LO,
class GO,
class Node>
917void BlockCrsMatrix<Scalar, LO, GO, Node>::
918 applyBlockNoTrans(
const BlockMultiVector<Scalar, LO, GO, Node>& X,
919 BlockMultiVector<Scalar, LO, GO, Node>& Y,
924 typedef ::Tpetra::Import<LO, GO, Node> import_type;
925 typedef ::Tpetra::Export<LO, GO, Node> export_type;
926 const Scalar zero = STS::zero();
927 const Scalar one = STS::one();
928 RCP<const import_type>
import = graph_.getImporter();
930 RCP<const export_type> theExport = graph_.getExporter();
931 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
936 }
else if (beta != one) {
940 const BMV* X_colMap = NULL;
941 if (
import.is_null()) {
944 }
catch (std::exception& e) {
945 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
946 "operator= threw an exception: "
957 if ((*X_colMap_).is_null() ||
958 (**X_colMap_).getNumVectors() != X.getNumVectors() ||
959 (**X_colMap_).getBlockSize() != X.getBlockSize()) {
960 *X_colMap_ = rcp(
new BMV(*(graph_.getColMap()), getBlockSize(),
961 static_cast<LO
>(X.getNumVectors())));
963 (*X_colMap_)->getMultiVectorView().doImport(X.getMultiVectorView(), **pointImporter_,
::Tpetra::REPLACE);
965 X_colMap = &(**X_colMap_);
966 }
catch (std::exception& e) {
967 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
968 "operator= threw an exception: "
974 BMV* Y_rowMap = NULL;
975 if (theExport.is_null()) {
977 }
else if ((*Y_rowMap_).is_null() ||
978 (**Y_rowMap_).getNumVectors() != Y.getNumVectors() ||
979 (**Y_rowMap_).getBlockSize() != Y.getBlockSize()) {
980 *Y_rowMap_ = rcp(
new BMV(*(graph_.getRowMap()), getBlockSize(),
981 static_cast<LO
>(X.getNumVectors())));
983 Y_rowMap = &(**Y_rowMap_);
984 }
catch (std::exception& e) {
985 TEUCHOS_TEST_FOR_EXCEPTION(
986 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
987 "operator= threw an exception: "
994 localApplyBlockNoTrans(*X_colMap, *Y_rowMap, alpha, beta);
995 }
catch (std::exception& e) {
996 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1000 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1001 "an exception not a subclass of std::exception.");
1004 if (!theExport.is_null()) {
1010template <
class Scalar,
class LO,
class GO,
class Node>
1011void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
1012 const BlockMultiVector<Scalar, LO, GO, Node>& X,
1013 BlockMultiVector<Scalar, LO, GO, Node>& Y,
const Scalar alpha,
1014 const Scalar beta) {
1016 "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
1017 const impl_scalar_type alpha_impl = alpha;
1018 const auto graph = this->graph_.getLocalGraphDevice();
1020 mv_type X_mv = X.getMultiVectorView();
1021 mv_type Y_mv = Y.getMultiVectorView();
1022 auto X_lcl = X_mv.getLocalViewDevice(Access::ReadOnly);
1023 auto Y_lcl = Y_mv.getLocalViewDevice(Access::ReadWrite);
1025 auto A_lcl = getLocalMatrixDevice();
1026 if (!applyHelper.get()) {
1028 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
1030 if (applyHelper->shouldUseIntRowptrs()) {
1031 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
1033 &applyHelper->handle_int, KokkosSparse::NoTranspose,
1034 alpha_impl, A_lcl_int_rowptrs, X_lcl, beta, Y_lcl);
1037 &applyHelper->handle, KokkosSparse::NoTranspose,
1038 alpha_impl, A_lcl, X_lcl, beta, Y_lcl);
1042template <
class Scalar,
class LO,
class GO,
class Node>
1043LO BlockCrsMatrix<Scalar, LO, GO, Node>::
1044 findRelOffsetOfColumnIndex(
const LO localRowIndex,
1045 const LO colIndexToFind,
1046 const LO hint)
const {
1047 const size_t absStartOffset = ptrHost_[localRowIndex];
1048 const size_t absEndOffset = ptrHost_[localRowIndex + 1];
1049 const LO numEntriesInRow =
static_cast<LO
>(absEndOffset - absStartOffset);
1051 const LO*
const curInd = indHost_.data() + absStartOffset;
1054 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1061 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid();
1066 const LO maxNumEntriesForLinearSearch = 32;
1067 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1072 const LO* beg = curInd;
1073 const LO* end = curInd + numEntriesInRow;
1074 std::pair<const LO*, const LO*> p =
1075 std::equal_range(beg, end, colIndexToFind);
1076 if (p.first != p.second) {
1078 relOffset =
static_cast<LO
>(p.first - beg);
1081 for (LO k = 0; k < numEntriesInRow; ++k) {
1082 if (colIndexToFind == curInd[k]) {
1092template <
class Scalar,
class LO,
class GO,
class Node>
1093LO BlockCrsMatrix<Scalar, LO, GO, Node>::
1094 offsetPerBlock()
const {
1095 return offsetPerBlock_;
1098template <
class Scalar,
class LO,
class GO,
class Node>
1100BlockCrsMatrix<Scalar, LO, GO, Node>::
1101 getConstLocalBlockFromInput(
const impl_scalar_type* val,
1102 const size_t pointOffset)
const {
1104 const LO rowStride = blockSize_;
1105 return const_little_block_type(val + pointOffset, blockSize_, rowStride);
1108template <
class Scalar,
class LO,
class GO,
class Node>
1110BlockCrsMatrix<Scalar, LO, GO, Node>::
1111 getNonConstLocalBlockFromInput(impl_scalar_type* val,
1112 const size_t pointOffset)
const {
1114 const LO rowStride = blockSize_;
1115 return little_block_type(val + pointOffset, blockSize_, rowStride);
1118template <
class Scalar,
class LO,
class GO,
class Node>
1119typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1120BlockCrsMatrix<Scalar, LO, GO, Node>::
1121 getNonConstLocalBlockFromInputHost(impl_scalar_type* val,
1122 const size_t pointOffset)
const {
1124 const LO rowStride = blockSize_;
1125 const size_t bs2 = blockSize_ * blockSize_;
1126 return little_block_host_type(val + bs2 * pointOffset, blockSize_, rowStride);
1129template <
class Scalar,
class LO,
class GO,
class Node>
1131BlockCrsMatrix<Scalar, LO, GO, Node>::
1132 getLocalBlockDeviceNonConst(
const LO localRowInd,
const LO localColInd)
const {
1133 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1135 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1136 const LO relBlockOffset = this->findRelOffsetOfColumnIndex(localRowInd, localColInd);
1137 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid()) {
1138 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1139 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
1140 auto r_val = getNonConstLocalBlockFromInput(vals.data(), absBlockOffset);
1143 return little_block_type();
1147template <
class Scalar,
class LO,
class GO,
class Node>
1148typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1149BlockCrsMatrix<Scalar, LO, GO, Node>::
1150 getLocalBlockHostNonConst(
const LO localRowInd,
const LO localColInd)
const {
1151 using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1153 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1154 const LO relBlockOffset = this->findRelOffsetOfColumnIndex(localRowInd, localColInd);
1155 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid()) {
1156 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1157 auto vals =
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
1158 auto r_val = getNonConstLocalBlockFromInputHost(vals.data(), absBlockOffset);
1161 return little_block_host_type();
1165template <
class Scalar,
class LO,
class GO,
class Node>
1166bool BlockCrsMatrix<Scalar, LO, GO, Node>::
1167 checkSizes(const ::Tpetra::SrcDistObject& source) {
1169 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
1170 const this_BCRS_type* src =
dynamic_cast<const this_BCRS_type*
>(&source);
1173 std::ostream& err = markLocalErrorAndGetStream();
1174 err <<
"checkSizes: The source object of the Import or Export "
1175 "must be a BlockCrsMatrix with the same template parameters as the "
1181 if (src->getBlockSize() != this->getBlockSize()) {
1182 std::ostream& err = markLocalErrorAndGetStream();
1183 err <<
"checkSizes: The source and target objects of the Import or "
1184 <<
"Export must have the same block sizes. The source's block "
1185 <<
"size = " << src->getBlockSize() <<
" != the target's block "
1186 <<
"size = " << this->getBlockSize() <<
"." << endl;
1188 if (!src->graph_.isFillComplete()) {
1189 std::ostream& err = markLocalErrorAndGetStream();
1190 err <<
"checkSizes: The source object of the Import or Export is "
1191 "not fill complete. Both source and target objects must be fill "
1195 if (!this->graph_.isFillComplete()) {
1196 std::ostream& err = markLocalErrorAndGetStream();
1197 err <<
"checkSizes: The target object of the Import or Export is "
1198 "not fill complete. Both source and target objects must be fill "
1202 if (src->graph_.getColMap().is_null()) {
1203 std::ostream& err = markLocalErrorAndGetStream();
1204 err <<
"checkSizes: The source object of the Import or Export does "
1205 "not have a column Map. Both source and target objects must have "
1209 if (this->graph_.getColMap().is_null()) {
1210 std::ostream& err = markLocalErrorAndGetStream();
1211 err <<
"checkSizes: The target object of the Import or Export does "
1212 "not have a column Map. Both source and target objects must have "
1218 return !(*(this->localError_));
1221template <
class Scalar,
class LO,
class GO,
class Node>
1224 const size_t numSameIDs,
1231 using ::Tpetra::Details::Behavior;
1232 using ::Tpetra::Details::dualViewStatusToString;
1233 using ::Tpetra::Details::ProfilingRegion;
1236 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1237 const bool debug = Behavior::debug();
1238 const bool verbose = Behavior::verbose();
1243 std::ostringstream
os;
1244 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1245 os <<
"Proc " <<
myRank <<
": BlockCrsMatrix::copyAndPermute : " <<
endl;
1250 if (*(this->localError_)) {
1251 std::ostream&
err = this->markLocalErrorAndGetStream();
1253 <<
"The target object of the Import or Export is already in an error state."
1262 std::ostringstream
os;
1266 std::cerr <<
os.str();
1273 std::ostream&
err = this->markLocalErrorAndGetStream();
1281 std::ostream&
err = this->markLocalErrorAndGetStream();
1283 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1290 std::ostream&
err = this->markLocalErrorAndGetStream();
1292 <<
"The source (input) object of the Import or "
1293 "Export is either not a BlockCrsMatrix, or does not have the right "
1294 "template parameters. checkSizes() should have caught this. "
1295 "Please report this bug to the Tpetra developers."
1301#ifdef HAVE_TPETRA_DEBUG
1316#ifdef HAVE_TPETRA_DEBUG
1326 std::ostringstream
os;
1328 <<
"canUseLocalColumnIndices: "
1332 std::cerr <<
os.str();
1341#ifdef HAVE_TPETRA_DEBUG
1342 if (!
srcRowMap.isNodeLocalElement(localRow)) {
1350 values_host_view_type
vals;
1356 if (numEntries > 0) {
1358 if (
err != numEntries) {
1360 if (!
dstRowMap.isNodeLocalElement(localRow)) {
1361#ifdef HAVE_TPETRA_DEBUG
1370 for (LO
k = 0;
k < numEntries; ++
k) {
1373#ifdef HAVE_TPETRA_DEBUG
1389 values_host_view_type
vals;
1393 if (numEntries > 0) {
1395 if (
err != numEntries) {
1397#ifdef HAVE_TPETRA_DEBUG
1398 for (LO
c = 0;
c < numEntries; ++
c) {
1409 const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries();
1415 values_host_view_type
vals;
1423 }
catch (std::exception&
e) {
1425 std::ostringstream
os;
1426 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1427 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1428 << localRow <<
", src->getLocalRowView() threw an exception: "
1430 std::cerr <<
os.str();
1435 if (numEntries > 0) {
1436 if (
static_cast<size_t>(numEntries) >
static_cast<size_t>(
lclDstCols.size())) {
1439 std::ostringstream
os;
1440 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1441 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1442 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1444 std::cerr <<
os.str();
1450 for (LO
j = 0;
j < numEntries; ++
j) {
1454#ifdef HAVE_TPETRA_DEBUG
1462 }
catch (std::exception&
e) {
1464 std::ostringstream
os;
1465 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1466 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1467 << localRow <<
", this->replaceLocalValues() threw an exception: "
1469 std::cerr <<
os.str();
1473 if (
err != numEntries) {
1476 std::ostringstream
os;
1477 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1478 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" "
1480 << localRow <<
", this->replaceLocalValues "
1482 <<
err <<
" instead of numEntries = "
1483 << numEntries <<
endl;
1484 std::cerr <<
os.str();
1497 values_host_view_type
vals;
1503 }
catch (std::exception&
e) {
1505 std::ostringstream
os;
1506 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1507 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"permute\" "
1510 <<
", src->getLocalRowView() threw an exception: " <<
e.what();
1511 std::cerr <<
os.str();
1516 if (numEntries > 0) {
1517 if (
static_cast<size_t>(numEntries) >
static_cast<size_t>(
lclDstCols.size())) {
1523 for (LO
j = 0;
j < numEntries; ++
j) {
1527#ifdef HAVE_TPETRA_DEBUG
1533 if (
err != numEntries) {
1542 std::ostream&
err = this->markLocalErrorAndGetStream();
1543#ifdef HAVE_TPETRA_DEBUG
1544 err <<
"copyAndPermute: The graph structure of the source of the "
1545 "Import or Export must be a subset of the graph structure of the "
1547 err <<
"invalidSrcCopyRows = [";
1551 typename std::set<LO>::const_iterator
itp1 =
it;
1557 err <<
"], invalidDstCopyRows = [";
1561 typename std::set<LO>::const_iterator
itp1 =
it;
1567 err <<
"], invalidDstCopyCols = [";
1571 typename std::set<LO>::const_iterator
itp1 =
it;
1577 err <<
"], invalidDstPermuteCols = [";
1581 typename std::set<LO>::const_iterator
itp1 =
it;
1587 err <<
"], invalidPermuteFromRows = [";
1591 typename std::set<LO>::const_iterator
itp1 =
it;
1599 err <<
"copyAndPermute: The graph structure of the source of the "
1600 "Import or Export must be a subset of the graph structure of the "
1607 std::ostringstream
os;
1608 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1609 const bool lclSuccess = !(*(this->localError_));
1610 os <<
"*** Proc " <<
myRank <<
": copyAndPermute "
1615 os <<
": error messages: " << this->errorMessages();
1617 std::cerr <<
os.str();
1641template <
class LO,
class GO>
1646 using ::Tpetra::Details::PackTraits;
1655 const size_t numEntLen = PackTraits<LO>::packValueCount(numEntLO);
1656 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
1657 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1658 return numEntLen + gidsLen + valsLen;
1672template <
class ST,
class LO,
class GO>
1674unpackRowCount(
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
1675 const size_t offset,
1676 const size_t numBytes,
1678 using Kokkos::subview;
1679 using ::Tpetra::Details::PackTraits;
1681 if (numBytes == 0) {
1683 return static_cast<size_t>(0);
1686 const size_t theNumBytes = PackTraits<LO>::packValueCount(numEntLO);
1687 TEUCHOS_TEST_FOR_EXCEPTION(theNumBytes > numBytes, std::logic_error,
1690 << theNumBytes <<
" < numBytes = " << numBytes
1692 const char*
const inBuf = imports.data() + offset;
1693 const size_t actualNumBytes = PackTraits<LO>::unpackValue(numEntLO, inBuf);
1694 TEUCHOS_TEST_FOR_EXCEPTION(actualNumBytes > numBytes, std::logic_error,
1697 << actualNumBytes <<
" < numBytes = " << numBytes
1699 return static_cast<size_t>(numEntLO);
1706template <
class ST,
class LO,
class GO>
1708packRowForBlockCrs(
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
1709 const size_t offset,
1710 const size_t numEnt,
1711 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
1712 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
1713 const size_t numBytesPerValue,
1714 const size_t blockSize) {
1715 using Kokkos::subview;
1716 using ::Tpetra::Details::PackTraits;
1722 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1725 const LO numEntLO =
static_cast<size_t>(numEnt);
1727 const size_t numEntBeg = offset;
1728 const size_t numEntLen = PackTraits<LO>::packValueCount(numEntLO);
1729 const size_t gidsBeg = numEntBeg + numEntLen;
1730 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
1731 const size_t valsBeg = gidsBeg + gidsLen;
1732 const size_t valsLen = numScalarEnt * numBytesPerValue;
1734 char*
const numEntOut = exports.data() + numEntBeg;
1735 char*
const gidsOut = exports.data() + gidsBeg;
1736 char*
const valsOut = exports.data() + valsBeg;
1738 size_t numBytesOut = 0;
1740 numBytesOut += PackTraits<LO>::packValue(numEntOut, numEntLO);
1743 Kokkos::pair<int, size_t> p;
1744 p = PackTraits<GO>::packArray(gidsOut, gidsIn.data(), numEnt);
1745 errorCode += p.first;
1746 numBytesOut += p.second;
1748 p = PackTraits<ST>::packArray(valsOut, valsIn.data(), numScalarEnt);
1749 errorCode += p.first;
1750 numBytesOut += p.second;
1753 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1754 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
1755 "packRowForBlockCrs: numBytesOut = " << numBytesOut
1756 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1758 TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
1759 "packRowForBlockCrs: "
1760 "PackTraits::packArray returned a nonzero error code");
1766template <
class ST,
class LO,
class GO>
1768unpackRowForBlockCrs(
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
1769 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
1770 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
1771 const size_t offset,
1772 const size_t numBytes,
1773 const size_t numEnt,
1774 const size_t numBytesPerValue,
1775 const size_t blockSize) {
1776 using ::Tpetra::Details::PackTraits;
1778 if (numBytes == 0) {
1782 const size_t numScalarEnt = numEnt * blockSize * blockSize;
1783 TEUCHOS_TEST_FOR_EXCEPTION(
static_cast<size_t>(imports.extent(0)) <= offset,
1784 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = " << imports.extent(0) <<
" <= offset = " << offset <<
".");
1785 TEUCHOS_TEST_FOR_EXCEPTION(
static_cast<size_t>(imports.extent(0)) < offset + numBytes,
1786 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = " << imports.extent(0) <<
" < offset + numBytes = " << (offset + numBytes) <<
".");
1791 const size_t numEntBeg = offset;
1792 const size_t numEntLen = PackTraits<LO>::packValueCount(lid);
1793 const size_t gidsBeg = numEntBeg + numEntLen;
1794 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount(gid);
1795 const size_t valsBeg = gidsBeg + gidsLen;
1796 const size_t valsLen = numScalarEnt * numBytesPerValue;
1798 const char*
const numEntIn = imports.data() + numEntBeg;
1799 const char*
const gidsIn = imports.data() + gidsBeg;
1800 const char*
const valsIn = imports.data() + valsBeg;
1802 size_t numBytesOut = 0;
1805 numBytesOut += PackTraits<LO>::unpackValue(numEntOut, numEntIn);
1806 TEUCHOS_TEST_FOR_EXCEPTION(
static_cast<size_t>(numEntOut) != numEnt, std::logic_error,
1807 "unpackRowForBlockCrs: Expected number of entries " << numEnt
1808 <<
" != actual number of entries " << numEntOut <<
".");
1811 Kokkos::pair<int, size_t> p;
1812 p = PackTraits<GO>::unpackArray(gidsOut.data(), gidsIn, numEnt);
1813 errorCode += p.first;
1814 numBytesOut += p.second;
1816 p = PackTraits<ST>::unpackArray(valsOut.data(), valsIn, numScalarEnt);
1817 errorCode += p.first;
1818 numBytesOut += p.second;
1821 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != numBytes, std::logic_error,
1822 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1823 <<
" != numBytes = " << numBytes <<
".");
1825 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
1826 TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
1827 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
1828 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
1830 TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
1831 "unpackRowForBlockCrs: "
1832 "PackTraits::unpackArray returned a nonzero error code");
1838template <
class Scalar,
class LO,
class GO,
class Node>
1845 Kokkos::DualView<
size_t*,
1849 using ::Tpetra::Details::Behavior;
1850 using ::Tpetra::Details::dualViewStatusToString;
1851 using ::Tpetra::Details::PackTraits;
1852 using ::Tpetra::Details::ProfilingRegion;
1854 typedef typename Kokkos::View<int*, device_type>::host_mirror_type::execution_space
host_exec;
1858 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
1860 const bool debug = Behavior::debug();
1861 const bool verbose = Behavior::verbose();
1866 std::ostringstream
os;
1867 const int myRank = this->graph_.getRowMap()->getComm()->getRank();
1868 os <<
"Proc " <<
myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
1873 if (*(this->localError_)) {
1874 std::ostream&
err = this->markLocalErrorAndGetStream();
1876 <<
"The target object of the Import or Export is already in an error state."
1885 std::ostringstream
os;
1887 <<
prefix <<
" " << dualViewStatusToString(
exportLIDs,
"exportLIDs") << std::endl
1888 <<
prefix <<
" " << dualViewStatusToString(exports,
"exports") << std::endl
1890 std::cerr <<
os.str();
1897 std::ostream&
err = this->markLocalErrorAndGetStream();
1899 <<
"exportLIDs.extent(0) = " <<
exportLIDs.extent(0)
1901 <<
"." << std::endl;
1905 std::ostream&
err = this->markLocalErrorAndGetStream();
1906 err <<
prefix <<
"exportLIDs be sync'd to host." << std::endl;
1912 std::ostream&
err = this->markLocalErrorAndGetStream();
1914 <<
"The source (input) object of the Import or "
1915 "Export is either not a BlockCrsMatrix, or does not have the right "
1916 "template parameters. checkSizes() should have caught this. "
1917 "Please report this bug to the Tpetra developers."
1934 const size_t blockSize =
static_cast<size_t>(src->getBlockSize());
1938 auto val_host = val_.getHostView(Access::ReadOnly);
1975 std::ostringstream
os;
1977 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
1979 std::cerr <<
os.str();
1985 if (exports.extent(0) != totalNumBytes) {
1986 const std::string
oldLabel = exports.view_device().label();
1988 exports = Kokkos::DualView<packet_type*, buffer_device_type>(
newLabel, totalNumBytes);
1990 if (totalNumEntries > 0) {
1995 Kokkos::parallel_scan(
policy,
1996 [=](
const size_t&
i,
size_t& update,
const bool&
final) {
1997 if (
final)
offset(
i) = update;
2002 std::ostream&
err = this->markLocalErrorAndGetStream();
2004 <<
"At end of method, the final offset (in bytes) "
2006 << totalNumBytes <<
". "
2007 <<
"Please report this bug to the Tpetra developers." << std::endl;
2021 exports.modify_host();
2023 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2026 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO) * maxRowLength));
2031 Kokkos::parallel_for(
policy,
2032 [=](
const typename policy_type::member_type&
member) {
2033 const size_t i =
member.league_rank();
2034 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2039 values_host_view_type
vals;
2059 Kokkos::View<const GO*, host_exec>(
gblColInds.data(), maxRowLength),
2068 std::ostringstream
os;
2070 <<
"numBytes computed from packRowForBlockCrs is different from "
2071 <<
"precomputed offset values, LID = " <<
i << std::endl;
2072 std::cerr <<
os.str();
2081 std::ostringstream
os;
2082 const bool lclSuccess = !(*(this->localError_));
2086 std::cerr <<
os.str();
2090template <
class Scalar,
class LO,
class GO,
class Node>
2097 Kokkos::DualView<
size_t*,
2103 using ::Tpetra::Details::Behavior;
2104 using ::Tpetra::Details::dualViewStatusToString;
2105 using ::Tpetra::Details::PackTraits;
2106 using ::Tpetra::Details::ProfilingRegion;
2108 typename Kokkos::View<int*, device_type>::host_mirror_type::execution_space;
2110 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2111 const bool verbose = Behavior::verbose();
2116 std::ostringstream
os;
2117 auto map = this->graph_.getRowMap();
2118 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
2119 const int myRank = comm.is_null() ? -1 : comm->getRank();
2120 os <<
"Proc " <<
myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2124 std::cerr <<
os.str();
2129 if (*(this->localError_)) {
2130 std::ostream&
err = this->markLocalErrorAndGetStream();
2131 std::ostringstream
os;
2132 os <<
prefix <<
"{Im/Ex}port target is already in error." <<
endl;
2134 std::cerr <<
os.str();
2144 std::ostream&
err = this->markLocalErrorAndGetStream();
2145 std::ostringstream
os;
2150 std::cerr <<
os.str();
2159 std::ostream&
err = this->markLocalErrorAndGetStream();
2160 std::ostringstream
os;
2162 <<
"Invalid CombineMode value " <<
combineMode <<
". Valid "
2163 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2166 std::cerr <<
os.str();
2172 if (this->graph_.getColMap().is_null()) {
2173 std::ostream&
err = this->markLocalErrorAndGetStream();
2174 std::ostringstream
os;
2175 os <<
prefix <<
"Target matrix's column Map is null." <<
endl;
2177 std::cerr <<
os.str();
2189 const size_t blockSize = this->getBlockSize();
2199 auto val_host = val_.getHostView(Access::ReadOnly);
2203 const size_t maxRowNumEnt = graph_.getLocalMaxNumRowEntries();
2207 std::ostringstream
os;
2215 std::cerr <<
os.str();
2220 std::ostringstream
os;
2221 os <<
prefix <<
"Nothing to unpack. Done!" << std::endl;
2222 std::cerr <<
os.str();
2229 if (imports.need_sync_host()) {
2230 imports.sync_host();
2242 std::ostream&
err = this->markLocalErrorAndGetStream();
2243 std::ostringstream
os;
2244 os <<
prefix <<
"All input DualViews must be sync'd to host by now. "
2245 <<
"imports_nc.need_sync_host()="
2246 << (imports.need_sync_host() ?
"true" :
"false")
2247 <<
", numPacketsPerLID.need_sync_host()="
2249 <<
", importLIDs.need_sync_host()="
2250 << (
importLIDs.need_sync_host() ?
"true" :
"false")
2253 std::cerr <<
os.str();
2269 Kokkos::parallel_scan(
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets",
policy,
2270 [=](
const size_t&
i,
size_t& update,
const bool&
final) {
2271 if (
final)
offset(
i) = update;
2282 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2286 using policy_type = Kokkos::TeamPolicy<host_exec>;
2293 using pair_type = Kokkos::pair<size_t, size_t>;
2296 getValuesHostNonConst();
2298 Kokkos::parallel_for(
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack",
policy,
2299 [=, *
this](
const typename policy_type::member_type&
member) {
2300 const size_t i =
member.league_rank();
2315 std::ostringstream
os;
2317 <<
"At i = " <<
i <<
", numEnt = " <<
numEnt
2320 std::cerr <<
os.str();
2338 imports.view_host(),
2341 }
catch (std::exception&
e) {
2344 std::ostringstream
os;
2345 os <<
prefix <<
"At i = " <<
i <<
", unpackRowForBlockCrs threw: "
2346 <<
e.what() <<
endl;
2347 std::cerr <<
os.str();
2355 std::ostringstream
os;
2357 <<
"At i = " <<
i <<
", numBytes = " <<
numBytes
2360 std::cerr <<
os.str();
2368 if (
lidsOut(
k) == Teuchos::OrdinalTraits<LO>::invalid()) {
2370 std::ostringstream
os;
2372 <<
"At i = " <<
i <<
", GID " <<
gidsOut(
k)
2373 <<
" is not owned by the calling process."
2375 std::cerr <<
os.str();
2396 std::ostringstream
os;
2398 <<
" != numCombd = " <<
numCombd <<
"."
2400 std::cerr <<
os.str();
2409 std::ostream&
err = this->markLocalErrorAndGetStream();
2412 err <<
" Please run again with the environment variable setting "
2413 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2419 std::ostringstream
os;
2420 const bool lclSuccess = !(*(this->localError_));
2424 std::cerr <<
os.str();
2428template <
class Scalar,
class LO,
class GO,
class Node>
2431 using Teuchos::TypeNameTraits;
2432 std::ostringstream
os;
2433 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2434 <<
"Template parameters: { "
2441 <<
", Global dimensions: ["
2442 << graph_.getDomainMap()->getGlobalNumElements() <<
", "
2443 << graph_.getRangeMap()->getGlobalNumElements() <<
"]"
2444 <<
", Block size: " << getBlockSize()
2449template <
class Scalar,
class LO,
class GO,
class Node>
2452 const Teuchos::EVerbosityLevel
verbLevel)
const {
2453 using Teuchos::ArrayRCP;
2454 using Teuchos::CommRequest;
2455 using Teuchos::FancyOStream;
2456 using Teuchos::getFancyOStream;
2457 using Teuchos::ireceive;
2458 using Teuchos::isend;
2459 using Teuchos::outArg;
2460 using Teuchos::TypeNameTraits;
2461 using Teuchos::VERB_DEFAULT;
2462 using Teuchos::VERB_LOW;
2463 using Teuchos::VERB_MEDIUM;
2464 using Teuchos::VERB_NONE;
2468 using Teuchos::VERB_EXTREME;
2469 using Teuchos::wait;
2472 const Teuchos::EVerbosityLevel
vl =
2482 out <<
"\"Tpetra::BlockCrsMatrix\":" <<
endl;
2485 out <<
"Template parameters:" <<
endl;
2494 <<
"Global dimensions: ["
2495 << graph_.getDomainMap()->getGlobalNumElements() <<
", "
2496 << graph_.getRangeMap()->getGlobalNumElements() <<
"]" <<
endl;
2503 const Teuchos::Comm<int>& comm = *(graph_.getMap()->getComm());
2504 const int myRank = comm.getRank();
2508 getRowMap()->describe(
out,
vl);
2511 if (getColMap() == getRowMap()) {
2513 out <<
"Column Map: same as row Map" <<
endl;
2519 getColMap()->describe(
out,
vl);
2522 if (!getDomainMap().
is_null()) {
2523 if (getDomainMap() == getRowMap()) {
2525 out <<
"Domain Map: same as row Map" <<
endl;
2527 }
else if (getDomainMap() == getColMap()) {
2529 out <<
"Domain Map: same as column Map" <<
endl;
2535 getDomainMap()->describe(
out,
vl);
2538 if (!getRangeMap().
is_null()) {
2539 if (getRangeMap() == getDomainMap()) {
2541 out <<
"Range Map: same as domain Map" <<
endl;
2543 }
else if (getRangeMap() == getRowMap()) {
2545 out <<
"Range Map: same as row Map" <<
endl;
2551 getRangeMap()->describe(
out,
vl);
2557 const Teuchos::Comm<int>& comm = *(graph_.getMap()->getComm());
2558 const int myRank = comm.getRank();
2559 const int numProcs = comm.getSize();
2577 values_host_view_type
vals;
2678template <
class Scalar,
class LO,
class GO,
class Node>
2679Teuchos::RCP<const Teuchos::Comm<int> >
2682 return graph_.getComm();
2685template <
class Scalar,
class LO,
class GO,
class Node>
2689 return graph_.getGlobalNumCols();
2692template <
class Scalar,
class LO,
class GO,
class Node>
2696 return graph_.getLocalNumCols();
2699template <
class Scalar,
class LO,
class GO,
class Node>
2702 return graph_.getIndexBase();
2705template <
class Scalar,
class LO,
class GO,
class Node>
2709 return graph_.getGlobalNumEntries();
2712template <
class Scalar,
class LO,
class GO,
class Node>
2716 return graph_.getLocalNumEntries();
2719template <
class Scalar,
class LO,
class GO,
class Node>
2723 return graph_.getNumEntriesInGlobalRow(
globalRow);
2726template <
class Scalar,
class LO,
class GO,
class Node>
2730 return graph_.getGlobalMaxNumRowEntries();
2733template <
class Scalar,
class LO,
class GO,
class Node>
2736 return graph_.hasColMap();
2739template <
class Scalar,
class LO,
class GO,
class Node>
2742 return graph_.isLocallyIndexed();
2745template <
class Scalar,
class LO,
class GO,
class Node>
2748 return graph_.isGloballyIndexed();
2751template <
class Scalar,
class LO,
class GO,
class Node>
2754 return graph_.isFillComplete();
2757template <
class Scalar,
class LO,
class GO,
class Node>
2763template <
class Scalar,
class LO,
class GO,
class Node>
2766 nonconst_global_inds_host_view_type& ,
2767 nonconst_values_host_view_type& ,
2770 true, std::logic_error,
2771 "Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
2772 "This class doesn't support global matrix indexing.");
2775template <
class Scalar,
class LO,
class GO,
class Node>
2778 global_inds_host_view_type& ,
2779 values_host_view_type& )
const {
2781 true, std::logic_error,
2782 "Tpetra::BlockCrsMatrix::getGlobalRowView: "
2783 "This class doesn't support global matrix indexing.");
2786template <
class Scalar,
class LO,
class GO,
class Node>
2789 local_inds_host_view_type&
colInds,
2790 values_host_view_type&
vals)
const {
2791 if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
2792 colInds = local_inds_host_view_type();
2793 vals = values_host_view_type();
2803template <
class Scalar,
class LO,
class GO,
class Node>
2806 local_inds_host_view_type&
colInds,
2807 nonconst_values_host_view_type&
vals)
const {
2808 if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
2809 colInds = nonconst_local_inds_host_view_type();
2810 vals = nonconst_values_host_view_type();
2821template <
class Scalar,
class LO,
class GO,
class Node>
2840 LO
bs = getBlockSize();
2841 for (
size_t r = 0;
r < getLocalNumRows();
r++) {
2844 for (
int b = 0;
b <
bs;
b++) {
2854template <
class Scalar,
class LO,
class GO,
class Node>
2856 leftScale(const ::Tpetra::Vector<Scalar, LO, GO, Node>& ) {
2858 true, std::logic_error,
2859 "Tpetra::BlockCrsMatrix::leftScale: "
2860 "not implemented.");
2863template <
class Scalar,
class LO,
class GO,
class Node>
2865 rightScale(const ::Tpetra::Vector<Scalar, LO, GO, Node>& ) {
2867 true, std::logic_error,
2868 "Tpetra::BlockCrsMatrix::rightScale: "
2869 "not implemented.");
2872template <
class Scalar,
class LO,
class GO,
class Node>
2873Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
2879template <
class Scalar,
class LO,
class GO,
class Node>
2880typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
2884 true, std::logic_error,
2885 "Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
2886 "not implemented.");
2896#define TPETRA_BLOCKCRSMATRIX_INSTANT(S, LO, GO, NODE) \
2897 template class BlockCrsMatrix<S, LO, GO, NODE>;
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh,...
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
LO local_ordinal_type
The type of local indices.
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
size_t getLocalNumRows() const override
get the local number of block rows
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
local_matrix_device_type getLocalMatrixDevice() const
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
std::string description() const override
One-line description of this object.
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh,...
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row.
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
Kokkos::View< const impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
char packet_type
Implementation detail; tells.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
global_size_t getGlobalNumRows() const override
get the global number of block rows
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
MultiVector for multiple degrees of freedom per mesh point.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Struct that holds views of the contents of a CrsMatrix.
One or more distributed dense vectors.
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
@ ZERO
Replace old values with zero.