10#ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP 
   11#define TPETRA_BLOCKCRSMATRIX_DEF_HPP 
   19#include "Tpetra_BlockMultiVector.hpp" 
   22#include "Teuchos_TimeMonitor.hpp" 
   23#ifdef HAVE_TPETRA_DEBUG 
   27#include "KokkosSparse_spmv.hpp" 
   34struct BlockCrsRowStruct {
 
   35  T totalNumEntries, totalNumBytes, maxRowLength;
 
   36  KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct()                           = 
default;
 
   37  KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct& b) = 
default;
 
   38  KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt, 
const T& numBytes, 
const T& rowLength)
 
   39    : totalNumEntries(numEnt)
 
   40    , totalNumBytes(numBytes)
 
   41    , maxRowLength(rowLength) {}
 
   45static KOKKOS_INLINE_FUNCTION 
void operator+=(BlockCrsRowStruct<T>& a, 
const BlockCrsRowStruct<T>& b) {
 
   46  a.totalNumEntries += b.totalNumEntries;
 
   47  a.totalNumBytes += b.totalNumBytes;
 
   48  a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
 
   51template <
typename T, 
typename ExecSpace>
 
   52struct BlockCrsReducer {
 
   53  typedef BlockCrsReducer reducer;
 
   55  typedef Kokkos::View<value_type, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
 
   58  KOKKOS_INLINE_FUNCTION
 
   59  BlockCrsReducer(value_type& val)
 
   62  KOKKOS_INLINE_FUNCTION 
void join(value_type& dst, value_type& src)
 const { dst += src; }
 
   63  KOKKOS_INLINE_FUNCTION 
void join(value_type& dst, 
const value_type& src)
 const { dst += src; }
 
   64  KOKKOS_INLINE_FUNCTION 
void init(value_type& val)
 const { val = value_type(); }
 
   65  KOKKOS_INLINE_FUNCTION value_type& reference() { 
return *value; }
 
   66  KOKKOS_INLINE_FUNCTION result_view_type view()
 const { 
return result_view_type(value); }
 
   75template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
   76class GetLocalDiagCopy {
 
   78  typedef typename Node::device_type device_type;
 
   79  typedef size_t diag_offset_type;
 
   80  typedef Kokkos::View<
const size_t*, device_type,
 
   81                       Kokkos::MemoryUnmanaged>
 
   83  typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
 
   84  typedef typename global_graph_type::local_graph_device_type local_graph_device_type;
 
   85  typedef typename local_graph_device_type::row_map_type row_offsets_type;
 
   86  typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
 
   87  typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
 
   88  typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
 
   91  GetLocalDiagCopy(
const diag_type& diag,
 
   92                   const values_type& val,
 
   93                   const diag_offsets_type& diagOffsets,
 
   94                   const row_offsets_type& ptr,
 
   97    , diagOffsets_(diagOffsets)
 
   99    , blockSize_(blockSize)
 
  100    , offsetPerBlock_(blockSize_ * blockSize_)
 
  103  KOKKOS_INLINE_FUNCTION 
void 
  104  operator()(
const LO& lclRowInd)
 const {
 
  108    const size_t absOffset = ptr_[lclRowInd];
 
  111    const size_t relOffset = diagOffsets_[lclRowInd];
 
  114    const size_t pointOffset = (absOffset + relOffset) * offsetPerBlock_;
 
  119                         device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
 
  120        const_little_block_type;
 
  121    const_little_block_type D_in(val_.data() + pointOffset,
 
  122                                 blockSize_, blockSize_);
 
  123    auto D_out = Kokkos::subview(diag_, lclRowInd, ALL(), ALL());
 
  129  diag_offsets_type diagOffsets_;
 
  130  row_offsets_type ptr_;
 
  137template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  139BlockCrsMatrix<Scalar, LO, GO, Node>::
 
  140    markLocalErrorAndGetStream() {
 
  141  *(this->localError_) = 
true;
 
  142  if ((*errs_).is_null()) {
 
  143    *errs_ = Teuchos::rcp(
new std::ostringstream());
 
  148template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  167template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  173  , rowMeshMap_(*(
graph.getRowMap()))
 
  186      !graph_.
isSorted(), std::invalid_argument,
 
  188      "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 
  189      "rows (isSorted() is false).  This class assumes sorted rows.");
 
  191  graphRCP_ = Teuchos::rcpFromRef(graph_);
 
  201      "BlockCrsMatrix constructor: The input blockSize = " 
  202          << 
blockSize << 
" <= 0.  The block size must be positive.");
 
  216    ptrHost_           = 
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), 
ptr_h.extent(0));
 
  217    Kokkos::deep_copy(ptrHost_, 
ptr_h);
 
  220    indHost_   = 
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), 
ind_h.extent(0));
 
  222    Kokkos::deep_copy(indHost_, 
ind_h);
 
  225    val_                 = 
decltype(val_)(impl_scalar_type_dualview(
"val", 
numValEnt));
 
 
  229template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  232                   const typename local_matrix_device_type::values_type& 
values,
 
  236  , rowMeshMap_(*(
graph.getRowMap()))
 
  249      !graph_.
isSorted(), std::invalid_argument,
 
  251      "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 
  252      "rows (isSorted() is false).  This class assumes sorted rows.");
 
  254  graphRCP_ = Teuchos::rcpFromRef(graph_);
 
  264      "BlockCrsMatrix constructor: The input blockSize = " 
  265          << 
blockSize << 
" <= 0.  The block size must be positive.");
 
  279    ptrHost_           = 
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), 
ptr_h.extent(0));
 
  280    Kokkos::deep_copy(ptrHost_, 
ptr_h);
 
  283    indHost_   = 
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), 
ind_h.extent(0));
 
  284    Kokkos::deep_copy(indHost_, 
ind_h);
 
  288    val_ = 
decltype(val_)(
values);
 
 
  292template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  300  , rowMeshMap_(*(
graph.getRowMap()))
 
  314      !graph_.
isSorted(), std::invalid_argument,
 
  316      "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 
  317      "rows (isSorted() is false).  This class assumes sorted rows.");
 
  319  graphRCP_ = Teuchos::rcpFromRef(graph_);
 
  329      "BlockCrsMatrix constructor: The input blockSize = " 
  330          << 
blockSize << 
" <= 0.  The block size must be positive.");
 
  340    ptrHost_           = 
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), 
ptr_h.extent(0));
 
  342    Kokkos::deep_copy(ptrHost_, 
ptr_h);
 
  345    indHost_   = 
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), 
ind_h.extent(0));
 
  347    Kokkos::deep_copy(indHost_, 
ind_h);
 
  350    val_                 = 
decltype(val_)(impl_scalar_type_dualview(
"val", 
numValEnt));
 
 
  354template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  355Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
 
  359  return Teuchos::rcp(
new map_type(domainPointMap_));
 
 
  362template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  363Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
 
  367  return Teuchos::rcp(
new map_type(rangePointMap_));
 
 
  370template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  371Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
 
  374  return graph_.getRowMap();
 
 
  377template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  378Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
 
  381  return graph_.getColMap();
 
 
  384template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  388  return graph_.getGlobalNumRows();
 
 
  391template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  395  return graph_.getLocalNumRows();
 
 
  398template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  402  return graph_.getLocalMaxNumRowEntries();
 
 
  405template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  409          Teuchos::ETransp 
mode,
 
  414      mode != Teuchos::NO_TRANS && 
mode != Teuchos::TRANS && 
mode != Teuchos::CONJ_TRANS,
 
  415      std::invalid_argument,
 
  416      "Tpetra::BlockCrsMatrix::apply: " 
  417      "Invalid 'mode' argument.  Valid values are Teuchos::NO_TRANS, " 
  418      "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
 
  421      !
X.isConstantStride() || !
Y.isConstantStride(),
 
  422      std::invalid_argument,
 
  423      "Tpetra::BlockCrsMatrix::apply: " 
  424      "X and Y must both be constant stride");
 
  432  } 
catch (std::invalid_argument& 
e) {
 
  434        true, std::invalid_argument,
 
  435        "Tpetra::BlockCrsMatrix::" 
  436        "apply: Either the input MultiVector X or the output MultiVector Y " 
  437        "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's " 
  438        "graph.  BlockMultiVector's constructor threw the following exception: " 
  448  } 
catch (std::invalid_argument& 
e) {
 
  450        true, std::invalid_argument,
 
  451        "Tpetra::BlockCrsMatrix::" 
  452        "apply: The implementation method applyBlock complained about having " 
  453        "an invalid argument.  It reported the following: " 
  455  } 
catch (std::logic_error& 
e) {
 
  457        true, std::invalid_argument,
 
  458        "Tpetra::BlockCrsMatrix::" 
  459        "apply: The implementation method applyBlock complained about a " 
  460        "possible bug in its implementation.  It reported the following: " 
  462  } 
catch (std::exception& 
e) {
 
  464        true, std::invalid_argument,
 
  465        "Tpetra::BlockCrsMatrix::" 
  466        "apply: The implementation method applyBlock threw an exception which " 
  467        "is neither std::invalid_argument nor std::logic_error, but is a " 
  468        "subclass of std::exception.  It reported the following: " 
  472        true, std::logic_error,
 
  473        "Tpetra::BlockCrsMatrix::" 
  474        "apply: The implementation method applyBlock threw an exception which " 
  475        "is not an instance of a subclass of std::exception.  This probably " 
  476        "indicates a bug.  Please report this to the Tpetra developers.");
 
 
  480template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  484               Teuchos::ETransp 
mode,
 
  488      X.getBlockSize() != 
Y.getBlockSize(), std::invalid_argument,
 
  489      "Tpetra::BlockCrsMatrix::applyBlock: " 
  490      "X and Y have different block sizes.  X.getBlockSize() = " 
  491          << 
X.getBlockSize() << 
" != Y.getBlockSize() = " 
  492          << 
Y.getBlockSize() << 
".");
 
  494  if (
mode == Teuchos::NO_TRANS) {
 
  496  } 
else if (
mode == Teuchos::TRANS || 
mode == Teuchos::CONJ_TRANS) {
 
  500        true, std::invalid_argument,
 
  501        "Tpetra::BlockCrsMatrix::" 
  502        "applyBlock: Invalid 'mode' argument.  Valid values are " 
  503        "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
 
 
  507template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  517                             "destMatrix is required to be null.");
 
 
  531template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  541                             "destMatrix is required to be null.");
 
 
  555template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  558  auto val_d = val_.getDeviceView(Access::OverwriteAll);
 
 
  562template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  574template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  577                                           Kokkos::MemoryUnmanaged>& offsets)
 const {
 
  578  graph_.getLocalDiagOffsets(offsets);
 
 
  581template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  584                                        Kokkos::MemoryUnmanaged>& diag,
 
  586                                        Kokkos::MemoryUnmanaged>& offsets)
 const {
 
  587  using Kokkos::parallel_for;
 
  588  const char prefix[] = 
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
 
  590  const LO 
lclNumMeshRows = 
static_cast<LO
>(rowMeshMap_.getLocalNumElements());
 
  591  const LO 
blockSize      = this->getBlockSize();
 
  593                                 static_cast<LO
>(diag.extent(1)) < 
blockSize ||
 
  594                                 static_cast<LO
>(diag.extent(2)) < 
blockSize,
 
  595                             std::invalid_argument, 
prefix << 
"The input Kokkos::View is not big enough to hold all the data.");
 
  597                             prefix << 
"offsets.size() = " << offsets.size() << 
" < local number of " 
  601  typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
 
  608  auto val_d = val_.getDeviceView(Access::ReadOnly);
 
  611                            graph_.getLocalGraphDevice().row_map, blockSize_));
 
 
  614template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  626template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  637template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  640                    nonconst_local_inds_host_view_type& 
Indices,
 
  641                    nonconst_values_host_view_type& 
Values,
 
  647                               "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold " 
  654  for (LO 
i = 0; 
i < 
numInds * blockSize_ * blockSize_; ++
i) {
 
 
  660template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  666  if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
 
  671    return static_cast<LO
>(0);
 
  674  const LO 
LINV = Teuchos::OrdinalTraits<LO>::invalid();
 
 
  690template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  696  if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
 
  701    return static_cast<LO
>(0);
 
  709  const ptrdiff_t STINV     = Teuchos::OrdinalTraits<ptrdiff_t>::invalid();
 
  715    if (offsets[
k] != 
STINV) {
 
 
  725template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  731  if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
 
  736    return static_cast<LO
>(0);
 
  738  const impl_scalar_type* 
const vIn = 
reinterpret_cast<const impl_scalar_type*
>(vals);
 
  739  auto val_out                      = getValuesHost(localRowInd);
 
  740  impl_scalar_type* vOut            = 
const_cast<impl_scalar_type*
>(val_out.data());
 
  742  const size_t perBlockSize = offsetPerBlock();
 
  743  const size_t STINV        = Teuchos::OrdinalTraits<size_t>::invalid();
 
  744  size_t pointOffset        = 0;  
 
  747  for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
 
  748    const size_t blockOffset = offsets[k] * perBlockSize;
 
  749    if (blockOffset != STINV) {
 
  750      little_block_type A_old       = getNonConstLocalBlockFromInput(vOut, blockOffset);
 
  751      const_little_block_type A_new = getConstLocalBlockFromInput(vIn, pointOffset);
 
  759template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  765  if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
 
  770    return static_cast<LO
>(0);
 
  779  const size_t STINV        = Teuchos::OrdinalTraits<size_t>::invalid();
 
 
  795template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  797    impl_scalar_type_dualview::t_host::const_type
 
  800  return val_.getHostView(Access::ReadOnly);
 
  803template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  804typename BlockCrsMatrix<Scalar, LO, GO, Node>::
 
  805    impl_scalar_type_dualview::t_dev::const_type
 
  806    BlockCrsMatrix<Scalar, LO, GO, Node>::
 
  807        getValuesDevice()
 const {
 
  808  return val_.getDeviceView(Access::ReadOnly);
 
  811template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  812typename BlockCrsMatrix<Scalar, LO, GO, Node>::
 
  813    impl_scalar_type_dualview::t_host
 
  816  return val_.getHostView(Access::ReadWrite);
 
 
  819template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  821    impl_scalar_type_dualview::t_dev
 
  824  return val_.getDeviceView(Access::ReadWrite);
 
  827template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  828typename BlockCrsMatrix<Scalar, LO, GO, Node>::
 
  829    impl_scalar_type_dualview::t_host::const_type
 
  833  auto val                  = val_.getHostView(Access::ReadOnly);
 
 
  838template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  840    impl_scalar_type_dualview::t_dev::const_type
 
  844  auto val                  = val_.getDeviceView(Access::ReadOnly);
 
 
  849template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  854  auto val                  = val_.getHostView(Access::ReadWrite);
 
 
  859template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  864  auto val                  = val_.getDeviceView(Access::ReadWrite);
 
 
  869template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  874  if (
numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid()) {
 
 
  881template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  882typename BlockCrsMatrix<Scalar, LO, GO, Node>::local_matrix_device_type
 
  885  auto numCols       = this->graph_.getColMap()->getLocalNumElements();
 
  886  auto val           = val_.getDeviceView(Access::ReadWrite);
 
  887  const LO 
blockSize = this->getBlockSize();
 
  888  const auto graph   = this->graph_.getLocalGraphDevice();
 
  890  return local_matrix_device_type(
"Tpetra::BlockCrsMatrix::lclMatrixDevice",
 
 
  897template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  901                    const Teuchos::ETransp 
mode,
 
  911      true, std::logic_error,
 
  912      "Tpetra::BlockCrsMatrix::apply: " 
  913      "transpose and conjugate transpose modes are not yet implemented.");
 
  916template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
  917void BlockCrsMatrix<Scalar, LO, GO, Node>::
 
  918    applyBlockNoTrans(
const BlockMultiVector<Scalar, LO, GO, Node>& X,
 
  919                      BlockMultiVector<Scalar, LO, GO, Node>& Y,
 
  924  typedef ::Tpetra::Import<LO, GO, Node> import_type;
 
  925  typedef ::Tpetra::Export<LO, GO, Node> export_type;
 
  926  const Scalar zero             = STS::zero();
 
  927  const Scalar one              = STS::one();
 
  928  RCP<const import_type> 
import = graph_.getImporter();
 
  930  RCP<const export_type> theExport = graph_.getExporter();
 
  931  const char prefix[]              = 
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
 
  936    } 
else if (beta != one) {
 
  940    const BMV* X_colMap = NULL;
 
  941    if (
import.is_null()) {
 
  944      } 
catch (std::exception& e) {
 
  945        TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, prefix << 
"Tpetra::MultiVector::" 
  946                                                                     "operator= threw an exception: " 
  957      if ((*X_colMap_).is_null() ||
 
  958          (**X_colMap_).getNumVectors() != X.getNumVectors() ||
 
  959          (**X_colMap_).getBlockSize() != X.getBlockSize()) {
 
  960        *X_colMap_ = rcp(
new BMV(*(graph_.getColMap()), getBlockSize(),
 
  961                                 static_cast<LO
>(X.getNumVectors())));
 
  963      (*X_colMap_)->getMultiVectorView().doImport(X.getMultiVectorView(), **pointImporter_, 
::Tpetra::REPLACE);
 
  965        X_colMap = &(**X_colMap_);
 
  966      } 
catch (std::exception& e) {
 
  967        TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, prefix << 
"Tpetra::MultiVector::" 
  968                                                                     "operator= threw an exception: " 
  974    BMV* Y_rowMap = NULL;
 
  975    if (theExport.is_null()) {
 
  977    } 
else if ((*Y_rowMap_).is_null() ||
 
  978               (**Y_rowMap_).getNumVectors() != Y.getNumVectors() ||
 
  979               (**Y_rowMap_).getBlockSize() != Y.getBlockSize()) {
 
  980      *Y_rowMap_ = rcp(
new BMV(*(graph_.getRowMap()), getBlockSize(),
 
  981                               static_cast<LO
>(X.getNumVectors())));
 
  983        Y_rowMap = &(**Y_rowMap_);
 
  984      } 
catch (std::exception& e) {
 
  985        TEUCHOS_TEST_FOR_EXCEPTION(
 
  986            true, std::logic_error, prefix << 
"Tpetra::MultiVector::" 
  987                                              "operator= threw an exception: " 
  994      localApplyBlockNoTrans(*X_colMap, *Y_rowMap, alpha, beta);
 
  995    } 
catch (std::exception& e) {
 
  996      TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, prefix << 
"localApplyBlockNoTrans threw " 
 1000      TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, prefix << 
"localApplyBlockNoTrans threw " 
 1001                                                                     "an exception not a subclass of std::exception.");
 
 1004    if (!theExport.is_null()) {
 
 1010template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1011void BlockCrsMatrix<Scalar, LO, GO, Node>::localApplyBlockNoTrans(
 
 1012    const BlockMultiVector<Scalar, LO, GO, Node>& X,
 
 1013    BlockMultiVector<Scalar, LO, GO, Node>& Y, 
const Scalar alpha,
 
 1014    const Scalar beta) {
 
 1016      "Tpetra::BlockCrsMatrix::localApplyBlockNoTrans");
 
 1017  const impl_scalar_type alpha_impl = alpha;
 
 1018  const auto graph                  = this->graph_.getLocalGraphDevice();
 
 1020  mv_type X_mv = X.getMultiVectorView();
 
 1021  mv_type Y_mv = Y.getMultiVectorView();
 
 1022  auto X_lcl   = X_mv.getLocalViewDevice(Access::ReadOnly);
 
 1023  auto Y_lcl   = Y_mv.getLocalViewDevice(Access::ReadWrite);
 
 1025  auto A_lcl = getLocalMatrixDevice();
 
 1026  if (!applyHelper.get()) {
 
 1028    applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map);
 
 1030  if (applyHelper->shouldUseIntRowptrs()) {
 
 1031    auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
 
 1033        &applyHelper->handle_int, KokkosSparse::NoTranspose,
 
 1034        alpha_impl, A_lcl_int_rowptrs, X_lcl, beta, Y_lcl);
 
 1037        &applyHelper->handle, KokkosSparse::NoTranspose,
 
 1038        alpha_impl, A_lcl, X_lcl, beta, Y_lcl);
 
 1042template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1043LO BlockCrsMatrix<Scalar, LO, GO, Node>::
 
 1044    findRelOffsetOfColumnIndex(
const LO localRowIndex,
 
 1045                               const LO colIndexToFind,
 
 1046                               const LO hint)
 const {
 
 1047  const size_t absStartOffset = ptrHost_[localRowIndex];
 
 1048  const size_t absEndOffset   = ptrHost_[localRowIndex + 1];
 
 1049  const LO numEntriesInRow    = 
static_cast<LO
>(absEndOffset - absStartOffset);
 
 1051  const LO* 
const curInd = indHost_.data() + absStartOffset;
 
 1054  if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
 
 1061  LO relOffset = Teuchos::OrdinalTraits<LO>::invalid();
 
 1066  const LO maxNumEntriesForLinearSearch = 32;
 
 1067  if (numEntriesInRow > maxNumEntriesForLinearSearch) {
 
 1072    const LO* beg = curInd;
 
 1073    const LO* end = curInd + numEntriesInRow;
 
 1074    std::pair<const LO*, const LO*> p =
 
 1075        std::equal_range(beg, end, colIndexToFind);
 
 1076    if (p.first != p.second) {
 
 1078      relOffset = 
static_cast<LO
>(p.first - beg);
 
 1081    for (LO k = 0; k < numEntriesInRow; ++k) {
 
 1082      if (colIndexToFind == curInd[k]) {
 
 1092template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1093LO BlockCrsMatrix<Scalar, LO, GO, Node>::
 
 1094    offsetPerBlock()
 const {
 
 1095  return offsetPerBlock_;
 
 1098template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1100BlockCrsMatrix<Scalar, LO, GO, Node>::
 
 1101    getConstLocalBlockFromInput(
const impl_scalar_type* val,
 
 1102                                const size_t pointOffset)
 const {
 
 1104  const LO rowStride = blockSize_;
 
 1105  return const_little_block_type(val + pointOffset, blockSize_, rowStride);
 
 1108template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1110BlockCrsMatrix<Scalar, LO, GO, Node>::
 
 1111    getNonConstLocalBlockFromInput(impl_scalar_type* val,
 
 1112                                   const size_t pointOffset)
 const {
 
 1114  const LO rowStride = blockSize_;
 
 1115  return little_block_type(val + pointOffset, blockSize_, rowStride);
 
 1118template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1119typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
 
 1120BlockCrsMatrix<Scalar, LO, GO, Node>::
 
 1121    getNonConstLocalBlockFromInputHost(impl_scalar_type* val,
 
 1122                                       const size_t pointOffset)
 const {
 
 1124  const LO rowStride = blockSize_;
 
 1125  const size_t bs2   = blockSize_ * blockSize_;
 
 1126  return little_block_host_type(val + bs2 * pointOffset, blockSize_, rowStride);
 
 1129template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1131BlockCrsMatrix<Scalar, LO, GO, Node>::
 
 1132    getLocalBlockDeviceNonConst(
const LO localRowInd, 
const LO localColInd)
 const {
 
 1133  using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
 
 1135  const size_t absRowBlockOffset = ptrHost_[localRowInd];
 
 1136  const LO relBlockOffset        = this->findRelOffsetOfColumnIndex(localRowInd, localColInd);
 
 1137  if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid()) {
 
 1138    const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
 
 1139    auto vals                   = 
const_cast<this_BCRS_type&
>(*this).getValuesDeviceNonConst();
 
 1140    auto r_val                  = getNonConstLocalBlockFromInput(vals.data(), absBlockOffset);
 
 1143    return little_block_type();
 
 1147template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1148typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
 
 1149BlockCrsMatrix<Scalar, LO, GO, Node>::
 
 1150    getLocalBlockHostNonConst(
const LO localRowInd, 
const LO localColInd)
 const {
 
 1151  using this_BCRS_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
 
 1153  const size_t absRowBlockOffset = ptrHost_[localRowInd];
 
 1154  const LO relBlockOffset        = this->findRelOffsetOfColumnIndex(localRowInd, localColInd);
 
 1155  if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid()) {
 
 1156    const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
 
 1157    auto vals                   = 
const_cast<this_BCRS_type&
>(*this).getValuesHostNonConst();
 
 1158    auto r_val                  = getNonConstLocalBlockFromInputHost(vals.data(), absBlockOffset);
 
 1161    return little_block_host_type();
 
 1165template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1166bool BlockCrsMatrix<Scalar, LO, GO, Node>::
 
 1167    checkSizes(const ::Tpetra::SrcDistObject& source) {
 
 1169  typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_BCRS_type;
 
 1170  const this_BCRS_type* src = 
dynamic_cast<const this_BCRS_type*
>(&source);
 
 1173    std::ostream& err = markLocalErrorAndGetStream();
 
 1174    err << 
"checkSizes: The source object of the Import or Export " 
 1175           "must be a BlockCrsMatrix with the same template parameters as the " 
 1181    if (src->getBlockSize() != this->getBlockSize()) {
 
 1182      std::ostream& err = markLocalErrorAndGetStream();
 
 1183      err << 
"checkSizes: The source and target objects of the Import or " 
 1184          << 
"Export must have the same block sizes.  The source's block " 
 1185          << 
"size = " << src->getBlockSize() << 
" != the target's block " 
 1186          << 
"size = " << this->getBlockSize() << 
"." << endl;
 
 1188    if (!src->graph_.isFillComplete()) {
 
 1189      std::ostream& err = markLocalErrorAndGetStream();
 
 1190      err << 
"checkSizes: The source object of the Import or Export is " 
 1191             "not fill complete.  Both source and target objects must be fill " 
 1195    if (!this->graph_.isFillComplete()) {
 
 1196      std::ostream& err = markLocalErrorAndGetStream();
 
 1197      err << 
"checkSizes: The target object of the Import or Export is " 
 1198             "not fill complete.  Both source and target objects must be fill " 
 1202    if (src->graph_.getColMap().is_null()) {
 
 1203      std::ostream& err = markLocalErrorAndGetStream();
 
 1204      err << 
"checkSizes: The source object of the Import or Export does " 
 1205             "not have a column Map.  Both source and target objects must have " 
 1209    if (this->graph_.getColMap().is_null()) {
 
 1210      std::ostream& err = markLocalErrorAndGetStream();
 
 1211      err << 
"checkSizes: The target object of the Import or Export does " 
 1212             "not have a column Map.  Both source and target objects must have " 
 1218  return !(*(this->localError_));
 
 1221template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1224                   const size_t numSameIDs,
 
 1231  using ::Tpetra::Details::Behavior;
 
 1232  using ::Tpetra::Details::dualViewStatusToString;
 
 1233  using ::Tpetra::Details::ProfilingRegion;
 
 1236  ProfilingRegion 
profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
 
 1237  const bool debug   = Behavior::debug();
 
 1238  const bool verbose = Behavior::verbose();
 
 1243    std::ostringstream 
os;
 
 1244    const int myRank = this->graph_.getRowMap()->getComm()->getRank();
 
 1245    os << 
"Proc " << 
myRank << 
": BlockCrsMatrix::copyAndPermute : " << 
endl;
 
 1250  if (*(this->localError_)) {
 
 1251    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1253        << 
"The target object of the Import or Export is already in an error state." 
 1262    std::ostringstream 
os;
 
 1266    std::cerr << 
os.str();
 
 1273    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1281    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1283        << 
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host." 
 1290    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1292        << 
"The source (input) object of the Import or " 
 1293           "Export is either not a BlockCrsMatrix, or does not have the right " 
 1294           "template parameters.  checkSizes() should have caught this.  " 
 1295           "Please report this bug to the Tpetra developers." 
 1301#ifdef HAVE_TPETRA_DEBUG 
 1316#ifdef HAVE_TPETRA_DEBUG 
 1326    std::ostringstream 
os;
 
 1328       << 
"canUseLocalColumnIndices: " 
 1332    std::cerr << 
os.str();
 
 1341#ifdef HAVE_TPETRA_DEBUG 
 1342      if (!
srcRowMap.isNodeLocalElement(localRow)) {
 
 1350      values_host_view_type 
vals;
 
 1356      if (numEntries > 0) {
 
 1358        if (
err != numEntries) {
 
 1360          if (!
dstRowMap.isNodeLocalElement(localRow)) {
 
 1361#ifdef HAVE_TPETRA_DEBUG 
 1370            for (LO 
k = 0; 
k < numEntries; ++
k) {
 
 1373#ifdef HAVE_TPETRA_DEBUG 
 1389      values_host_view_type 
vals;
 
 1393      if (numEntries > 0) {
 
 1395        if (
err != numEntries) {
 
 1397#ifdef HAVE_TPETRA_DEBUG 
 1398          for (LO 
c = 0; 
c < numEntries; ++
c) {
 
 1409    const size_t maxNumEnt = src->graph_.getLocalMaxNumRowEntries();
 
 1415      values_host_view_type 
vals;
 
 1423      } 
catch (std::exception& 
e) {
 
 1425          std::ostringstream 
os;
 
 1426          const int myRank = this->graph_.getRowMap()->getComm()->getRank();
 
 1427          os << 
"Proc " << 
myRank << 
": copyAndPermute: At \"same\" localRow " 
 1428             << localRow << 
", src->getLocalRowView() threw an exception: " 
 1430          std::cerr << 
os.str();
 
 1435      if (numEntries > 0) {
 
 1436        if (
static_cast<size_t>(numEntries) > 
static_cast<size_t>(
lclDstCols.size())) {
 
 1439            std::ostringstream 
os;
 
 1440            const int myRank = this->graph_.getRowMap()->getComm()->getRank();
 
 1441            os << 
"Proc " << 
myRank << 
": copyAndPermute: At \"same\" localRow " 
 1442               << localRow << 
", numEntries = " << numEntries << 
" > maxNumEnt = " 
 1444            std::cerr << 
os.str();
 
 1450          for (LO 
j = 0; 
j < numEntries; ++
j) {
 
 1454#ifdef HAVE_TPETRA_DEBUG 
 1462          } 
catch (std::exception& 
e) {
 
 1464              std::ostringstream 
os;
 
 1465              const int myRank = this->graph_.getRowMap()->getComm()->getRank();
 
 1466              os << 
"Proc " << 
myRank << 
": copyAndPermute: At \"same\" localRow " 
 1467                 << localRow << 
", this->replaceLocalValues() threw an exception: " 
 1469              std::cerr << 
os.str();
 
 1473          if (
err != numEntries) {
 
 1476              std::ostringstream 
os;
 
 1477              const int myRank = this->graph_.getRowMap()->getComm()->getRank();
 
 1478              os << 
"Proc " << 
myRank << 
": copyAndPermute: At \"same\" " 
 1480                 << localRow << 
", this->replaceLocalValues " 
 1482                 << 
err << 
" instead of numEntries = " 
 1483                 << numEntries << 
endl;
 
 1484              std::cerr << 
os.str();
 
 1497      values_host_view_type 
vals;
 
 1503      } 
catch (std::exception& 
e) {
 
 1505          std::ostringstream 
os;
 
 1506          const int myRank = this->graph_.getRowMap()->getComm()->getRank();
 
 1507          os << 
"Proc " << 
myRank << 
": copyAndPermute: At \"permute\" " 
 1510             << 
", src->getLocalRowView() threw an exception: " << 
e.what();
 
 1511          std::cerr << 
os.str();
 
 1516      if (numEntries > 0) {
 
 1517        if (
static_cast<size_t>(numEntries) > 
static_cast<size_t>(
lclDstCols.size())) {
 
 1523          for (LO 
j = 0; 
j < numEntries; ++
j) {
 
 1527#ifdef HAVE_TPETRA_DEBUG 
 1533          if (
err != numEntries) {
 
 1542    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1543#ifdef HAVE_TPETRA_DEBUG 
 1544    err << 
"copyAndPermute: The graph structure of the source of the " 
 1545           "Import or Export must be a subset of the graph structure of the " 
 1547    err << 
"invalidSrcCopyRows = [";
 
 1551      typename std::set<LO>::const_iterator 
itp1 = 
it;
 
 1557    err << 
"], invalidDstCopyRows = [";
 
 1561      typename std::set<LO>::const_iterator 
itp1 = 
it;
 
 1567    err << 
"], invalidDstCopyCols = [";
 
 1571      typename std::set<LO>::const_iterator 
itp1 = 
it;
 
 1577    err << 
"], invalidDstPermuteCols = [";
 
 1581      typename std::set<LO>::const_iterator 
itp1 = 
it;
 
 1587    err << 
"], invalidPermuteFromRows = [";
 
 1591      typename std::set<LO>::const_iterator 
itp1 = 
it;
 
 1599    err << 
"copyAndPermute: The graph structure of the source of the " 
 1600           "Import or Export must be a subset of the graph structure of the " 
 1607    std::ostringstream 
os;
 
 1608    const int myRank      = this->graph_.getRowMap()->getComm()->getRank();
 
 1609    const bool lclSuccess = !(*(this->localError_));
 
 1610    os << 
"*** Proc " << 
myRank << 
": copyAndPermute " 
 1615      os << 
": error messages: " << this->errorMessages();  
 
 1617    std::cerr << 
os.str();
 
 
 1641template <
class LO, 
class GO>
 
 1646  using ::Tpetra::Details::PackTraits;
 
 1655    const size_t numEntLen = PackTraits<LO>::packValueCount(numEntLO);
 
 1656    const size_t gidsLen   = numEnt * PackTraits<GO>::packValueCount(gid);
 
 1657    const size_t valsLen   = numEnt * numBytesPerValue * blkSize * blkSize;
 
 1658    return numEntLen + gidsLen + valsLen;
 
 1672template <
class ST, 
class LO, 
class GO>
 
 1674unpackRowCount(
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
 
 1675               const size_t offset,
 
 1676               const size_t numBytes,
 
 1678  using Kokkos::subview;
 
 1679  using ::Tpetra::Details::PackTraits;
 
 1681  if (numBytes == 0) {
 
 1683    return static_cast<size_t>(0);
 
 1686    const size_t theNumBytes = PackTraits<LO>::packValueCount(numEntLO);
 
 1687    TEUCHOS_TEST_FOR_EXCEPTION(theNumBytes > numBytes, std::logic_error,
 
 1690                                   << theNumBytes << 
" < numBytes = " << numBytes
 
 1692    const char* 
const inBuf     = imports.data() + offset;
 
 1693    const size_t actualNumBytes = PackTraits<LO>::unpackValue(numEntLO, inBuf);
 
 1694    TEUCHOS_TEST_FOR_EXCEPTION(actualNumBytes > numBytes, std::logic_error,
 
 1697                                   << actualNumBytes << 
" < numBytes = " << numBytes
 
 1699    return static_cast<size_t>(numEntLO);
 
 1706template <
class ST, 
class LO, 
class GO>
 
 1708packRowForBlockCrs(
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
 
 1709                   const size_t offset,
 
 1710                   const size_t numEnt,
 
 1711                   const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
 
 1712                   const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
 
 1713                   const size_t numBytesPerValue,
 
 1714                   const size_t blockSize) {
 
 1715  using Kokkos::subview;
 
 1716  using ::Tpetra::Details::PackTraits;
 
 1722  const size_t numScalarEnt = numEnt * blockSize * blockSize;
 
 1725  const LO numEntLO = 
static_cast<size_t>(numEnt);
 
 1727  const size_t numEntBeg = offset;
 
 1728  const size_t numEntLen = PackTraits<LO>::packValueCount(numEntLO);
 
 1729  const size_t gidsBeg   = numEntBeg + numEntLen;
 
 1730  const size_t gidsLen   = numEnt * PackTraits<GO>::packValueCount(gid);
 
 1731  const size_t valsBeg   = gidsBeg + gidsLen;
 
 1732  const size_t valsLen   = numScalarEnt * numBytesPerValue;
 
 1734  char* 
const numEntOut = exports.data() + numEntBeg;
 
 1735  char* 
const gidsOut   = exports.data() + gidsBeg;
 
 1736  char* 
const valsOut   = exports.data() + valsBeg;
 
 1738  size_t numBytesOut = 0;
 
 1740  numBytesOut += PackTraits<LO>::packValue(numEntOut, numEntLO);
 
 1743    Kokkos::pair<int, size_t> p;
 
 1744    p = PackTraits<GO>::packArray(gidsOut, gidsIn.data(), numEnt);
 
 1745    errorCode += p.first;
 
 1746    numBytesOut += p.second;
 
 1748    p = PackTraits<ST>::packArray(valsOut, valsIn.data(), numScalarEnt);
 
 1749    errorCode += p.first;
 
 1750    numBytesOut += p.second;
 
 1753  const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
 
 1754  TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
 
 1755                             "packRowForBlockCrs: numBytesOut = " << numBytesOut
 
 1756                                                                  << 
" != expectedNumBytes = " << expectedNumBytes << 
".");
 
 1758  TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
 
 1759                             "packRowForBlockCrs: " 
 1760                             "PackTraits::packArray returned a nonzero error code");
 
 1766template <
class ST, 
class LO, 
class GO>
 
 1768unpackRowForBlockCrs(
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
 
 1769                     const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
 
 1770                     const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
 
 1771                     const size_t offset,
 
 1772                     const size_t numBytes,
 
 1773                     const size_t numEnt,
 
 1774                     const size_t numBytesPerValue,
 
 1775                     const size_t blockSize) {
 
 1776  using ::Tpetra::Details::PackTraits;
 
 1778  if (numBytes == 0) {
 
 1782  const size_t numScalarEnt = numEnt * blockSize * blockSize;
 
 1783  TEUCHOS_TEST_FOR_EXCEPTION(
static_cast<size_t>(imports.extent(0)) <= offset,
 
 1784                             std::logic_error, 
"unpackRowForBlockCrs: imports.extent(0) = " << imports.extent(0) << 
" <= offset = " << offset << 
".");
 
 1785  TEUCHOS_TEST_FOR_EXCEPTION(
static_cast<size_t>(imports.extent(0)) < offset + numBytes,
 
 1786                             std::logic_error, 
"unpackRowForBlockCrs: imports.extent(0) = " << imports.extent(0) << 
" < offset + numBytes = " << (offset + numBytes) << 
".");
 
 1791  const size_t numEntBeg = offset;
 
 1792  const size_t numEntLen = PackTraits<LO>::packValueCount(lid);
 
 1793  const size_t gidsBeg   = numEntBeg + numEntLen;
 
 1794  const size_t gidsLen   = numEnt * PackTraits<GO>::packValueCount(gid);
 
 1795  const size_t valsBeg   = gidsBeg + gidsLen;
 
 1796  const size_t valsLen   = numScalarEnt * numBytesPerValue;
 
 1798  const char* 
const numEntIn = imports.data() + numEntBeg;
 
 1799  const char* 
const gidsIn   = imports.data() + gidsBeg;
 
 1800  const char* 
const valsIn   = imports.data() + valsBeg;
 
 1802  size_t numBytesOut = 0;
 
 1805  numBytesOut += PackTraits<LO>::unpackValue(numEntOut, numEntIn);
 
 1806  TEUCHOS_TEST_FOR_EXCEPTION(
static_cast<size_t>(numEntOut) != numEnt, std::logic_error,
 
 1807                             "unpackRowForBlockCrs: Expected number of entries " << numEnt
 
 1808                                                                                 << 
" != actual number of entries " << numEntOut << 
".");
 
 1811    Kokkos::pair<int, size_t> p;
 
 1812    p = PackTraits<GO>::unpackArray(gidsOut.data(), gidsIn, numEnt);
 
 1813    errorCode += p.first;
 
 1814    numBytesOut += p.second;
 
 1816    p = PackTraits<ST>::unpackArray(valsOut.data(), valsIn, numScalarEnt);
 
 1817    errorCode += p.first;
 
 1818    numBytesOut += p.second;
 
 1821  TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != numBytes, std::logic_error,
 
 1822                             "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
 
 1823                                                                    << 
" != numBytes = " << numBytes << 
".");
 
 1825  const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
 
 1826  TEUCHOS_TEST_FOR_EXCEPTION(numBytesOut != expectedNumBytes, std::logic_error,
 
 1827                             "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
 
 1828                                                                    << 
" != expectedNumBytes = " << expectedNumBytes << 
".");
 
 1830  TEUCHOS_TEST_FOR_EXCEPTION(errorCode != 0, std::runtime_error,
 
 1831                             "unpackRowForBlockCrs: " 
 1832                             "PackTraits::unpackArray returned a nonzero error code");
 
 1838template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 1845                   Kokkos::DualView<
size_t*,
 
 1849  using ::Tpetra::Details::Behavior;
 
 1850  using ::Tpetra::Details::dualViewStatusToString;
 
 1851  using ::Tpetra::Details::PackTraits;
 
 1852  using ::Tpetra::Details::ProfilingRegion;
 
 1854  typedef typename Kokkos::View<int*, device_type>::host_mirror_type::execution_space 
host_exec;
 
 1858  ProfilingRegion 
profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
 
 1860  const bool debug   = Behavior::debug();
 
 1861  const bool verbose = Behavior::verbose();
 
 1866    std::ostringstream 
os;
 
 1867    const int myRank = this->graph_.getRowMap()->getComm()->getRank();
 
 1868    os << 
"Proc " << 
myRank << 
": BlockCrsMatrix::packAndPrepare: " << std::endl;
 
 1873  if (*(this->localError_)) {
 
 1874    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1876        << 
"The target object of the Import or Export is already in an error state." 
 1885    std::ostringstream 
os;
 
 1887       << 
prefix << 
"  " << dualViewStatusToString(
exportLIDs, 
"exportLIDs") << std::endl
 
 1888       << 
prefix << 
"  " << dualViewStatusToString(exports, 
"exports") << std::endl
 
 1890    std::cerr << 
os.str();
 
 1897    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1899        << 
"exportLIDs.extent(0) = " << 
exportLIDs.extent(0)
 
 1901        << 
"." << std::endl;
 
 1905    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1906    err << 
prefix << 
"exportLIDs be sync'd to host." << std::endl;
 
 1912    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 1914        << 
"The source (input) object of the Import or " 
 1915           "Export is either not a BlockCrsMatrix, or does not have the right " 
 1916           "template parameters.  checkSizes() should have caught this.  " 
 1917           "Please report this bug to the Tpetra developers." 
 1934  const size_t blockSize         = 
static_cast<size_t>(src->getBlockSize());
 
 1938    auto val_host = val_.getHostView(Access::ReadOnly);
 
 1975    std::ostringstream 
os;
 
 1977       << 
"totalNumBytes = " << totalNumBytes << 
", totalNumEntries = " << totalNumEntries
 
 1979    std::cerr << 
os.str();
 
 1985  if (exports.extent(0) != totalNumBytes) {
 
 1986    const std::string 
oldLabel = exports.view_device().label();
 
 1988    exports                    = Kokkos::DualView<packet_type*, buffer_device_type>(
newLabel, totalNumBytes);
 
 1990  if (totalNumEntries > 0) {
 
 1995      Kokkos::parallel_scan(
policy,
 
 1996                            [=](
const size_t& 
i, 
size_t& update, 
const bool& 
final) {
 
 1997                              if (
final) 
offset(
i) = update;
 
 2002      std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 2004          << 
"At end of method, the final offset (in bytes) " 
 2006          << totalNumBytes << 
".  " 
 2007          << 
"Please report this bug to the Tpetra developers." << std::endl;
 
 2021    exports.modify_host();
 
 2023      typedef Kokkos::TeamPolicy<host_exec> policy_type;
 
 2026              .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO) * maxRowLength));
 
 2031      Kokkos::parallel_for(
policy,
 
 2032                           [=](
const typename policy_type::member_type& 
member) {
 
 2033                             const size_t i = 
member.league_rank();
 
 2034                             Kokkos::View<GO*, typename host_exec::scratch_memory_space>
 
 2039                             values_host_view_type 
vals;
 
 2059                                                                              Kokkos::View<const GO*, host_exec>(
gblColInds.data(), maxRowLength),
 
 2068                                 std::ostringstream 
os;
 
 2070                                    << 
"numBytes computed from packRowForBlockCrs is different from " 
 2071                                    << 
"precomputed offset values, LID = " << 
i << std::endl;
 
 2072                                 std::cerr << 
os.str();
 
 2081    std::ostringstream 
os;
 
 2082    const bool lclSuccess = !(*(this->localError_));
 
 2086    std::cerr << 
os.str();
 
 
 2090template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2097                     Kokkos::DualView<
size_t*,
 
 2103  using ::Tpetra::Details::Behavior;
 
 2104  using ::Tpetra::Details::dualViewStatusToString;
 
 2105  using ::Tpetra::Details::PackTraits;
 
 2106  using ::Tpetra::Details::ProfilingRegion;
 
 2108      typename Kokkos::View<int*, device_type>::host_mirror_type::execution_space;
 
 2110  ProfilingRegion 
profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
 
 2111  const bool verbose = Behavior::verbose();
 
 2116    std::ostringstream 
os;
 
 2117    auto map         = this->graph_.getRowMap();
 
 2118    auto comm        = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
 2119    const int myRank = comm.is_null() ? -1 : comm->getRank();
 
 2120    os << 
"Proc " << 
myRank << 
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
 
 2124      std::cerr << 
os.str();
 
 2129  if (*(this->localError_)) {
 
 2130    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 2131    std::ostringstream 
os;
 
 2132    os << 
prefix << 
"{Im/Ex}port target is already in error." << 
endl;
 
 2134      std::cerr << 
os.str();
 
 2144    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 2145    std::ostringstream 
os;
 
 2150      std::cerr << 
os.str();
 
 2159    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 2160    std::ostringstream 
os;
 
 2162       << 
"Invalid CombineMode value " << 
combineMode << 
".  Valid " 
 2163       << 
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO." 
 2166      std::cerr << 
os.str();
 
 2172  if (this->graph_.getColMap().is_null()) {
 
 2173    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 2174    std::ostringstream 
os;
 
 2175    os << 
prefix << 
"Target matrix's column Map is null." << 
endl;
 
 2177      std::cerr << 
os.str();
 
 2189  const size_t blockSize     = this->getBlockSize();
 
 2199    auto val_host = val_.getHostView(Access::ReadOnly);
 
 2203  const size_t maxRowNumEnt       = graph_.getLocalMaxNumRowEntries();
 
 2207    std::ostringstream 
os;
 
 2215    std::cerr << 
os.str();
 
 2220      std::ostringstream 
os;
 
 2221      os << 
prefix << 
"Nothing to unpack. Done!" << std::endl;
 
 2222      std::cerr << 
os.str();
 
 2229  if (imports.need_sync_host()) {
 
 2230    imports.sync_host();
 
 2242    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 2243    std::ostringstream 
os;
 
 2244    os << 
prefix << 
"All input DualViews must be sync'd to host by now. " 
 2245       << 
"imports_nc.need_sync_host()=" 
 2246       << (imports.need_sync_host() ? 
"true" : 
"false")
 
 2247       << 
", numPacketsPerLID.need_sync_host()=" 
 2249       << 
", importLIDs.need_sync_host()=" 
 2250       << (
importLIDs.need_sync_host() ? 
"true" : 
"false")
 
 2253      std::cerr << 
os.str();
 
 2269    Kokkos::parallel_scan(
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", 
policy,
 
 2270                          [=](
const size_t& 
i, 
size_t& update, 
const bool& 
final) {
 
 2271                            if (
final) 
offset(
i) = update;
 
 2282  Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
 
 2286    using policy_type      = Kokkos::TeamPolicy<host_exec>;
 
 2293    using pair_type = Kokkos::pair<size_t, size_t>;
 
 2296    getValuesHostNonConst();
 
 2298    Kokkos::parallel_for(
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", 
policy,
 
 2299                         [=, *
this](
const typename policy_type::member_type& 
member) {
 
 2300                           const size_t i = 
member.league_rank();
 
 2315                                 std::ostringstream 
os;
 
 2317                                    << 
"At i = " << 
i << 
", numEnt = " << 
numEnt 
 2320                                 std::cerr << 
os.str();
 
 2338                                                                                imports.view_host(),
 
 2341                           } 
catch (std::exception& 
e) {
 
 2344                               std::ostringstream 
os;
 
 2345                               os << 
prefix << 
"At i = " << 
i << 
", unpackRowForBlockCrs threw: " 
 2346                                  << 
e.what() << 
endl;
 
 2347                               std::cerr << 
os.str();
 
 2355                               std::ostringstream 
os;
 
 2357                                  << 
"At i = " << 
i << 
", numBytes = " << 
numBytes 
 2360                               std::cerr << 
os.str();
 
 2368                             if (
lidsOut(
k) == Teuchos::OrdinalTraits<LO>::invalid()) {
 
 2370                                 std::ostringstream 
os;
 
 2372                                    << 
"At i = " << 
i << 
", GID " << 
gidsOut(
k)
 
 2373                                    << 
" is not owned by the calling process." 
 2375                                 std::cerr << 
os.str();
 
 2396                               std::ostringstream 
os;
 
 2398                                  << 
" != numCombd = " << 
numCombd << 
"." 
 2400                               std::cerr << 
os.str();
 
 2409    std::ostream& 
err = this->markLocalErrorAndGetStream();
 
 2412      err << 
"  Please run again with the environment variable setting " 
 2413             "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
 
 2419    std::ostringstream 
os;
 
 2420    const bool lclSuccess = !(*(this->localError_));
 
 2424    std::cerr << 
os.str();
 
 
 2428template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2431  using Teuchos::TypeNameTraits;
 
 2432  std::ostringstream 
os;
 
 2433  os << 
"\"Tpetra::BlockCrsMatrix\": { " 
 2434     << 
"Template parameters: { " 
 2441     << 
", Global dimensions: [" 
 2442     << graph_.getDomainMap()->getGlobalNumElements() << 
", " 
 2443     << graph_.getRangeMap()->getGlobalNumElements() << 
"]" 
 2444     << 
", Block size: " << getBlockSize()
 
 
 2449template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2452             const Teuchos::EVerbosityLevel 
verbLevel)
 const {
 
 2453  using Teuchos::ArrayRCP;
 
 2454  using Teuchos::CommRequest;
 
 2455  using Teuchos::FancyOStream;
 
 2456  using Teuchos::getFancyOStream;
 
 2457  using Teuchos::ireceive;
 
 2458  using Teuchos::isend;
 
 2459  using Teuchos::outArg;
 
 2460  using Teuchos::TypeNameTraits;
 
 2461  using Teuchos::VERB_DEFAULT;
 
 2462  using Teuchos::VERB_LOW;
 
 2463  using Teuchos::VERB_MEDIUM;
 
 2464  using Teuchos::VERB_NONE;
 
 2468  using Teuchos::VERB_EXTREME;
 
 2469  using Teuchos::wait;
 
 2472  const Teuchos::EVerbosityLevel 
vl =
 
 2482  out << 
"\"Tpetra::BlockCrsMatrix\":" << 
endl;
 
 2485  out << 
"Template parameters:" << 
endl;
 
 2494      << 
"Global dimensions: [" 
 2495      << graph_.getDomainMap()->getGlobalNumElements() << 
", " 
 2496      << graph_.getRangeMap()->getGlobalNumElements() << 
"]" << 
endl;
 
 2503    const Teuchos::Comm<int>& comm = *(graph_.getMap()->getComm());
 
 2504    const int myRank               = comm.getRank();
 
 2508    getRowMap()->describe(
out, 
vl);
 
 2511      if (getColMap() == getRowMap()) {
 
 2513          out << 
"Column Map: same as row Map" << 
endl;
 
 2519        getColMap()->describe(
out, 
vl);
 
 2522    if (!getDomainMap().
is_null()) {
 
 2523      if (getDomainMap() == getRowMap()) {
 
 2525          out << 
"Domain Map: same as row Map" << 
endl;
 
 2527      } 
else if (getDomainMap() == getColMap()) {
 
 2529          out << 
"Domain Map: same as column Map" << 
endl;
 
 2535        getDomainMap()->describe(
out, 
vl);
 
 2538    if (!getRangeMap().
is_null()) {
 
 2539      if (getRangeMap() == getDomainMap()) {
 
 2541          out << 
"Range Map: same as domain Map" << 
endl;
 
 2543      } 
else if (getRangeMap() == getRowMap()) {
 
 2545          out << 
"Range Map: same as row Map" << 
endl;
 
 2551        getRangeMap()->describe(
out, 
vl);
 
 2557    const Teuchos::Comm<int>& comm = *(graph_.getMap()->getComm());
 
 2558    const int myRank               = comm.getRank();
 
 2559    const int numProcs             = comm.getSize();
 
 2577      values_host_view_type 
vals;
 
 
 2678template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2679Teuchos::RCP<const Teuchos::Comm<int> >
 
 2682  return graph_.getComm();
 
 
 2685template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2689  return graph_.getGlobalNumCols();
 
 
 2692template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2696  return graph_.getLocalNumCols();
 
 
 2699template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2702  return graph_.getIndexBase();
 
 
 2705template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2709  return graph_.getGlobalNumEntries();
 
 
 2712template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2716  return graph_.getLocalNumEntries();
 
 
 2719template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2723  return graph_.getNumEntriesInGlobalRow(
globalRow);
 
 
 2726template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2730  return graph_.getGlobalMaxNumRowEntries();
 
 
 2733template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2736  return graph_.hasColMap();
 
 
 2739template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2742  return graph_.isLocallyIndexed();
 
 
 2745template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2748  return graph_.isGloballyIndexed();
 
 
 2751template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2754  return graph_.isFillComplete();
 
 
 2757template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2763template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2766                     nonconst_global_inds_host_view_type& ,
 
 2767                     nonconst_values_host_view_type& ,
 
 2770      true, std::logic_error,
 
 2771      "Tpetra::BlockCrsMatrix::getGlobalRowCopy: " 
 2772      "This class doesn't support global matrix indexing.");
 
 
 2775template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2778                     global_inds_host_view_type& ,
 
 2779                     values_host_view_type& )
 const {
 
 2781      true, std::logic_error,
 
 2782      "Tpetra::BlockCrsMatrix::getGlobalRowView: " 
 2783      "This class doesn't support global matrix indexing.");
 
 
 2786template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2789                    local_inds_host_view_type& 
colInds,
 
 2790                    values_host_view_type& 
vals)
 const {
 
 2791  if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
 
 2792    colInds = local_inds_host_view_type();
 
 2793    vals    = values_host_view_type();
 
 
 2803template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2806                            local_inds_host_view_type& 
colInds,
 
 2807                            nonconst_values_host_view_type& 
vals)
 const {
 
 2808  if (!rowMeshMap_.isNodeLocalElement(
localRowInd)) {
 
 2809    colInds = nonconst_local_inds_host_view_type();
 
 2810    vals    = nonconst_values_host_view_type();
 
 
 2821template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2840  LO 
bs            = getBlockSize();
 
 2841  for (
size_t r = 0; 
r < getLocalNumRows(); 
r++) {
 
 2844    for (
int b = 0; 
b < 
bs; 
b++) {
 
 
 2854template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2856    leftScale(const ::Tpetra::Vector<Scalar, LO, GO, Node>& ) {
 
 2858      true, std::logic_error,
 
 2859      "Tpetra::BlockCrsMatrix::leftScale: " 
 2860      "not implemented.");
 
 
 2863template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2865    rightScale(const ::Tpetra::Vector<Scalar, LO, GO, Node>& ) {
 
 2867      true, std::logic_error,
 
 2868      "Tpetra::BlockCrsMatrix::rightScale: " 
 2869      "not implemented.");
 
 
 2872template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2873Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
 
 2879template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 2880typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
 
 2884      true, std::logic_error,
 
 2885      "Tpetra::BlockCrsMatrix::getFrobeniusNorm: " 
 2886      "not implemented.");
 
 
 2896#define TPETRA_BLOCKCRSMATRIX_INSTANT(S, LO, GO, NODE) \ 
 2897  template class BlockCrsMatrix<S, LO, GO, NODE>; 
Linear algebra kernels for small dense matrices and vectors.
 
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
 
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
 
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
 
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
 
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh,...
 
void exportAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Export< LO, GO, Node > &exporter) const
Import from this to the given destination matrix, and make the result fill complete.
 
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
 
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
 
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
 
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
 
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
 
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
 
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
 
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
 
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
 
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
 
LO local_ordinal_type
The type of local indices.
 
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
 
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
 
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
 
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
 
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
 
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
 
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
 
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
 
size_t getLocalNumRows() const override
get the local number of block rows
 
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
 
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
 
Kokkos::View< impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
 
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
 
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
 
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
 
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
 
local_matrix_device_type getLocalMatrixDevice() const
 
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
 
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
 
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
 
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
 
std::string description() const override
One-line description of this object.
 
void importAndFillComplete(Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > &destMatrix, const Import< LO, GO, Node > &importer) const
Import from this to the given destination matrix, and make the result fill complete.
 
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
 
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
 
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
 
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
 
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh,...
 
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row.
 
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
 
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
 
Kokkos::View< const impl_scalar_type **, Impl::BlockCrsMatrixLittleBlockArrayLayout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
 
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
 
char packet_type
Implementation detail; tells.
 
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
 
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
 
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
 
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
 
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
 
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
 
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
 
global_size_t getGlobalNumRows() const override
get the global number of block rows
 
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
 
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
 
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
 
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
 
MultiVector for multiple degrees of freedom per mesh point.
 
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
 
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
 
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
 
Struct that holds views of the contents of a CrsMatrix.
 
One or more distributed dense vectors.
 
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
 
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
 
Kokkos::LayoutRight BlockCrsMatrixLittleBlockArrayLayout
give an option to use layoutleft
 
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
 
Namespace Tpetra contains the class and methods constituting the Tpetra library.
 
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
 
size_t global_size_t
Global size_t object.
 
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
 
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
 
CombineMode
Rule for combining data in an Import or Export.
 
@ REPLACE
Replace existing values with new values.
 
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
 
@ INSERT
Insert new values that don't currently exist.
 
@ ZERO
Replace old values with zero.