10#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
11#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
13#include "Tpetra_BlockMultiVector_decl.hpp"
16#include "Teuchos_OrdinalTraits.hpp"
20template <
class Scalar,
class LO,
class GO,
class Node>
27template <
class Scalar,
class LO,
class GO,
class Node>
34template <
class Scalar,
class LO,
class GO,
class Node>
35Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
36BlockMultiVector<Scalar, LO, GO, Node>::
38 typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
39 const BMV* src_bmv =
dynamic_cast<const BMV*
>(&src);
40 TEUCHOS_TEST_FOR_EXCEPTION(
41 src_bmv ==
nullptr, std::invalid_argument,
43 "BlockMultiVector: The source object of an Import or Export to a "
44 "BlockMultiVector, must also be a BlockMultiVector.");
45 return Teuchos::rcp(src_bmv,
false);
48template <
class Scalar,
class LO,
class GO,
class Node>
53 , meshMap_(
in.meshMap_)
54 , pointMap_(
in.pointMap_)
56 , blockSize_(
in.blockSize_) {}
58template <
class Scalar,
class LO,
class GO,
class Node>
71template <
class Scalar,
class LO,
class GO,
class Node>
84template <
class Scalar,
class LO,
class GO,
class Node>
95 if (
X_mv.getCopyOrView() == Teuchos::View) {
99 }
else if (
X_mv.getCopyOrView() == Teuchos::Copy) {
107 const size_t numCols =
X_mv.getNumVectors();
109 Teuchos::Array<size_t>
cols(0);
117 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
118 "should never happen. Please report this bug to the Tpetra developers.");
125 X_view->getCopyOrView() != Teuchos::View, std::logic_error,
127 "BlockMultiVector constructor: We just set a MultiVector "
128 "to have view semantics, but it claims that it doesn't have view "
129 "semantics. This should never happen. "
130 "Please report this bug to the Tpetra developers.");
139template <
class Scalar,
class LO,
class GO,
class Node>
151 blockSize_(
X.getBlockSize()) {}
153template <
class Scalar,
class LO,
class GO,
class Node>
161 , pointMap_(makePointMapRCP(
newMeshMap,
X.getBlockSize()))
162 , mv_(
X.mv_, pointMap_,
offset *
X.getBlockSize())
164 blockSize_(
X.getBlockSize()) {}
166template <
class Scalar,
class LO,
class GO,
class Node>
172template <
class Scalar,
class LO,
class GO,
class Node>
177 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
180 static_cast<GST>(
meshMap.getGlobalNumElements());
182 static_cast<size_t>(
meshMap.getLocalNumElements());
214template <
class Scalar,
class LO,
class GO,
class Node>
215Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
219 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
222 static_cast<GST>(
meshMap.getGlobalNumElements());
224 static_cast<size_t>(
meshMap.getLocalNumElements());
256template <
class Scalar,
class LO,
class GO,
class Node>
262 typename const_little_vec_type::host_mirror_type::const_type
X_src(
reinterpret_cast<const impl_scalar_type*
>(
vals),
265 using exec_space =
typename device_type::execution_space;
269template <
class Scalar,
class LO,
class GO,
class Node>
282template <
class Scalar,
class LO,
class GO,
class Node>
288 if (
localRowIndex == Teuchos::OrdinalTraits<LO>::invalid()) {
296template <
class Scalar,
class LO,
class GO,
class Node>
302 typename const_little_vec_type::host_mirror_type::const_type
X_src(
reinterpret_cast<const impl_scalar_type*
>(
vals),
307template <
class Scalar,
class LO,
class GO,
class Node>
320template <
class Scalar,
class LO,
class GO,
class Node>
326 if (
localRowIndex == Teuchos::OrdinalTraits<LO>::invalid()) {
334template <
class Scalar,
class LO,
class GO,
class Node>
335typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
339 const Access::ReadOnlyStruct)
const {
341 return const_little_host_vec_type();
343 const size_t blockSize = getBlockSize();
344 auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
345 LO startRow = localRowIndex * blockSize;
346 LO endRow = startRow + blockSize;
347 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
352template <
class Scalar,
class LO,
class GO,
class Node>
353typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
357 const Access::OverwriteAllStruct) {
359 return little_host_vec_type();
362 auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
370template <
class Scalar,
class LO,
class GO,
class Node>
371typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
375 const Access::ReadWriteStruct) {
377 return little_host_vec_type();
379 const size_t blockSize = getBlockSize();
380 auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
381 LO startRow = localRowIndex * blockSize;
382 LO endRow = startRow + blockSize;
383 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
388template <
class Scalar,
class LO,
class GO,
class Node>
389Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
390BlockMultiVector<Scalar, LO, GO, Node>::
393 using Teuchos::rcpFromRef;
399 typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
400 const this_BMV_type* srcBlkVec =
dynamic_cast<const this_BMV_type*
>(&src);
401 if (srcBlkVec ==
nullptr) {
402 const mv_type* srcMultiVec =
dynamic_cast<const mv_type*
>(&src);
403 if (srcMultiVec ==
nullptr) {
407 return rcp(
new mv_type());
409 return rcp(srcMultiVec,
false);
412 return rcpFromRef(srcBlkVec->mv_);
416template <
class Scalar,
class LO,
class GO,
class Node>
419 return !getMultiVectorFromSrcDistObject(src).is_null();
422template <
class Scalar,
class LO,
class GO,
class Node>
425 const size_t numSameIDs,
432 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
433 "instead, create a point importer using makePointMap function.");
436template <
class Scalar,
class LO,
class GO,
class Node>
441 Kokkos::DualView<packet_type*,
443 Kokkos::DualView<
size_t*,
448 "Tpetra::BlockMultiVector::packAndPrepare: Do NOT use this; "
449 "instead, create a point importer using makePointMap function.");
452template <
class Scalar,
class LO,
class GO,
class Node>
456 Kokkos::DualView<packet_type*,
459 Kokkos::DualView<
size_t*,
465 "Tpetra::BlockMultiVector::unpackAndCombine: Do NOT use this; "
466 "instead, create a point importer using makePointMap function.");
469template <
class Scalar,
class LO,
class GO,
class Node>
476template <
class Scalar,
class LO,
class GO,
class Node>
482template <
class Scalar,
class LO,
class GO,
class Node>
488template <
class Scalar,
class LO,
class GO,
class Node>
498template <
typename Scalar,
typename ViewY,
typename ViewD,
typename ViewX>
499struct BlockWiseMultiply {
500 typedef typename ViewD::size_type Size;
503 typedef typename ViewD::device_type Device;
504 typedef typename ViewD::non_const_value_type ImplScalar;
505 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
507 template <
typename View>
508 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
509 typename View::device_type, Unmanaged>;
514 const Size block_size_;
529 KOKKOS_INLINE_FUNCTION
530 void operator()(
const Size k)
const {
531#if KOKKOS_VERSION >= 40799
532 const auto zero = KokkosKernels::ArithTraits<Scalar>::zero();
534 const auto zero = Kokkos::ArithTraits<Scalar>::zero();
536 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL(), Kokkos::ALL());
537 const auto num_vecs = X_.extent(1);
538 for (Size i = 0; i < num_vecs; ++i) {
539 Kokkos::pair<Size, Size> kslice(k * block_size_, (k + 1) * block_size_);
540 auto X_curBlk = Kokkos::subview(X_, kslice, i);
541 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
550template <
typename Scalar,
typename ViewY,
typename ViewD,
typename ViewX>
551inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
552createBlockWiseMultiply(
const int block_size,
const Scalar& alpha,
553 const ViewY& Y,
const ViewD& D,
const ViewX& X) {
554 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
557template <
typename ViewY,
561 typename LO =
typename ViewY::size_type>
562class BlockJacobiUpdate {
564 typedef typename ViewD::device_type Device;
565 typedef typename ViewD::non_const_value_type ImplScalar;
566 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
568 template <
typename ViewType>
569 using UnmanagedView = Kokkos::View<
typename ViewType::data_type,
570 typename ViewType::array_layout,
571 typename ViewType::device_type,
573 typedef UnmanagedView<ViewY> UnMViewY;
574 typedef UnmanagedView<ViewD> UnMViewD;
575 typedef UnmanagedView<ViewZ> UnMViewZ;
585 BlockJacobiUpdate(
const ViewY& Y,
590 : blockSize_(D.extent(1))
598 static_assert(
static_cast<int>(ViewY::rank) == 1,
599 "Y must have rank 1.");
600 static_assert(
static_cast<int>(ViewD::rank) == 3,
"D must have rank 3.");
601 static_assert(
static_cast<int>(ViewZ::rank) == 1,
602 "Z must have rank 1.");
608 KOKKOS_INLINE_FUNCTION
void
609 operator()(
const LO& k)
const {
611 using Kokkos::subview;
612 typedef Kokkos::pair<LO, LO> range_type;
613#if KOKKOS_VERSION >= 40799
614 typedef KokkosKernels::ArithTraits<Scalar> KAT;
616 typedef Kokkos::ArithTraits<Scalar> KAT;
621 auto D_curBlk = subview(D_, k, ALL(), ALL());
622 const range_type kslice(k * blockSize_, (k + 1) * blockSize_);
626 auto Z_curBlk = subview(Z_, kslice);
627 auto Y_curBlk = subview(Y_, kslice);
629 if (beta_ == KAT::zero()) {
631 }
else if (beta_ != KAT::one()) {
638template <
class ViewY,
642 class LO =
typename ViewD::size_type>
643void blockJacobiUpdate(
const ViewY& Y,
647 const Scalar& beta) {
648 static_assert(Kokkos::is_view<ViewY>::value,
"Y must be a Kokkos::View.");
649 static_assert(Kokkos::is_view<ViewD>::value,
"D must be a Kokkos::View.");
650 static_assert(Kokkos::is_view<ViewZ>::value,
"Z must be a Kokkos::View.");
651 static_assert(
static_cast<int>(ViewY::rank) ==
static_cast<int>(ViewZ::rank),
652 "Y and Z must have the same rank.");
653 static_assert(
static_cast<int>(ViewD::rank) == 3,
"D must have rank 3.");
655 const auto lclNumMeshRows = D.extent(0);
657#ifdef HAVE_TPETRA_DEBUG
661 const auto blkSize = D.extent(1);
662 const auto lclNumPtRows = lclNumMeshRows * blkSize;
663 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(0) != lclNumPtRows, std::invalid_argument,
664 "blockJacobiUpdate: Y.extent(0) = " << Y.extent(0) <<
" != "
665 "D.extent(0)*D.extent(1) = "
666 << lclNumMeshRows <<
" * " << blkSize
667 <<
" = " << lclNumPtRows <<
".");
668 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(0) != Z.extent(0), std::invalid_argument,
669 "blockJacobiUpdate: Y.extent(0) = " << Y.extent(0) <<
" != "
671 << Z.extent(0) <<
".");
672 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(1) != Z.extent(1), std::invalid_argument,
673 "blockJacobiUpdate: Y.extent(1) = " << Y.extent(1) <<
" != "
675 << Z.extent(1) <<
".");
678 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor(Y, alpha, D, Z, beta);
679 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
681 range_type range(0,
static_cast<LO
>(lclNumMeshRows));
682 Kokkos::parallel_for(range, functor);
687template <
class Scalar,
class LO,
class GO,
class Node>
694 typedef typename device_type::execution_space
exec_space;
697 if (
alpha == STS::zero()) {
698 this->putScalar(STS::zero());
700 const LO
blockSize = this->getBlockSize();
702 auto X_lcl =
X.mv_.getLocalViewDevice(Access::ReadOnly);
703 auto Y_lcl = this->mv_.getLocalViewDevice(Access::ReadWrite);
716template <
class Scalar,
class LO,
class GO,
class Node>
725 using Kokkos::subview;
730 const LO
numVecs = mv_.getNumVectors();
732 if (
alpha == STS::zero()) {
735 Z.update(STS::one(),
X, -STS::one());
737 auto Y_lcl = this->mv_.getLocalViewDevice(Access::ReadWrite);
738 auto Z_lcl =
Z.mv_.getLocalViewDevice(Access::ReadWrite);
753#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S, LO, GO, NODE) \
754 template class BlockMultiVector<S, LO, GO, NODE>;
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
const mv_type & getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
BlockMultiVector()
Default constructor.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
LO local_ordinal_type
The type of local indices.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Struct that holds views of the contents of a CrsMatrix.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Abstract base class for objects that can be the source of an Import or Export operation.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
CombineMode
Rule for combining data in an Import or Export.