10#ifndef TPETRA_MULTIVECTOR_DEF_HPP
11#define TPETRA_MULTIVECTOR_DEF_HPP
23#include "Tpetra_Vector.hpp"
35#include "Tpetra_Details_Random.hpp"
36#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
37#include "Teuchos_SerialDenseMatrix.hpp"
39#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
40#include "KokkosCompat_View.hpp"
41#include "KokkosBlas.hpp"
42#include "KokkosKernels_Utils.hpp"
43#include "Kokkos_Random.hpp"
44#if KOKKOS_VERSION >= 40799
45#include "KokkosKernels_ArithTraits.hpp"
47#include "Kokkos_ArithTraits.hpp"
52#ifdef HAVE_TPETRA_INST_FLOAT128
55template <
class Generator>
56struct rand<Generator, __float128> {
57 static KOKKOS_INLINE_FUNCTION __float128 max() {
58 return static_cast<__float128
>(1.0);
60 static KOKKOS_INLINE_FUNCTION __float128
61 draw(Generator& gen) {
64 const __float128 scalingFactor =
65 static_cast<__float128
>(std::numeric_limits<double>::min()) /
66 static_cast<__float128
>(2.0);
67 const __float128 higherOrderTerm =
static_cast<__float128
>(gen.drand());
68 const __float128 lowerOrderTerm =
69 static_cast<__float128
>(gen.drand()) * scalingFactor;
70 return higherOrderTerm + lowerOrderTerm;
72 static KOKKOS_INLINE_FUNCTION __float128
73 draw(Generator& gen,
const __float128& range) {
75 const __float128 scalingFactor =
76 static_cast<__float128
>(std::numeric_limits<double>::min()) /
77 static_cast<__float128
>(2.0);
78 const __float128 higherOrderTerm =
79 static_cast<__float128
>(gen.drand(range));
80 const __float128 lowerOrderTerm =
81 static_cast<__float128
>(gen.drand(range)) * scalingFactor;
82 return higherOrderTerm + lowerOrderTerm;
84 static KOKKOS_INLINE_FUNCTION __float128
85 draw(Generator& gen,
const __float128& start,
const __float128& end) {
87 const __float128 scalingFactor =
88 static_cast<__float128
>(std::numeric_limits<double>::min()) /
89 static_cast<__float128
>(2.0);
90 const __float128 higherOrderTerm =
91 static_cast<__float128
>(gen.drand(start, end));
92 const __float128 lowerOrderTerm =
93 static_cast<__float128
>(gen.drand(start, end)) * scalingFactor;
94 return higherOrderTerm + lowerOrderTerm;
116template <
class ST,
class LO,
class GO,
class NT>
118allocDualView(
const size_t lclNumRows,
119 const size_t numCols,
120 const bool zeroOut =
true,
121 const bool allowPadding =
false) {
122 using Kokkos::AllowPadding;
123 using Kokkos::view_alloc;
124 using Kokkos::WithoutInitializing;
125 using ::Tpetra::Details::Behavior;
128 typedef typename dual_view_type::t_dev dev_view_type;
134 const std::string label(
"MV::DualView");
135 const bool debug = Behavior::debug();
145 dev_view_type d_view;
148 d_view = dev_view_type(view_alloc(label, AllowPadding),
149 lclNumRows, numCols);
151 d_view = dev_view_type(view_alloc(label),
152 lclNumRows, numCols);
156 d_view = dev_view_type(view_alloc(label,
159 lclNumRows, numCols);
161 d_view = dev_view_type(view_alloc(label, WithoutInitializing),
162 lclNumRows, numCols);
173#if KOKKOS_VERSION >= 40799
174 const ST nan = KokkosKernels::ArithTraits<ST>::nan();
176 const ST nan = Kokkos::ArithTraits<ST>::nan();
178 KokkosBlas::fill(d_view, nan);
182 TEUCHOS_TEST_FOR_EXCEPTION(
static_cast<size_t>(d_view.extent(0)) != lclNumRows ||
183 static_cast<size_t>(d_view.extent(1)) != numCols,
185 "allocDualView: d_view's dimensions actual dimensions do not match "
186 "requested dimensions. d_view is "
187 << d_view.extent(0) <<
" x " << d_view.extent(1) <<
"; requested " << lclNumRows <<
" x " << numCols
188 <<
". Please report this bug to the Tpetra developers.");
191 return wrapped_dual_view_type(d_view);
198template <
class T,
class ExecSpace>
199struct MakeUnmanagedView {
209 typedef typename std::conditional<
210 Kokkos::SpaceAccessibility<
211 typename ExecSpace::memory_space,
212 Kokkos::HostSpace>::accessible,
213 typename ExecSpace::device_type,
214 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
215 typedef Kokkos::LayoutLeft array_layout;
216 typedef Kokkos::View<T*, array_layout, host_exec_space,
217 Kokkos::MemoryUnmanaged>
220 static view_type getView(
const Teuchos::ArrayView<T>& x_in) {
221 const size_t numEnt =
static_cast<size_t>(x_in.size());
225 return view_type(x_in.getRawPtr(), numEnt);
230template <
class WrappedDualViewType>
232takeSubview(
const WrappedDualViewType& X,
233 const std::pair<size_t, size_t>& rowRng,
234 const Kokkos::ALL_t& colRng)
238 return WrappedDualViewType(X, rowRng, colRng);
241template <
class WrappedDualViewType>
243takeSubview(
const WrappedDualViewType& X,
244 const Kokkos::ALL_t& rowRng,
245 const std::pair<size_t, size_t>& colRng) {
246 using DualViewType =
typename WrappedDualViewType::DVT;
249 if (X.extent(0) == 0 && X.extent(1) != 0) {
250 return WrappedDualViewType(DualViewType(
"MV::DualView", 0, colRng.second - colRng.first));
252 return WrappedDualViewType(X, rowRng, colRng);
256template <
class WrappedDualViewType>
258takeSubview(
const WrappedDualViewType& X,
259 const std::pair<size_t, size_t>& rowRng,
260 const std::pair<size_t, size_t>& colRng) {
261 using DualViewType =
typename WrappedDualViewType::DVT;
271 if (X.extent(0) == 0 && X.extent(1) != 0) {
272 return WrappedDualViewType(DualViewType(
"MV::DualView", 0, colRng.second - colRng.first));
274 return WrappedDualViewType(X, rowRng, colRng);
278template <
class WrappedOrNotDualViewType>
280getDualViewStride(
const WrappedOrNotDualViewType& dv) {
285 size_t strides[WrappedOrNotDualViewType::t_dev::rank + 1];
287 const size_t LDA = strides[1];
288 const size_t numRows = dv.extent(0);
291 return (numRows == 0) ? size_t(1) : numRows;
297template <
class ViewType>
299getViewStride(
const ViewType& view) {
300 const size_t LDA = view.stride(1);
301 const size_t numRows = view.extent(0);
304 return (numRows == 0) ? size_t(1) : numRows;
310template <
class impl_scalar_type,
class buffer_device_type>
312 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports) {
313 if (!imports.need_sync_device()) {
317 size_t localLengthThreshold =
319 return imports.extent(0) <= localLengthThreshold;
323template <
class SC,
class LO,
class GO,
class NT>
324bool runKernelOnHost(const ::Tpetra::MultiVector<SC, LO, GO, NT>& X) {
325 if (!X.need_sync_device()) {
329 size_t localLengthThreshold =
331 return X.getLocalLength() <= localLengthThreshold;
335template <
class SC,
class LO,
class GO,
class NT>
336void multiVectorNormImpl(typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
338 const ::Tpetra::Details::EWhichNorm whichNorm) {
340 using ::Tpetra::Details::normImpl;
342 using val_type =
typename MV::impl_scalar_type;
343 using mag_type =
typename MV::mag_type;
344 using dual_view_type =
typename MV::dual_view_type;
346 auto map =
X.getMap();
347 auto comm =
map.is_null() ?
nullptr :
map->getComm().getRawPtr();
348 auto whichVecs = getMultiVectorWhichVectors(
X);
349 const bool isConstantStride =
X.isConstantStride();
350 const bool isDistributed =
X.isDistributed();
354 using view_type =
typename dual_view_type::t_host;
355 using array_layout =
typename view_type::array_layout;
356 using device_type =
typename view_type::device_type;
358 auto X_lcl =
X.getLocalViewHost(Access::ReadOnly);
359 normImpl<val_type, array_layout, device_type,
361 isConstantStride, isDistributed, comm);
363 using view_type =
typename dual_view_type::t_dev;
364 using array_layout =
typename view_type::array_layout;
365 using device_type =
typename view_type::device_type;
367 auto X_lcl =
X.getLocalViewDevice(Access::ReadOnly);
368 normImpl<val_type, array_layout, device_type,
370 isConstantStride, isDistributed, comm);
378template <
typename DstView,
typename SrcView>
379struct AddAssignFunctor {
387 operator()(
const size_t k)
const { tgt(
k) += src(
k); }
394template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
400template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
405template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
418template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
424 , whichVectors_(
source.whichVectors_) {
427 "MultiVector(const MultiVector&, "
428 "const Teuchos::DataAccess): ";
436 this->
view_ = cpy.view_;
441 true, std::invalid_argument,
442 "Second argument 'copyOrView' has an "
446 << Teuchos::Copy <<
" and Teuchos::View = "
447 << Teuchos::View <<
".");
451template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
463 std::invalid_argument,
464 "Kokkos::DualView does not match Map. "
465 "map->getLocalNumElements() = "
468 <<
", and getStride() = " <<
LDA <<
".");
470 using ::Tpetra::Details::Behavior;
471 const bool debug = Behavior::debug();
473 using ::Tpetra::Details::checkGlobalDualViewValidity;
475 const bool verbose = Behavior::verbose();
476 const auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
478 checkGlobalDualViewValidity(&
gblErrStrm, view, verbose,
484template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
496 std::invalid_argument,
497 "Kokkos::DualView does not match Map. "
498 "map->getLocalNumElements() = "
501 <<
", and getStride() = " <<
LDA <<
".");
503 using ::Tpetra::Details::Behavior;
504 const bool debug = Behavior::debug();
506 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
508 const bool verbose = Behavior::verbose();
509 const auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
511 checkGlobalWrappedDualViewValidity(&
gblErrStrm, view, verbose,
517template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
520 const typename dual_view_type::t_dev&
d_view)
522 using Teuchos::ArrayRCP;
531 "Map does not match "
532 "Kokkos::View. map->getLocalNumElements() = "
534 <<
", View's column stride = " <<
LDA
535 <<
", and View's extent(0) = " <<
d_view.extent(0) <<
".");
541 using ::Tpetra::Details::Behavior;
542 const bool debug = Behavior::debug();
544 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
546 const bool verbose = Behavior::verbose();
547 const auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
559template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
572 "The input Kokkos::DualView's "
573 "column stride LDA = "
575 <<
". This may also mean that the input origView's first dimension (number "
577 <<
origView.extent(0) <<
") does not not match the number "
578 "of entries in the Map on the calling process.");
580 using ::Tpetra::Details::Behavior;
581 const bool debug = Behavior::debug();
583 using ::Tpetra::Details::checkGlobalDualViewValidity;
585 const bool verbose = Behavior::verbose();
586 const auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
588 checkGlobalDualViewValidity(&
gblErrStrm, view, verbose,
598template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
607 using Kokkos::subview;
608 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
610 using ::Tpetra::Details::Behavior;
611 const bool debug = Behavior::debug();
613 using ::Tpetra::Details::checkGlobalDualViewValidity;
615 const bool verbose = Behavior::verbose();
616 const auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
618 checkGlobalDualViewValidity(&
gblErrStrm, view, verbose,
623 const size_t lclNumRows =
map.is_null() ? size_t(0) :
map->getLocalNumElements();
631 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(0)) <
lclNumRows,
632 std::invalid_argument,
"view.extent(0) = " << view.extent(0) <<
" < map->getLocalNumElements() = " <<
lclNumRows <<
".");
635 view.extent(1) != 0 && view.extent(1) == 0,
636 std::invalid_argument,
637 "view.extent(1) = 0, but whichVectors.size()"
641 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
644 whichVectors[
k] == Teuchos::OrdinalTraits<size_t>::invalid(),
645 std::invalid_argument,
"whichVectors[" <<
k <<
"] = "
646 "Teuchos::OrdinalTraits<size_t>::invalid().");
650 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(1)) <=
maxColInd,
651 std::invalid_argument,
"view.extent(1) = " << view.extent(1) <<
" <= max(whichVectors) = " <<
maxColInd <<
".");
658 "LDA = " <<
LDA <<
" < this->getLocalLength() = " <<
lclNumRows <<
".");
674 whichVectors_.clear();
678template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
687 using Kokkos::subview;
688 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
690 using ::Tpetra::Details::Behavior;
691 const bool debug = Behavior::debug();
693 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
695 const bool verbose = Behavior::verbose();
696 const auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
698 checkGlobalWrappedDualViewValidity(&
gblErrStrm, view, verbose,
703 const size_t lclNumRows =
map.is_null() ? size_t(0) :
map->getLocalNumElements();
711 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(0)) <
lclNumRows,
712 std::invalid_argument,
"view.extent(0) = " << view.extent(0) <<
" < map->getLocalNumElements() = " <<
lclNumRows <<
".");
715 view.extent(1) != 0 && view.extent(1) == 0,
716 std::invalid_argument,
717 "view.extent(1) = 0, but whichVectors.size()"
721 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
724 whichVectors[
k] == Teuchos::OrdinalTraits<size_t>::invalid(),
725 std::invalid_argument,
"whichVectors[" <<
k <<
"] = "
726 "Teuchos::OrdinalTraits<size_t>::invalid().");
730 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(1)) <=
maxColInd,
731 std::invalid_argument,
"view.extent(1) = " << view.extent(1) <<
" <= max(whichVectors) = " <<
maxColInd <<
".");
738 "LDA = " <<
LDA <<
" < this->getLocalLength() = " <<
lclNumRows <<
".");
754 whichVectors_.clear();
758template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
768 using Kokkos::subview;
769 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
771 using ::Tpetra::Details::Behavior;
772 const bool debug = Behavior::debug();
774 using ::Tpetra::Details::checkGlobalDualViewValidity;
776 const bool verbose = Behavior::verbose();
777 const auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
779 checkGlobalDualViewValidity(&
gblErrStrm, view, verbose,
796 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(0)) <
lclNumRows,
797 std::invalid_argument,
"view.extent(0) = " << view.extent(0) <<
" < map->getLocalNumElements() = " <<
lclNumRows <<
".");
800 view.extent(1) != 0 && view.extent(1) == 0,
801 std::invalid_argument,
802 "view.extent(1) = 0, but whichVectors.size()"
806 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
809 whichVectors[
k] == Teuchos::OrdinalTraits<size_t>::invalid(),
810 std::invalid_argument,
"whichVectors[" <<
k <<
"] = "
811 "Teuchos::OrdinalTraits<size_t>::invalid().");
815 view.extent(1) != 0 &&
static_cast<size_t>(view.extent(1)) <=
maxColInd,
816 std::invalid_argument,
"view.extent(1) = " << view.extent(1) <<
" <= max(whichVectors) = " <<
maxColInd <<
".");
823 "Map and DualView origView "
824 "do not match. LDA = "
825 <<
LDA <<
" < this->getLocalLength() = " <<
lclNumRows <<
". origView.extent(0) = " <<
origView.extent(0)
826 <<
", origView.stride(1) = " <<
origView.view_device().stride(1) <<
".");
846template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
849 const Teuchos::ArrayView<const Scalar>&
data,
855 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
862 map.is_null() ? size_t(0) :
map->getLocalNumElements();
864 "map->getLocalNumElements() = "
869 std::invalid_argument,
870 "Input Teuchos::ArrayView does not have enough "
871 "entries, given the input Map and number of vectors in the MultiVector."
873 <<
data.size() <<
" < (LDA*(numVecs-1)) + "
874 "map->getLocalNumElements () = "
880 auto X_out = this->getLocalViewDevice(Access::OverwriteAll);
890 Kokkos::MemoryUnmanaged>
899 X_out.extent(1) == 0 ? size_t(1) :
X_out.stride(1);
906 typedef decltype(Kokkos::subview(
X_out, Kokkos::ALL(), 0))
908 typedef decltype(Kokkos::subview(
X_in, Kokkos::ALL(), 0))
919template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
922 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >&
ArrayOfPtrs,
928 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
932 map.is_null() ? size_t(0) :
map->getLocalNumElements();
934 std::runtime_error,
"Either numVecs (= " <<
numVecs <<
") < 1, or "
935 "ArrayOfPtrs.size() (= "
941 std::invalid_argument,
"ArrayOfPtrs[" <<
j <<
"].size() = " <<
X_j_av.size() <<
" < map->getLocalNumElements() = " <<
lclNumRows <<
".");
949 using array_layout =
typename decltype(
X_out)::array_layout;
953 Kokkos::MemoryUnmanaged>;
957 Teuchos::ArrayView<const IST>
X_j_av =
958 Teuchos::av_reinterpret_cast<const IST>(
ArrayOfPtrs[
j]);
966template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
969 return whichVectors_.empty();
972template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
976 if (this->getMap().
is_null()) {
977 return static_cast<size_t>(0);
979 return this->getMap()->getLocalNumElements();
983template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
987 if (this->getMap().
is_null()) {
988 return static_cast<size_t>(0);
990 return this->getMap()->getGlobalNumElements();
994template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1001template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1005 auto thisData = view_.getDualView().view_host().data();
1007 size_t thisSpan = view_.getDualView().view_host().span();
1012template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1019 const MV* src =
dynamic_cast<const MV*
>(&
sourceObj);
1020 if (src ==
nullptr) {
1029 return src->getNumVectors() == this->getNumVectors();
1033template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1037 return this->getNumVectors();
1040template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1043 const size_t numSameIDs,
1044 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteToLIDs,
1045 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteFromLIDs,
1048 using Kokkos::Compat::create_const_view;
1049 using KokkosRefactor::Details::permute_array_multi_column;
1050 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1052 using ::Tpetra::Details::Behavior;
1053 using ::Tpetra::Details::getDualViewCopyFromArrayView;
1054 using ::Tpetra::Details::ProfilingRegion;
1061 if (importsAreAliased() && (this->constantNumberOfPackets() != 0) && Behavior::enableGranularTransfers()) {
1066 copyOnHost = !Behavior::assumeMpiIsGPUAware();
1071 const char longFuncNameHost[] =
"Tpetra::MultiVector::copyAndPermute[Host]";
1076 const bool verbose = Behavior::verbose();
1077 std::unique_ptr<std::string>
prefix;
1079 auto map = this->getMap();
1080 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
1081 const int myRank = comm.is_null() ? -1 : comm->getRank();
1082 std::ostringstream
os;
1083 os <<
"Proc " <<
myRank <<
": MV::copyAndPermute: ";
1084 prefix = std::unique_ptr<std::string>(
new std::string(
os.str()));
1086 std::cerr <<
os.str();
1090 std::logic_error,
"permuteToLIDs.extent(0) = " <<
permuteToLIDs.extent(0) <<
" != permuteFromLIDs.extent(0) = " <<
permuteFromLIDs.extent(0) <<
".");
1091 const size_t numCols = this->getNumVectors();
1097 "Input MultiVector needs sync to both host "
1100 std::ostringstream
os;
1102 std::cerr <<
os.str();
1106 std::ostringstream
os;
1107 os << *
prefix <<
"Copy first " << numSameIDs <<
" elements" <<
endl;
1108 std::cerr <<
os.str();
1133 if (numSameIDs > 0) {
1134 const std::pair<size_t, size_t>
rows(0, numSameIDs);
1136 ProfilingRegion
regionC(
"Tpetra::MultiVector::copy[Host]");
1137 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1140 for (
size_t j = 0;
j < numCols; ++
j) {
1141 const size_t tgtCol = isConstantStride() ?
j : whichVectors_[
j];
1150 Kokkos::RangePolicy<execution_space, size_t>;
1152 Tpetra::Details::AddAssignFunctor<
decltype(
tgt_j),
decltype(
src_j)>
1154 Kokkos::parallel_for(
rp,
aaf);
1159 space.fence(
"Tpetra::MultiVector::copyAndPermute-1");
1163 ProfilingRegion
regionC(
"Tpetra::MultiVector::copy[Device]");
1164 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1167 for (
size_t j = 0;
j < numCols; ++
j) {
1168 const size_t tgtCol = isConstantStride() ?
j : whichVectors_[
j];
1177 Kokkos::RangePolicy<execution_space, size_t>;
1179 Tpetra::Details::AddAssignFunctor<
decltype(
tgt_j),
decltype(
src_j)>
1181 Kokkos::parallel_for(
rp,
aaf);
1186 space.fence(
"Tpetra::MultiVector::copyAndPermute-2");
1205 std::ostringstream
os;
1207 std::cerr <<
os.str();
1211 ProfilingRegion
regionP(
"Tpetra::MultiVector::permute");
1214 std::ostringstream
os;
1216 std::cerr <<
os.str();
1222 !this->isConstantStride() || !
sourceMV.isConstantStride();
1225 std::ostringstream
os;
1228 std::cerr <<
os.str();
1235 Kokkos::DualView<const size_t*, device_type>
srcWhichVecs;
1236 Kokkos::DualView<const size_t*, device_type>
tgtWhichVecs;
1238 if (this->whichVectors_.size() == 0) {
1239 Kokkos::DualView<size_t*, device_type>
tmpTgt(
"tgtWhichVecs", numCols);
1241 for (
size_t j = 0;
j < numCols; ++
j) {
1249 Teuchos::ArrayView<const size_t>
tgtWhichVecsT = this->whichVectors_();
1257 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
tgtWhichVecs.extent(0) <<
" != this->getNumVectors() = " <<
this->getNumVectors() <<
".");
1259 if (
sourceMV.whichVectors_.size() == 0) {
1260 Kokkos::DualView<size_t*, device_type>
tmpSrc(
"srcWhichVecs", numCols);
1262 for (
size_t j = 0;
j < numCols; ++
j) {
1281 <<
" != sourceMV.getNumVectors() = " <<
sourceMV.getNumVectors()
1287 std::ostringstream
os;
1288 os << *
prefix <<
"Get permute LIDs on host" << std::endl;
1289 std::cerr <<
os.str();
1291 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1301 std::ostringstream
os;
1303 std::cerr <<
os.str();
1313 using op_type = KokkosRefactor::Details::AddOp;
1314 permute_array_multi_column_variable_stride(
tgt_h,
src_h,
1321 using op_type = KokkosRefactor::Details::InsertOp;
1322 permute_array_multi_column_variable_stride(
tgt_h,
src_h,
1331 using op_type = KokkosRefactor::Details::AddOp;
1335 using op_type = KokkosRefactor::Details::InsertOp;
1342 std::ostringstream
os;
1344 std::cerr <<
os.str();
1346 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1356 std::ostringstream
os;
1366 using op_type = KokkosRefactor::Details::AddOp;
1374 using op_type = KokkosRefactor::Details::InsertOp;
1375 permute_array_multi_column_variable_stride(space,
tgt_d,
src_d,
1384 using op_type = KokkosRefactor::Details::AddOp;
1388 using op_type = KokkosRefactor::Details::InsertOp;
1396 std::ostringstream
os;
1398 std::cerr <<
os.str();
1402template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1405 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteToLIDs,
1406 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
permuteFromLIDs,
1412template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1415 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
exportLIDs,
1416 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1417 Kokkos::DualView<size_t*, buffer_device_type> ,
1420 using Kokkos::Compat::create_const_view;
1421 using Kokkos::Compat::getKokkosViewDeepCopy;
1423 using ::Tpetra::Details::Behavior;
1424 using ::Tpetra::Details::ProfilingRegion;
1425 using ::Tpetra::Details::reallocDualViewIfNeeded;
1432 const char longFuncNameHost[] =
"Tpetra::MultiVector::packAndPrepare[Host]";
1450 std::unique_ptr<std::string>
prefix;
1453 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
1454 const int myRank = comm.is_null() ? -1 : comm->getRank();
1456 os <<
"Proc " <<
myRank <<
": MV::packAndPrepare: ";
1457 prefix = std::unique_ptr<std::string>(
new std::string(
os.str()));
1459 std::cerr <<
os.str();
1462 const size_t numCols =
sourceMV.getNumVectors();
1473 std::ostringstream
os;
1474 os << *
prefix <<
"No exports on this proc, DONE" << std::endl;
1475 std::cerr <<
os.str();
1498 std::ostringstream
os;
1501 <<
", exports.extent(0): " << exports.extent(0)
1503 std::cerr <<
os.str();
1511 "Input MultiVector needs sync to both host "
1514 std::ostringstream
os;
1516 std::cerr <<
os.str();
1528 exports.clear_sync_state();
1529 exports.modify_host();
1536 exports.clear_sync_state();
1537 exports.modify_device();
1554 using KokkosRefactor::Details::pack_array_single_column;
1556 std::ostringstream
os;
1557 os << *
prefix <<
"Pack numCols=1 const stride" <<
endl;
1558 std::cerr <<
os.str();
1562 pack_array_single_column(exports.view_host(),
1569 pack_array_single_column(exports.view_device(),
1577 using KokkosRefactor::Details::pack_array_single_column;
1579 std::ostringstream
os;
1581 std::cerr <<
os.str();
1585 pack_array_single_column(exports.view_host(),
1592 pack_array_single_column(exports.view_device(),
1602 using KokkosRefactor::Details::pack_array_multi_column;
1604 std::ostringstream
os;
1605 os << *
prefix <<
"Pack numCols=" << numCols <<
" const stride" <<
endl;
1606 std::cerr <<
os.str();
1610 pack_array_multi_column(exports.view_host(),
1617 pack_array_multi_column(exports.view_device(),
1625 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1628 os << *
prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1635 using IST = impl_scalar_type;
1636 using DV = Kokkos::DualView<IST*, device_type>;
1637 using HES =
typename DV::t_host::execution_space;
1638 using DES =
typename DV::t_dev::execution_space;
1642 pack_array_multi_column_variable_stride(exports.view_host(),
1650 pack_array_multi_column_variable_stride(exports.view_device(),
1661 std::ostringstream
os;
1663 std::cerr <<
os.str();
1667template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1670 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
exportLIDs,
1671 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1677template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1679typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1680 typename NO::device_type::memory_space>::value,
1685 const std::string*
prefix,
1686 const bool areRemoteLIDsContiguous,
1694 std::ostringstream
os;
1695 os << *
prefix <<
"Realloc (if needed) imports_ from "
1696 << this->imports_.extent(0) <<
" to " <<
newSize << std::endl;
1697 std::cerr <<
os.str();
1702 using IST = impl_scalar_type;
1703 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1715 if ((std::is_same_v<typename dual_view_type::t_dev::device_type, typename dual_view_type::t_host::device_type> ||
1718 areRemoteLIDsContiguous &&
1720 (getNumVectors() == 1) &&
1723 reallocated = this->imports_.view_device().data() != view_.getDualView().view_device().data() +
offset;
1725 typedef std::pair<size_t, size_t> range_type;
1726 this->imports_ =
DV(view_.getDualView(),
1727 range_type(
offset, getLocalLength()),
1731 std::ostringstream
os;
1732 os << *
prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1733 std::cerr <<
os.str();
1739 using ::Tpetra::Details::reallocDualViewIfNeeded;
1741 reallocDualViewIfNeeded(this->unaliased_imports_,
newSize,
"imports");
1743 std::ostringstream
os;
1744 os << *
prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1745 std::cerr <<
os.str();
1747 this->imports_ = this->unaliased_imports_;
1752template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1754typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1755 typename NO::device_type::memory_space>::value,
1760 const std::string*
prefix,
1761 const bool areRemoteLIDsContiguous,
1766template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1770 const std::string*
prefix,
1771 const bool areRemoteLIDsContiguous,
1777template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1780 return (this->imports_.view_device().data() +
this->imports_.view_device().extent(0) ==
1781 view_.getDualView().view_device().data() + view_.getDualView().view_device().extent(0));
1784template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1787 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1788 Kokkos::DualView<size_t*, buffer_device_type> ,
1791 const execution_space& space) {
1792 using Kokkos::Compat::getKokkosViewDeepCopy;
1793 using KokkosRefactor::Details::unpack_array_multi_column;
1794 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1796 using ::Tpetra::Details::Behavior;
1797 using ::Tpetra::Details::ProfilingRegion;
1815 std::unique_ptr<std::string>
prefix;
1817 auto map = this->getMap();
1818 auto comm =
map.is_null() ? Teuchos::null :
map->getComm();
1819 const int myRank = comm.is_null() ? -1 : comm->getRank();
1820 std::ostringstream
os;
1822 prefix = std::unique_ptr<std::string>(
new std::string(
os.str()));
1824 std::cerr <<
os.str();
1830 std::ostringstream
os;
1832 std::cerr <<
os.str();
1838 if (importsAreAliased()) {
1840 std::ostringstream
os;
1841 os << *
prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" <<
endl;
1842 std::cerr <<
os.str();
1854 "imports.extent(0) = " << imports.extent(0)
1855 <<
" != getNumVectors() * importLIDs.extent(0) = " <<
numVecs
1860 "constantNumPackets input argument must be nonzero.");
1864 std::runtime_error,
"constantNumPackets must equal numVecs.");
1873 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1875 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1879 std::ostringstream
os;
1882 std::cerr <<
os.str();
1892 Kokkos::DualView<size_t*, device_type>
whichVecs;
1893 if (!isConstantStride()) {
1894 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1895 Kokkos::MemoryUnmanaged>
1921 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
1922 using host_exec_space =
typename dual_view_type::t_host::execution_space;
1928 std::ostringstream
os;
1930 std::cerr <<
os.str();
1937 using op_type = KokkosRefactor::Details::InsertOp;
1938 if (isConstantStride()) {
1940 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1941 unpack_array_multi_column(host_exec_space(),
1948 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1949 unpack_array_multi_column(space,
1957 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1958 unpack_array_multi_column_variable_stride(host_exec_space(),
1967 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1968 unpack_array_multi_column_variable_stride(space,
1979 using op_type = KokkosRefactor::Details::AddOp;
1980 if (isConstantStride()) {
1982 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1983 unpack_array_multi_column(host_exec_space(),
1989 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
1990 unpack_array_multi_column(space,
1998 auto X_h = this->getLocalViewHost(Access::ReadWrite);
1999 unpack_array_multi_column_variable_stride(host_exec_space(),
2008 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2009 unpack_array_multi_column_variable_stride(space,
2020 using op_type = KokkosRefactor::Details::AbsMaxOp;
2021 if (isConstantStride()) {
2023 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2024 unpack_array_multi_column(host_exec_space(),
2030 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2031 unpack_array_multi_column(space,
2039 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2040 unpack_array_multi_column_variable_stride(host_exec_space(),
2049 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2050 unpack_array_multi_column_variable_stride(space,
2065 std::ostringstream
os;
2067 std::cerr <<
os.str();
2072 std::ostringstream
os;
2074 std::cerr <<
os.str();
2078template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2081 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
2088template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2092 if (isConstantStride()) {
2093 return static_cast<size_t>(view_.extent(1));
2095 return static_cast<size_t>(whichVectors_.size());
2103 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2105 using Teuchos::REDUCE_MAX;
2106 using Teuchos::REDUCE_SUM;
2107 using Teuchos::reduceAll;
2108 typedef typename RV::non_const_value_type
dot_type;
2128 const int nv =
static_cast<int>(
numVecs);
2135 typename RV::non_const_type
lclDots(Kokkos::ViewAllocateWithoutInitializing(
"tmp"),
numVecs);
2149template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2152 const Kokkos::View<dot_type*, Kokkos::HostSpace>&
dots)
const {
2153 using Kokkos::subview;
2154 using Teuchos::Comm;
2155 using Teuchos::null;
2157 using ::Tpetra::Details::Behavior;
2159 typedef Kokkos::View<dot_type*, Kokkos::HostSpace>
RV;
2160 typedef typename dual_view_type::t_dev_const
XMV;
2165 const size_t numVecs = this->getNumVectors();
2169 const size_t lclNumRows = this->getLocalLength();
2171 const bool debug = Behavior::debug();
2174 const bool compat = this->getMap()->isCompatible(*(
A.getMap()));
2176 "'*this' MultiVector is not "
2177 "compatible with the input MultiVector A. We only test for this "
2189 lclNumRows !=
A.getLocalLength(), std::runtime_error,
2190 "MultiVectors do not have the same local length. "
2191 "this->getLocalLength() = "
2193 "A.getLocalLength() = "
2194 <<
A.getLocalLength() <<
".");
2196 numVecs !=
A.getNumVectors(), std::runtime_error,
2197 "MultiVectors must have the same number of columns (vectors). "
2198 "this->getNumVectors() = "
2200 "A.getNumVectors() = "
2201 <<
A.getNumVectors() <<
".");
2204 "The output array 'dots' must have the same number of entries as the "
2205 "number of columns (vectors) in *this and A. dots.extent(0) = "
2212 auto thisView = this->getLocalViewDevice(Access::ReadOnly);
2213 auto A_view =
A.getLocalViewDevice(Access::ReadOnly);
2216 this->whichVectors_.getRawPtr(),
2217 A.whichVectors_.getRawPtr(),
2218 this->isConstantStride(),
A.isConstantStride());
2233template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2237 using ::Tpetra::Details::ProfilingRegion;
2239 using dot_type =
typename MV::dot_type;
2240 ProfilingRegion
region(
"Tpetra::multiVectorSingleColumnDot");
2242 auto map =
x.getMap();
2243 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2244 map.is_null() ? Teuchos::null :
map->getComm();
2245 if (comm.is_null()) {
2246#if KOKKOS_VERSION >= 40799
2247 return KokkosKernels::ArithTraits<dot_type>::zero();
2249 return Kokkos::ArithTraits<dot_type>::zero();
2256 const LO
lclNumRows =
static_cast<LO
>(std::min(
x.getLocalLength(),
2257 y.getLocalLength()));
2259#if KOKKOS_VERSION >= 40799
2260 dot_type lclDot = KokkosKernels::ArithTraits<dot_type>::zero();
2262 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero();
2264#if KOKKOS_VERSION >= 40799
2274 auto x_2d =
x.getLocalViewDevice(Access::ReadOnly);
2276 auto y_2d =
y.getLocalViewDevice(Access::ReadOnly);
2278 lclDot = KokkosBlas::dot(
x_1d,
y_1d);
2280 if (
x.isDistributed()) {
2281 using Teuchos::outArg;
2282 using Teuchos::REDUCE_SUM;
2283 using Teuchos::reduceAll;
2293template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2296 const Teuchos::ArrayView<dot_type>&
dots)
const {
2300 const size_t numVecs = this->getNumVectors();
2301 const size_t lclNumRows = this->getLocalLength();
2302 const size_t numDots =
static_cast<size_t>(
dots.size());
2313 "MultiVectors do not have the same local length. "
2314 "this->getLocalLength() = "
2316 "A.getLocalLength() = "
2317 <<
A.getLocalLength() <<
".");
2319 "MultiVectors must have the same number of columns (vectors). "
2320 "this->getNumVectors() = "
2322 "A.getNumVectors() = "
2323 <<
A.getNumVectors() <<
".");
2325 "The output array 'dots' must have the same number of entries as the "
2326 "number of columns (vectors) in *this and A. dots.extent(0) = "
2329 if (
numVecs == 1 && this->isConstantStride() &&
A.isConstantStride()) {
2333 this->dot(
A, Kokkos::View<dot_type*, Kokkos::HostSpace>(
dots.getRawPtr(),
numDots));
2337template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2339 norm2(
const Teuchos::ArrayView<mag_type>&
norms)
const {
2341 using ::Tpetra::Details::NORM_TWO;
2342 using ::Tpetra::Details::ProfilingRegion;
2343 ProfilingRegion
region(
"Tpetra::MV::norm2 (host output)");
2346 MV&
X =
const_cast<MV&
>(*this);
2350template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2352 norm2(
const Kokkos::View<mag_type*, Kokkos::HostSpace>&
norms)
const {
2357template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2359 norm1(
const Teuchos::ArrayView<mag_type>&
norms)
const {
2361 using ::Tpetra::Details::NORM_ONE;
2362 using ::Tpetra::Details::ProfilingRegion;
2363 ProfilingRegion
region(
"Tpetra::MV::norm1 (host output)");
2366 MV&
X =
const_cast<MV&
>(*this);
2370template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2372 norm1(
const Kokkos::View<mag_type*, Kokkos::HostSpace>&
norms)
const {
2377template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2379 normInf(
const Teuchos::ArrayView<mag_type>&
norms)
const {
2381 using ::Tpetra::Details::NORM_INF;
2382 using ::Tpetra::Details::ProfilingRegion;
2383 ProfilingRegion
region(
"Tpetra::MV::normInf (host output)");
2386 MV&
X =
const_cast<MV&
>(*this);
2390template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2392 normInf(
const Kokkos::View<mag_type*, Kokkos::HostSpace>&
norms)
const {
2397template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2399 meanValue(
const Teuchos::ArrayView<impl_scalar_type>&
means)
const {
2402 using Kokkos::subview;
2403 using Teuchos::Comm;
2405 using Teuchos::REDUCE_SUM;
2406 using Teuchos::reduceAll;
2407#if KOKKOS_VERSION >= 40799
2408 typedef KokkosKernels::ArithTraits<impl_scalar_type> ATS;
2410 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2413 const size_t lclNumRows = this->getLocalLength();
2414 const size_t numVecs = this->getNumVectors();
2419 "Tpetra::MultiVector::meanValue: means.size() = " <<
numMeans
2420 <<
" != this->getNumVectors() = " <<
numVecs <<
".");
2430 typename local_view_type::host_mirror_type::array_layout,
2432 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
2436 RCP<const Comm<int> > comm = this->getMap().is_null() ? Teuchos::null : this->getMap()->getComm();
2442 auto X_lcl =
subview(getLocalViewHost(Access::ReadOnly),
2445 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace>
lclSums(
"MV::meanValue tmp",
numVecs);
2446 if (isConstantStride()) {
2450 const size_t col = whichVectors_[
j];
2458 if (!comm.is_null() &&
this->isDistributed()) {
2467 auto X_lcl =
subview(this->getLocalViewDevice(Access::ReadOnly),
2471 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace>
lclSums(
"MV::meanValue tmp",
numVecs);
2472 if (isConstantStride()) {
2476 const size_t col = whichVectors_[
j];
2493 if (!comm.is_null() &&
this->isDistributed()) {
2506 ATS::one() /
static_cast<mag_type>(this->getGlobalLength());
2512template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2516#if KOKKOS_VERSION >= 40799
2517 typedef KokkosKernels::ArithTraits<IST> ATS;
2519 typedef Kokkos::ArithTraits<IST> ATS;
2521 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space>
pool_type;
2524 const IST
max = Kokkos::rand<generator_type, IST>::max();
2525 const IST
min = ATS::is_signed ? IST(-
max) : ATS::zero();
2530template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2534 typedef Tpetra::Details::Static_Random_XorShift64_Pool<typename device_type::execution_space>
tpetra_pool_type;
2535 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space>
pool_type;
2538 if (!tpetra_pool_type::isSet())
2539 tpetra_pool_type::resetPool(this->getMap()->getComm()->getRank());
2542 const IST
max =
static_cast<IST
>(maxVal);
2543 const IST
min =
static_cast<IST
>(minVal);
2545 auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
2547 if (isConstantStride()) {
2550 const size_t numVecs = getNumVectors();
2552 const size_t col = whichVectors_[
k];
2553 auto X_k = Kokkos::subview(
thisView, Kokkos::ALL(), col);
2559template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2562 using ::Tpetra::Details::ProfilingRegion;
2563 using ::Tpetra::Details::Blas::fill;
2564 using DES =
typename dual_view_type::t_dev::execution_space;
2565 using HES =
typename dual_view_type::t_host::execution_space;
2567 ProfilingRegion
region(
"Tpetra::MultiVector::putScalar");
2572 const LO
lclNumRows =
static_cast<LO
>(this->getLocalLength());
2573 const LO
numVecs =
static_cast<LO
>(this->getNumVectors());
2581 auto X = this->getLocalViewDevice(Access::OverwriteAll);
2582 if (this->isConstantStride()) {
2586 this->whichVectors_.getRawPtr());
2589 auto X = this->getLocalViewHost(Access::OverwriteAll);
2590 if (this->isConstantStride()) {
2594 this->whichVectors_.getRawPtr());
2599template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2602 using Teuchos::ArrayRCP;
2603 using Teuchos::Comm;
2614 !this->isConstantStride(), std::logic_error,
2615 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2616 "if the MultiVector is a column view of another MultiVector (that is, if "
2617 "isConstantStride() == false).");
2647 if (this->getMap().
is_null()) {
2653 newMap.is_null(), std::invalid_argument,
2654 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2655 "This probably means that the input Map is incorrect.");
2661 const size_t numCols = this->getNumVectors();
2666 }
else if (
newMap.is_null()) {
2669 const size_t newNumRows =
static_cast<size_t>(0);
2670 const size_t numCols = this->getNumVectors();
2677template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2684#if KOKKOS_VERSION >= 40799
2685 if (
theAlpha == KokkosKernels::ArithTraits<IST>::one()) {
2687 if (
theAlpha == Kokkos::ArithTraits<IST>::one()) {
2692 const size_t numVecs = getNumVectors();
2704 auto Y_lcl = Kokkos::subview(getLocalViewHost(Access::ReadWrite),
rowRng,
ALL());
2705 if (isConstantStride()) {
2709 const size_t Y_col = whichVectors_[
k];
2715 auto Y_lcl = Kokkos::subview(getLocalViewDevice(Access::ReadWrite),
rowRng,
ALL());
2716 if (isConstantStride()) {
2720 const size_t Y_col = isConstantStride() ?
k : whichVectors_[
k];
2728template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2730 scale(
const Teuchos::ArrayView<const Scalar>&
alphas) {
2731 const size_t numVecs = this->getNumVectors();
2735 "Tpetra::MultiVector::"
2736 "scale: alphas.size() = "
2737 <<
numAlphas <<
" != this->getNumVectors() = "
2741 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2749 this->scale(
k_alphas.view_device());
2752template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2754 scale(
const Kokkos::View<const impl_scalar_type*, device_type>&
alphas) {
2756 using Kokkos::subview;
2758 const size_t lclNumRows = this->getLocalLength();
2759 const size_t numVecs = this->getNumVectors();
2762 std::invalid_argument,
2763 "Tpetra::MultiVector::scale(alphas): "
2764 "alphas.extent(0) = "
2766 <<
" != this->getNumVectors () = " <<
numVecs <<
".");
2787 if (isConstantStride()) {
2791 const size_t Y_col = this->isConstantStride() ?
k : this->whichVectors_[
k];
2800 if (isConstantStride()) {
2812 const size_t Y_col = this->isConstantStride() ?
k : this->whichVectors_[
k];
2820template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2825 using Kokkos::subview;
2829 const size_t numVecs = getNumVectors();
2832 lclNumRows !=
A.getLocalLength(), std::invalid_argument,
2833 "this->getLocalLength() = " <<
lclNumRows <<
" != A.getLocalLength() = "
2834 <<
A.getLocalLength() <<
".");
2836 numVecs !=
A.getNumVectors(), std::invalid_argument,
2837 "this->getNumVectors() = " <<
numVecs <<
" != A.getNumVectors() = "
2838 <<
A.getNumVectors() <<
".");
2844 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2845 auto X_lcl_orig =
A.getLocalViewDevice(Access::ReadOnly);
2849 if (isConstantStride() &&
A.isConstantStride()) {
2854 const size_t Y_col = this->isConstantStride() ?
k : this->whichVectors_[
k];
2855 const size_t X_col =
A.isConstantStride() ?
k :
A.whichVectors_[
k];
2864template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2870 getLocalLength() !=
A.getLocalLength(), std::runtime_error,
2871 "MultiVectors do not have the same local length. "
2872 "this->getLocalLength() = "
2874 <<
" != A.getLocalLength() = " <<
A.getLocalLength() <<
".");
2876 A.getNumVectors() !=
this->getNumVectors(), std::runtime_error,
2877 ": MultiVectors do not have the same number of columns (vectors). "
2878 "this->getNumVectors() = "
2880 <<
" != A.getNumVectors() = " <<
A.getNumVectors() <<
".");
2882 const size_t numVecs = getNumVectors();
2884 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
2885 auto A_view_dev =
A.getLocalViewDevice(Access::ReadOnly);
2887 if (isConstantStride() &&
A.isConstantStride()) {
2891 using Kokkos::subview;
2893 const size_t this_col = isConstantStride() ?
k : whichVectors_[
k];
2895 const size_t A_col = isConstantStride() ?
k :
A.whichVectors_[
k];
2902template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2908 getLocalLength() !=
A.getLocalLength(), std::runtime_error,
2909 ": MultiVectors do not have the same local length. "
2910 "this->getLocalLength() = "
2912 <<
" != A.getLocalLength() = " <<
A.getLocalLength() <<
".");
2914 A.getNumVectors() !=
this->getNumVectors(), std::runtime_error,
2915 ": MultiVectors do not have the same number of columns (vectors). "
2916 "this->getNumVectors() = "
2918 <<
" != A.getNumVectors() = " <<
A.getNumVectors() <<
".");
2919 const size_t numVecs = getNumVectors();
2921 auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
2922 auto A_view_dev =
A.getLocalViewDevice(Access::ReadOnly);
2924 if (isConstantStride() &&
A.isConstantStride()) {
2928 using Kokkos::subview;
2931 const size_t this_col = isConstantStride() ?
k : whichVectors_[
k];
2933 const size_t A_col = isConstantStride() ?
k :
A.whichVectors_[
k];
2940template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2947 using Kokkos::subview;
2952 const size_t numVecs = getNumVectors();
2956 lclNumRows !=
A.getLocalLength(), std::invalid_argument,
2957 "this->getLocalLength() = " <<
lclNumRows <<
" != A.getLocalLength() = "
2958 <<
A.getLocalLength() <<
".");
2960 numVecs !=
A.getNumVectors(), std::invalid_argument,
2961 "this->getNumVectors() = " <<
numVecs <<
" != A.getNumVectors() = "
2962 <<
A.getNumVectors() <<
".");
2970 auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
2972 auto X_lcl_orig =
A.getLocalViewDevice(Access::ReadOnly);
2976 if (isConstantStride() &&
A.isConstantStride()) {
2981 const size_t Y_col = this->isConstantStride() ?
k : this->whichVectors_[
k];
2982 const size_t X_col =
A.isConstantStride() ?
k :
A.whichVectors_[
k];
2991template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2999 using Kokkos::subview;
3001 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3005 const size_t lclNumRows = this->getLocalLength();
3006 const size_t numVecs = getNumVectors();
3010 lclNumRows !=
A.getLocalLength(), std::invalid_argument,
3011 "The input MultiVector A has " <<
A.getLocalLength() <<
" local "
3012 "row(s), but this MultiVector has "
3016 lclNumRows !=
B.getLocalLength(), std::invalid_argument,
3017 "The input MultiVector B has " <<
B.getLocalLength() <<
" local "
3018 "row(s), but this MultiVector has "
3022 A.getNumVectors() !=
numVecs, std::invalid_argument,
3023 "The input MultiVector A has " <<
A.getNumVectors() <<
" column(s), "
3024 "but this MultiVector has "
3027 B.getNumVectors() !=
numVecs, std::invalid_argument,
3028 "The input MultiVector B has " <<
B.getNumVectors() <<
" column(s), "
3029 "but this MultiVector has "
3047 if (isConstantStride() &&
A.isConstantStride() &&
B.isConstantStride()) {
3053 const size_t this_col = isConstantStride() ?
k : whichVectors_[
k];
3054 const size_t A_col =
A.isConstantStride() ?
k :
A.whichVectors_[
k];
3055 const size_t B_col =
B.isConstantStride() ?
k :
B.whichVectors_[
k];
3063template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3064Teuchos::ArrayRCP<const Scalar>
3076 auto hostView = getLocalViewHost(Access::ReadOnly);
3077 const size_t col = isConstantStride() ?
j : whichVectors_[
j];
3080 Kokkos::Compat::persistingView(
hostView_j, 0, getLocalLength());
3085 "hostView_j.extent(0) = " <<
hostView_j.extent(0)
3086 <<
" < dataAsArcp.size() = " <<
dataAsArcp.size() <<
". "
3087 "Please report this bug to the Tpetra developers.");
3089 return Teuchos::arcp_reinterpret_cast<const Scalar>(
dataAsArcp);
3092template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3093Teuchos::ArrayRCP<Scalar>
3097 using Kokkos::subview;
3101 auto hostView = getLocalViewHost(Access::ReadWrite);
3102 const size_t col = isConstantStride() ?
j : whichVectors_[
j];
3105 Kokkos::Compat::persistingView(
hostView_j, 0, getLocalLength());
3110 "hostView_j.extent(0) = " <<
hostView_j.extent(0)
3111 <<
" < dataAsArcp.size() = " <<
dataAsArcp.size() <<
". "
3112 "Please report this bug to the Tpetra developers.");
3114 return Teuchos::arcp_reinterpret_cast<Scalar>(
dataAsArcp);
3117template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3118Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3120 subCopy(
const Teuchos::ArrayView<const size_t>&
cols)
const {
3129 if (
cols[
j] !=
cols[
j - 1] +
static_cast<size_t>(1)) {
3144template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3145Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3152 RCP<MV> Y(
new MV(this->getMap(),
static_cast<size_t>(
colRng.size()),
false));
3157template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3161 return view_.origExtent(0);
3164template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3168 return view_.origExtent(1);
3171template <
class Scalar,
class LO,
class GO,
class Node>
3174 const Teuchos::RCP<const map_type>&
subMap,
3178 using Kokkos::subview;
3180 using Teuchos::outArg;
3183 using Teuchos::REDUCE_MIN;
3184 using Teuchos::reduceAll;
3186 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3187 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3190 std::unique_ptr<std::ostringstream>
errStrm;
3197 const auto comm =
subMap->getComm();
3199 const int myRank = comm->getRank();
3202 const LO numCols =
static_cast<LO
>(
X.getNumVectors());
3205 std::ostringstream
os;
3208 <<
", origLclNumRows: " <<
X.getOrigNumLocalRows()
3209 <<
", numCols: " << numCols <<
"}, "
3211 std::cerr <<
os.str();
3219 errStrm = std::unique_ptr<std::ostringstream>(
new std::ostringstream);
3220 *
errStrm <<
" Proc " <<
myRank <<
": subMap->getLocalNumElements() (="
3222 <<
") > X.getOrigNumLocalRows() (=" <<
X.getOrigNumLocalRows()
3240 using range_type = std::pair<LO, LO>;
3252 newView.extent(1) !=
X.view_.extent(1)) {
3264 if (
errStrm.get() ==
nullptr) {
3265 errStrm = std::unique_ptr<std::ostringstream>(
new std::ostringstream);
3274 "dimensions on one or more processes:"
3286 std::ostringstream
os;
3288 std::cerr <<
os.str();
3294 std::ostringstream
os;
3296 std::cerr <<
os.str();
3300template <
class Scalar,
class LO,
class GO,
class Node>
3308template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3309Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3312 const size_t offset)
const {
3317template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3318Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3326template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3327Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3329 subView(
const Teuchos::ArrayView<const size_t>&
cols)
const {
3330 using Teuchos::Array;
3337 "Tpetra::MultiVector::subView"
3338 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3339 "contain at least one entry, but cols.size() = "
3347 if (
cols[
j] !=
cols[
j - 1] +
static_cast<size_t>(1)) {
3362 if (isConstantStride()) {
3363 return rcp(
new MV(this->getMap(), view_,
cols));
3369 return rcp(
new MV(this->getMap(), view_,
newcols()));
3373template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3374Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3378 using Kokkos::subview;
3379 using Teuchos::Array;
3382 using ::Tpetra::Details::Behavior;
3386 const size_t lclNumRows = this->getLocalLength();
3387 const size_t numVecs = this->getNumVectors();
3392 static_cast<size_t>(
colRng.size()) >
numVecs, std::runtime_error,
3393 "colRng.size() = " <<
colRng.size() <<
" > this->getNumVectors() = "
3397 (
colRng.lbound() <
static_cast<Teuchos::Ordinal
>(0) ||
3399 std::invalid_argument,
"Nonempty input range [" <<
colRng.lbound() <<
"," <<
colRng.ubound() <<
"] exceeds the valid range of column indices "
3414 if (
colRng.size() == 0) {
3415 const std::pair<size_t, size_t>
cols(0, 0);
3420 if (isConstantStride()) {
3421 const std::pair<size_t, size_t>
cols(
colRng.lbound(),
3426 if (
static_cast<size_t>(
colRng.size()) ==
static_cast<size_t>(1)) {
3429 const std::pair<size_t, size_t> col(whichVectors_[0] +
colRng.lbound(),
3430 whichVectors_[0] +
colRng.ubound() + 1);
3435 whichVectors_.begin() +
colRng.ubound() + 1);
3441 const bool debug = Behavior::debug();
3443 using Teuchos::Comm;
3444 using Teuchos::outArg;
3445 using Teuchos::REDUCE_MIN;
3446 using Teuchos::reduceAll;
3448 RCP<const Comm<int> > comm = this->getMap().is_null() ? Teuchos::null : this->getMap()->getComm();
3449 if (!comm.is_null()) {
3453 if (
X_ret.is_null()) {
3458 "X_ret (the subview of this "
3459 "MultiVector; the return value of this method) is null on some MPI "
3460 "process in this MultiVector's communicator. This should never "
3461 "happen. Please report this bug to the Tpetra developers.");
3462 if (!
X_ret.is_null() &&
3463 X_ret->getNumVectors() !=
static_cast<size_t>(
colRng.size())) {
3469 "X_ret->getNumVectors() != "
3470 "colRng.size(), on at least one MPI process in this MultiVector's "
3471 "communicator. This should never happen. "
3472 "Please report this bug to the Tpetra developers.");
3478template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3479Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3483 return Teuchos::rcp_const_cast<MV>(this->subView(
cols));
3486template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3487Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3491 return Teuchos::rcp_const_cast<MV>(this->subView(
colRng));
3494template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3499 using Kokkos::subview;
3500 typedef std::pair<size_t, size_t> range_type;
3501 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3503 const size_t numCols =
X.getNumVectors();
3505 const size_t jj =
X.isConstantStride() ?
static_cast<size_t>(
j) :
static_cast<size_t>(
X.whichVectors_[
j]);
3517 const size_t newSize =
X.imports_.extent(0) / numCols;
3526 const size_t newSize =
X.exports_.extent(0) / numCols;
3541template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3542Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3546 return Teuchos::rcp(
new V(*
this,
j));
3549template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3550Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3554 return Teuchos::rcp(
new V(*
this,
j));
3557template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3559 get1dCopy(
const Teuchos::ArrayView<Scalar>&
A,
const size_t LDA)
const {
3561 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3563 Kokkos::MemoryUnmanaged>;
3566 const size_t numRows = this->getLocalLength();
3567 const size_t numCols = this->getNumVectors();
3570 "LDA = " <<
LDA <<
" < numRows = " <<
numRows <<
".");
3574 "A.size() = " <<
A.size() <<
", but its size must be at least "
3575 << (
LDA * (numCols - 1) +
numRows) <<
" to hold all the entries.");
3578 const std::pair<size_t, size_t>
colRange(0, numCols);
3580 input_view_type
A_view_orig(
reinterpret_cast<IST*
>(
A.getRawPtr()),
3595 if (this->need_sync_host() && this->need_sync_device()) {
3598 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3600 const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count();
3601 if (this->isConstantStride()) {
3603 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3607 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3612 for (
size_t j = 0;
j < numCols; ++
j) {
3613 const size_t srcCol = this->whichVectors_[
j];
3617 auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
3622 auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
3632template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3637 const size_t numRows = this->getLocalLength();
3638 const size_t numCols = this->getNumVectors();
3641 static_cast<size_t>(
ArrayOfPtrs.size()) != numCols,
3643 "Input array of pointers must contain as many "
3644 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3645 <<
ArrayOfPtrs.size() <<
" != getNumVectors() = " << numCols <<
".");
3647 if (
numRows != 0 && numCols != 0) {
3649 for (
size_t j = 0;
j < numCols; ++
j) {
3652 dstLen <
numRows, std::invalid_argument,
"Array j = " <<
j <<
" of "
3653 "the input array of arrays is not long enough to fit all entries in "
3654 "that column of the MultiVector. ArrayOfPtrs[j].size() = "
3659 for (
size_t j = 0;
j < numCols; ++
j) {
3660 Teuchos::RCP<const V>
X_j = this->getVector(
j);
3667template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3668Teuchos::ArrayRCP<const Scalar>
3671 if (getLocalLength() == 0 || getNumVectors() == 0) {
3672 return Teuchos::null;
3675 !isConstantStride(), std::runtime_error,
3676 "Tpetra::MultiVector::"
3677 "get1dView: This MultiVector does not have constant stride, so it is "
3678 "not possible to view its data as a single array. You may check "
3679 "whether a MultiVector has constant stride by calling "
3680 "isConstantStride().");
3684 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3685 Teuchos::ArrayRCP<const impl_scalar_type>
dataAsArcp =
3686 Kokkos::Compat::persistingView(
X_lcl);
3687 return Teuchos::arcp_reinterpret_cast<const Scalar>(
dataAsArcp);
3691template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3692Teuchos::ArrayRCP<Scalar>
3695 if (this->getLocalLength() == 0 || this->getNumVectors() == 0) {
3696 return Teuchos::null;
3699 "Tpetra::MultiVector::"
3700 "get1dViewNonConst: This MultiVector does not have constant stride, "
3701 "so it is not possible to view its data as a single array. You may "
3702 "check whether a MultiVector has constant stride by calling "
3703 "isConstantStride().");
3704 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3705 Teuchos::ArrayRCP<impl_scalar_type>
dataAsArcp =
3706 Kokkos::Compat::persistingView(
X_lcl);
3707 return Teuchos::arcp_reinterpret_cast<Scalar>(
dataAsArcp);
3711template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3712Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3715 auto X_lcl = getLocalViewHost(Access::ReadWrite);
3721 const size_t myNumRows = this->getLocalLength();
3722 const size_t numCols = this->getNumVectors();
3725 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
views(numCols);
3726 for (
size_t j = 0;
j < numCols; ++
j) {
3727 const size_t col = this->isConstantStride() ?
j : this->whichVectors_[
j];
3730 Kokkos::Compat::persistingView(
X_lcl_j);
3736template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3740 return view_.getHostView(
s);
3743template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3747 return view_.getHostView(
s);
3750template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3754 return view_.getHostView(
s);
3757template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3761 return view_.getDeviceView(
s);
3764template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3768 return view_.getDeviceView(
s);
3771template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3775 return view_.getDeviceView(
s);
3778template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3785template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3786Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3792 auto X_lcl = getLocalViewHost(Access::ReadOnly);
3798 const size_t myNumRows = this->getLocalLength();
3799 const size_t numCols = this->getNumVectors();
3802 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
views(numCols);
3803 for (
size_t j = 0;
j < numCols; ++
j) {
3804 const size_t col = this->isConstantStride() ?
j : this->whichVectors_[
j];
3806 Teuchos::ArrayRCP<const impl_scalar_type>
X_lcl_j_arcp =
3807 Kokkos::Compat::persistingView(
X_lcl_j);
3813template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3822 using Teuchos::CONJ_TRANS;
3823 using Teuchos::NO_TRANS;
3826 using Teuchos::rcpFromRef;
3827 using Teuchos::TRANS;
3828 using ::Tpetra::Details::ProfilingRegion;
3829#if KOKKOS_VERSION >= 40799
3830 using ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
3832 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
3835 using STS = Teuchos::ScalarTraits<Scalar>;
3838 ProfilingRegion
region(
"Tpetra::MV::multiply");
3870 const bool C_is_local = !this->isDistributed();
3880 auto myMap = this->getMap();
3881 if (!
myMap.is_null() && !
myMap->getComm().is_null()) {
3882 using Teuchos::outArg;
3883 using Teuchos::REDUCE_MIN;
3884 using Teuchos::reduceAll;
3886 auto comm =
myMap->getComm();
3899 const int myRank = comm->getRank();
3901 if (this->getLocalLength() !=
A_nrows) {
3903 << this->getLocalLength() <<
" != A_nrows=" <<
A_nrows
3904 <<
"." << std::endl;
3906 if (this->getNumVectors() !=
B_ncols) {
3908 << this->getNumVectors() <<
" != B_ncols=" <<
B_ncols
3909 <<
"." << std::endl;
3914 <<
"." << std::endl;
3921 std::ostringstream
os;
3925 "Inconsistent local dimensions on at "
3926 "least one process in this object's communicator."
3929 <<
"C(" << (
C_is_local ?
"local" :
"distr") <<
") = "
3931 << (
transA == Teuchos::TRANS ?
"^T" : (
transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3932 <<
"(" << (
A_is_local ?
"local" :
"distr") <<
") * "
3934 << (
transA == Teuchos::TRANS ?
"^T" : (
transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3935 <<
"(" << (
B_is_local ?
"local" :
"distr") <<
")." << std::endl
3936 <<
"Global dimensions: C(" << this->getGlobalLength() <<
", "
3937 << this->getNumVectors() <<
"), A(" <<
A.getGlobalLength()
3938 <<
", " <<
A.getNumVectors() <<
"), B(" <<
B.getGlobalLength()
3939 <<
", " <<
B.getNumVectors() <<
")." << std::endl
3957 "Multiplication of op(A) and op(B) into *this is not a "
3958 "supported use case.");
3966 const int myRank = this->getMap()->getComm()->getRank();
3977 if (!isConstantStride()) {
3978 C_tmp =
rcp(
new MV(*
this, Teuchos::Copy));
3984 if (!
A.isConstantStride()) {
3991 if (!
B.isConstantStride()) {
3998 !
A_tmp->isConstantStride(),
4000 "Failed to make temporary constant-stride copies of MultiVectors.");
4005 auto A_lcl =
A_tmp->getLocalViewDevice(Access::ReadOnly);
4006 auto A_sub = Kokkos::subview(A_lcl,
4012 auto B_lcl =
B_tmp->getLocalViewDevice(Access::ReadOnly);
4013 auto B_sub = Kokkos::subview(B_lcl,
4020 auto C_lcl =
C_tmp->getLocalViewDevice(Access::ReadWrite);
4024 const char ctransA = (
transA == Teuchos::NO_TRANS ?
'N' : (
transA == Teuchos::TRANS ?
'T' :
'C'));
4025 const char ctransB = (
transB == Teuchos::NO_TRANS ?
'N' : (
transB == Teuchos::TRANS ?
'T' :
'C'));
4028 ProfilingRegion
regionGemm(
"Tpetra::MV::multiply-call-gemm");
4034 if (!isConstantStride()) {
4039 A_tmp = Teuchos::null;
4040 B_tmp = Teuchos::null;
4048template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4055 using Kokkos::subview;
4058 const size_t lclNumRows = this->getLocalLength();
4059 const size_t numVecs = this->getNumVectors();
4062 std::runtime_error,
"MultiVectors do not have the same local length.");
4064 numVecs !=
B.getNumVectors(), std::runtime_error,
4065 "this->getNumVectors"
4067 <<
numVecs <<
" != B.getNumVectors() = " <<
B.getNumVectors()
4070 auto this_view = this->getLocalViewDevice(Access::ReadWrite);
4071 auto A_view =
A.getLocalViewDevice(Access::ReadOnly);
4072 auto B_view =
B.getLocalViewDevice(Access::ReadOnly);
4074 if (isConstantStride() &&
B.isConstantStride()) {
4087 const size_t C_col = isConstantStride() ?
j : whichVectors_[
j];
4088 const size_t B_col =
B.isConstantStride() ?
j :
B.whichVectors_[
j];
4098template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4101 using ::Tpetra::Details::allReduceView;
4102 using ::Tpetra::Details::ProfilingRegion;
4103 ProfilingRegion
region(
"Tpetra::MV::reduce");
4105 const auto map = this->getMap();
4106 if (
map.get() ==
nullptr) {
4109 const auto comm =
map->getComm();
4110 if (comm.get() ==
nullptr) {
4121 Kokkos::fence(
"MultiVector::reduce");
4122 auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
4125 auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
4130template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4141 "Tpetra::MultiVector::replaceLocalValue: row index " <<
lclRow
4142 <<
" is invalid. The range of valid row indices on this process "
4143 << this->getMap()->getComm()->getRank() <<
" is [" <<
minLocalIndex
4146 vectorIndexOutOfRange(col),
4148 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4149 <<
" of the multivector is invalid.");
4152 auto view = this->getLocalViewHost(Access::ReadWrite);
4153 const size_t colInd = isConstantStride() ? col : whichVectors_[col];
4157template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4162 const bool atomic) {
4169 "Tpetra::MultiVector::sumIntoLocalValue: row index " <<
lclRow
4170 <<
" is invalid. The range of valid row indices on this process "
4171 << this->getMap()->getComm()->getRank() <<
" is [" <<
minLocalIndex
4174 vectorIndexOutOfRange(col),
4176 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4177 <<
" of the multivector is invalid.");
4180 const size_t colInd = isConstantStride() ? col : whichVectors_[col];
4182 auto view = this->getLocalViewHost(Access::ReadWrite);
4190template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4203 "Global row index " <<
gblRow <<
" is not present on this process "
4204 << this->getMap()->getComm()->getRank() <<
".");
4206 "Vector index " << col <<
" of the MultiVector is invalid.");
4212template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4217 const bool atomic) {
4224 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
4226 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " <<
globalRow
4227 <<
" is not present on this process "
4228 << this->getMap()->getComm()->getRank() <<
".");
4230 vectorIndexOutOfRange(col),
4232 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4233 <<
" of the multivector is invalid.");
4236 this->sumIntoLocalValue(
lclRow, col, value, atomic);
4239template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4246 typename dual_view_type::array_layout,
4249 const size_t col = isConstantStride() ?
j : whichVectors_[
j];
4251 Kokkos::subview(view_, Kokkos::ALL(), col);
4252 return Kokkos::Compat::persistingView(
X_col.view_device());
4255template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4258 return view_.need_sync_host();
4261template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4264 return view_.need_sync_device();
4267template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4271 using Teuchos::TypeNameTraits;
4273 std::ostringstream
out;
4278 <<
", Node" << Node::name()
4283 out <<
", numRows: " << this->getGlobalLength();
4285 out <<
", numCols: " << this->getNumVectors()
4286 <<
", isConstantStride: " << this->isConstantStride();
4288 if (this->isConstantStride()) {
4289 out <<
", columnStride: " << this->getStride();
4296template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4300 return this->descriptionImpl(
"Tpetra::MultiVector");
4304template <
typename ViewType>
4305void print_vector(Teuchos::FancyOStream&
out,
const char*
prefix,
const ViewType&
v) {
4307 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4308 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4309 static_assert(ViewType::rank == 2,
4310 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4313 out <<
"Values(" <<
prefix <<
"): " << std::endl
4315 const size_t numRows =
v.extent(0);
4316 const size_t numCols =
v.extent(1);
4326 for (
size_t j = 0;
j < numCols; ++
j) {
4328 if (
j + 1 < numCols) {
4341template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4348 if (
vl <= Teuchos::VERB_LOW) {
4349 return std::string();
4351 auto map = this->getMap();
4352 if (
map.is_null()) {
4353 return std::string();
4355 auto outStringP = Teuchos::rcp(
new std::ostringstream());
4357 Teuchos::FancyOStream&
out = *
outp;
4358 auto comm =
map->getComm();
4359 const int myRank = comm->getRank();
4360 const int numProcs = comm->getSize();
4366 const LO
lclNumRows =
static_cast<LO
>(this->getLocalLength());
4369 if (
vl > Teuchos::VERB_MEDIUM) {
4372 if (this->getNumVectors() !=
static_cast<size_t>(1)) {
4373 out <<
"isConstantStride: " << this->isConstantStride() <<
endl;
4375 if (this->isConstantStride()) {
4376 out <<
"Column stride: " << this->getStride() <<
endl;
4387 auto X_dev = view_.getDeviceCopy();
4388 auto X_host = view_.getHostCopy();
4392 Details::print_vector(
out,
"unified",
X_host);
4394 Details::print_vector(
out,
"host",
X_host);
4395 Details::print_vector(
out,
"dev",
X_dev);
4403template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4407 const Teuchos::EVerbosityLevel
verbLevel)
const {
4409 using Teuchos::TypeNameTraits;
4410 using Teuchos::VERB_DEFAULT;
4411 using Teuchos::VERB_LOW;
4412 using Teuchos::VERB_NONE;
4413 const Teuchos::EVerbosityLevel
vl =
4423 auto map = this->getMap();
4424 if (
map.is_null()) {
4427 auto comm =
map->getComm();
4428 if (comm.is_null()) {
4432 const int myRank = comm->getRank();
4441 Teuchos::RCP<Teuchos::OSTab>
tab0,
tab1;
4445 tab0 = Teuchos::rcp(
new Teuchos::OSTab(
out));
4447 tab1 = Teuchos::rcp(
new Teuchos::OSTab(
out));
4449 out <<
"Template parameters:" <<
endl;
4454 <<
"Node: " << Node::name() <<
endl;
4459 out <<
"Global number of rows: " << this->getGlobalLength() <<
endl;
4461 out <<
"Number of columns: " << this->getNumVectors() <<
endl;
4469 const std::string
lclStr = this->localDescribeToString(
vl);
4474template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4477 const Teuchos::EVerbosityLevel
verbLevel)
const {
4478 this->describeImpl(
out,
"Tpetra::MultiVector",
verbLevel);
4481template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4487template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4490 using ::Tpetra::Details::localDeepCopy;
4491 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4494 this->getNumVectors() != src.getNumVectors(),
4495 std::invalid_argument,
4496 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4497 "objects do not match. src has dimensions ["
4498 << src.getGlobalLength()
4499 <<
"," << src.getNumVectors() <<
"], and *this has dimensions ["
4500 <<
this->getGlobalLength() <<
"," <<
this->getNumVectors() <<
"].");
4503 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4504 "objects do not match. src has "
4505 << src.getLocalLength() <<
" row(s) "
4506 <<
" and *this has " <<
this->getLocalLength() <<
" row(s).");
4521 localDeepCopy(this->getLocalViewHost(Access::ReadWrite),
4522 src.getLocalViewHost(Access::ReadOnly),
4523 this->isConstantStride(),
4524 src.isConstantStride(),
4525 this->whichVectors_(),
4526 src.whichVectors_());
4528 localDeepCopy(this->getLocalViewDevice(Access::ReadWrite),
4529 src.getLocalViewDevice(Access::ReadOnly),
4530 this->isConstantStride(),
4531 src.isConstantStride(),
4532 this->whichVectors_(),
4533 src.whichVectors_());
4537template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4539Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4545 this->getNumVectors(),
4551template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4554 using ::Tpetra::Details::PackTraits;
4556 const size_t l1 = this->getLocalLength();
4557 const size_t l2 =
vec.getLocalLength();
4558 if ((
l1 !=
l2) || (this->getNumVectors() !=
vec.getNumVectors())) {
4565template <
class ST,
class LO,
class GO,
class NT>
4568 std::swap(
mv.map_,
this->map_);
4569 std::swap(
mv.view_,
this->view_);
4570 std::swap(
mv.whichVectors_,
this->whichVectors_);
4573#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4574template <
class ST,
class LO,
class GO,
class NT>
4576 const Teuchos::SerialDenseMatrix<int, ST>& src) {
4577 using ::Tpetra::Details::localDeepCopy;
4579 using IST =
typename MV::impl_scalar_type;
4580 using input_view_type =
4581 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4582 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4583 using pair_type = std::pair<LO, LO>;
4585 const LO
numRows =
static_cast<LO
>(src.numRows());
4586 const LO numCols =
static_cast<LO
>(src.numCols());
4589 numCols !=
static_cast<LO
>(dst.getNumVectors()),
4590 std::invalid_argument,
"Tpetra::deep_copy: On Process " << dst.getMap()->getComm()->getRank() <<
", dst is " << dst.getLocalLength() <<
" x " << dst.getNumVectors() <<
", but src is " <<
numRows <<
" x " << numCols <<
".");
4592 const IST*
src_raw =
reinterpret_cast<const IST*
>(src.values());
4599 localDeepCopy(dst.getLocalViewHost(Access::ReadWrite),
4601 dst.isConstantStride(),
4603 getMultiVectorWhichVectors(dst),
4607template <
class ST,
class LO,
class GO,
class NT>
4608void deep_copy(Teuchos::SerialDenseMatrix<int, ST>& dst,
4610 using ::Tpetra::Details::localDeepCopy;
4612 using IST =
typename MV::impl_scalar_type;
4613 using output_view_type =
4614 Kokkos::View<IST**, Kokkos::LayoutLeft,
4615 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4616 using pair_type = std::pair<LO, LO>;
4618 const LO
numRows =
static_cast<LO
>(dst.numRows());
4619 const LO numCols =
static_cast<LO
>(dst.numCols());
4623 std::invalid_argument,
"Tpetra::deep_copy: On Process " << src.
getMap()->getComm()->getRank() <<
", src is " << src.
getLocalLength() <<
" x " << src.
getNumVectors() <<
", but dst is " <<
numRows <<
" x " << numCols <<
".");
4625 IST*
dst_raw =
reinterpret_cast<IST*
>(dst.values());
4639 getMultiVectorWhichVectors(src));
4643template <
class Scalar,
class LO,
class GO,
class NT>
4644Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4651template <
class ST,
class LO,
class GO,
class NT>
4655 MV
cpy(src.getMap(), src.getNumVectors(),
false);
4668#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4669#define TPETRA_MULTIVECTOR_INSTANT(SCALAR, LO, GO, NODE) \
4670 template class MultiVector<SCALAR, LO, GO, NODE>; \
4671 template MultiVector<SCALAR, LO, GO, NODE> createCopy(const MultiVector<SCALAR, LO, GO, NODE>& src); \
4672 template Teuchos::RCP<MultiVector<SCALAR, LO, GO, NODE> > createMultiVector(const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4673 template void deep_copy(MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4674 template void deep_copy(Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4677#define TPETRA_MULTIVECTOR_INSTANT(SCALAR, LO, GO, NODE) \
4678 template class MultiVector<SCALAR, LO, GO, NODE>; \
4679 template MultiVector<SCALAR, LO, GO, NODE> createCopy(const MultiVector<SCALAR, LO, GO, NODE>& src);
4683#define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO, SI, LO, GO, NODE) \
4685 template Teuchos::RCP<MultiVector<SO, LO, GO, NODE> > \
4686 MultiVector<SI, LO, GO, NODE>::convert<SO>() const;
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::isInterComm.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
A parallel distribution of indices over processes.
One or more distributed dense vectors.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,...
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
size_t getStride() const
Stride between columns in the multivector.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool areRemoteLIDsContiguous=false, const CombineMode CM=INSERT) override
Reallocate imports_ if needed.
void reduce()
Sum values of a locally replicated multivector across all processes.
virtual bool checkSizes(const SrcDistObject &sourceObj) override
Whether data redistribution between sourceObj and this object is legal.
void randomize()
Set all values in the multivector to pseudorandom numbers.
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Map and their communicator.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using local (row) index.
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Swap contents of mv with contents of *this.
size_t getLocalLength() const
Local number of rows on the calling process.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM, const execution_space &space) override
Same as copyAndPermute, but do operations in space.
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using global row index.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
size_t getNumVectors() const
Number of columns in the multivector.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
virtual size_t constantNumberOfPackets() const override
Number of packets to send per LID.
wrapped_dual_view_type getWrappedDualView() const
Return the wrapped dual view holding this MultiVector's local data.
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using local row index.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
Teuchos::RCP< MultiVector< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another MultiVector with the same entries, but converted to a different Scalar type T.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this)
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with the given verbosity level to a FancyOStream.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using global row index.
bool aliases(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &other) const
Whether this multivector's memory might alias other. This is conservative: if either this or other is...
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
virtual std::string description() const override
A simple one-line description of this object.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string.
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
Abstract base class for objects that can be the source of an Import or Export operation.
Implementation details of Tpetra.
int local_ordinal_type
Default value of Scalar template parameter.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes)
@ INSERT
Insert new values that don't currently exist.