10#ifndef TPETRA_MULTIVECTOR_DEF_HPP 
   11#define TPETRA_MULTIVECTOR_DEF_HPP 
   23#include "Tpetra_Vector.hpp" 
   35#include "Tpetra_Details_Random.hpp" 
   36#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS 
   37#include "Teuchos_SerialDenseMatrix.hpp" 
   39#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp" 
   40#include "KokkosCompat_View.hpp" 
   41#include "KokkosBlas.hpp" 
   42#include "KokkosKernels_Utils.hpp" 
   43#include "Kokkos_Random.hpp" 
   44#if KOKKOS_VERSION >= 40799 
   45#include "KokkosKernels_ArithTraits.hpp" 
   47#include "Kokkos_ArithTraits.hpp" 
   52#ifdef HAVE_TPETRA_INST_FLOAT128 
   55template <
class Generator>
 
   56struct rand<Generator, __float128> {
 
   57  static KOKKOS_INLINE_FUNCTION __float128 max() {
 
   58    return static_cast<__float128
>(1.0);
 
   60  static KOKKOS_INLINE_FUNCTION __float128
 
   61  draw(Generator& gen) {
 
   64    const __float128 scalingFactor =
 
   65        static_cast<__float128
>(std::numeric_limits<double>::min()) /
 
   66        static_cast<__float128
>(2.0);
 
   67    const __float128 higherOrderTerm = 
static_cast<__float128
>(gen.drand());
 
   68    const __float128 lowerOrderTerm =
 
   69        static_cast<__float128
>(gen.drand()) * scalingFactor;
 
   70    return higherOrderTerm + lowerOrderTerm;
 
   72  static KOKKOS_INLINE_FUNCTION __float128
 
   73  draw(Generator& gen, 
const __float128& range) {
 
   75    const __float128 scalingFactor =
 
   76        static_cast<__float128
>(std::numeric_limits<double>::min()) /
 
   77        static_cast<__float128
>(2.0);
 
   78    const __float128 higherOrderTerm =
 
   79        static_cast<__float128
>(gen.drand(range));
 
   80    const __float128 lowerOrderTerm =
 
   81        static_cast<__float128
>(gen.drand(range)) * scalingFactor;
 
   82    return higherOrderTerm + lowerOrderTerm;
 
   84  static KOKKOS_INLINE_FUNCTION __float128
 
   85  draw(Generator& gen, 
const __float128& start, 
const __float128& end) {
 
   87    const __float128 scalingFactor =
 
   88        static_cast<__float128
>(std::numeric_limits<double>::min()) /
 
   89        static_cast<__float128
>(2.0);
 
   90    const __float128 higherOrderTerm =
 
   91        static_cast<__float128
>(gen.drand(start, end));
 
   92    const __float128 lowerOrderTerm =
 
   93        static_cast<__float128
>(gen.drand(start, end)) * scalingFactor;
 
   94    return higherOrderTerm + lowerOrderTerm;
 
  116template <
class ST, 
class LO, 
class GO, 
class NT>
 
  118allocDualView(
const size_t lclNumRows,
 
  119              const size_t numCols,
 
  120              const bool zeroOut      = 
true,
 
  121              const bool allowPadding = 
false) {
 
  122  using Kokkos::AllowPadding;
 
  123  using Kokkos::view_alloc;
 
  124  using Kokkos::WithoutInitializing;
 
  125  using ::Tpetra::Details::Behavior;
 
  128  typedef typename dual_view_type::t_dev dev_view_type;
 
  134  const std::string label(
"MV::DualView");
 
  135  const bool debug = Behavior::debug();
 
  145  dev_view_type d_view;
 
  148      d_view = dev_view_type(view_alloc(label, AllowPadding),
 
  149                             lclNumRows, numCols);
 
  151      d_view = dev_view_type(view_alloc(label),
 
  152                             lclNumRows, numCols);
 
  156      d_view = dev_view_type(view_alloc(label,
 
  159                             lclNumRows, numCols);
 
  161      d_view = dev_view_type(view_alloc(label, WithoutInitializing),
 
  162                             lclNumRows, numCols);
 
  173#if KOKKOS_VERSION >= 40799 
  174      const ST nan = KokkosKernels::ArithTraits<ST>::nan();
 
  176      const ST nan = Kokkos::ArithTraits<ST>::nan();
 
  178      KokkosBlas::fill(d_view, nan);
 
  182    TEUCHOS_TEST_FOR_EXCEPTION(
static_cast<size_t>(d_view.extent(0)) != lclNumRows ||
 
  183                                   static_cast<size_t>(d_view.extent(1)) != numCols,
 
  185                               "allocDualView: d_view's dimensions actual dimensions do not match " 
  186                               "requested dimensions.  d_view is " 
  187                                   << d_view.extent(0) << 
" x " << d_view.extent(1) << 
"; requested " << lclNumRows << 
" x " << numCols
 
  188                                   << 
".  Please report this bug to the Tpetra developers.");
 
  191  return wrapped_dual_view_type(d_view);
 
  198template <
class T, 
class ExecSpace>
 
  199struct MakeUnmanagedView {
 
  209  typedef typename std::conditional<
 
  210      Kokkos::SpaceAccessibility<
 
  211          typename ExecSpace::memory_space,
 
  212          Kokkos::HostSpace>::accessible,
 
  213      typename ExecSpace::device_type,
 
  214      typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
 
  215  typedef Kokkos::LayoutLeft array_layout;
 
  216  typedef Kokkos::View<T*, array_layout, host_exec_space,
 
  217                       Kokkos::MemoryUnmanaged>
 
  220  static view_type getView(
const Teuchos::ArrayView<T>& x_in) {
 
  221    const size_t numEnt = 
static_cast<size_t>(x_in.size());
 
  225      return view_type(x_in.getRawPtr(), numEnt);
 
  230template <
class WrappedDualViewType>
 
  232takeSubview(
const WrappedDualViewType& X,
 
  233            const std::pair<size_t, size_t>& rowRng,
 
  234            const Kokkos::ALL_t& colRng)
 
  238  return WrappedDualViewType(X, rowRng, colRng);
 
  241template <
class WrappedDualViewType>
 
  243takeSubview(
const WrappedDualViewType& X,
 
  244            const Kokkos::ALL_t& rowRng,
 
  245            const std::pair<size_t, size_t>& colRng) {
 
  246  using DualViewType = 
typename WrappedDualViewType::DVT;
 
  249  if (X.extent(0) == 0 && X.extent(1) != 0) {
 
  250    return WrappedDualViewType(DualViewType(
"MV::DualView", 0, colRng.second - colRng.first));
 
  252    return WrappedDualViewType(X, rowRng, colRng);
 
  256template <
class WrappedDualViewType>
 
  258takeSubview(
const WrappedDualViewType& X,
 
  259            const std::pair<size_t, size_t>& rowRng,
 
  260            const std::pair<size_t, size_t>& colRng) {
 
  261  using DualViewType = 
typename WrappedDualViewType::DVT;
 
  271  if (X.extent(0) == 0 && X.extent(1) != 0) {
 
  272    return WrappedDualViewType(DualViewType(
"MV::DualView", 0, colRng.second - colRng.first));
 
  274    return WrappedDualViewType(X, rowRng, colRng);
 
  278template <
class WrappedOrNotDualViewType>
 
  280getDualViewStride(
const WrappedOrNotDualViewType& dv) {
 
  285  size_t strides[WrappedOrNotDualViewType::t_dev::rank + 1];
 
  287  const size_t LDA     = strides[1];
 
  288  const size_t numRows = dv.extent(0);
 
  291    return (numRows == 0) ? size_t(1) : numRows;
 
  297template <
class ViewType>
 
  299getViewStride(
const ViewType& view) {
 
  300  const size_t LDA     = view.stride(1);
 
  301  const size_t numRows = view.extent(0);
 
  304    return (numRows == 0) ? size_t(1) : numRows;
 
  310template <
class impl_scalar_type, 
class buffer_device_type>
 
  312    Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports) {
 
  313  if (!imports.need_sync_device()) {
 
  317    size_t localLengthThreshold =
 
  319    return imports.extent(0) <= localLengthThreshold;
 
  323template <
class SC, 
class LO, 
class GO, 
class NT>
 
  324bool runKernelOnHost(const ::Tpetra::MultiVector<SC, LO, GO, NT>& X) {
 
  325  if (!X.need_sync_device()) {
 
  329    size_t localLengthThreshold =
 
  331    return X.getLocalLength() <= localLengthThreshold;
 
  335template <
class SC, 
class LO, 
class GO, 
class NT>
 
  336void multiVectorNormImpl(typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
 
  338                         const ::Tpetra::Details::EWhichNorm whichNorm) {
 
  340  using ::Tpetra::Details::normImpl;
 
  342  using val_type       = 
typename MV::impl_scalar_type;
 
  343  using mag_type       = 
typename MV::mag_type;
 
  344  using dual_view_type = 
typename MV::dual_view_type;
 
  346  auto map                    = 
X.getMap();
 
  347  auto comm                   = 
map.is_null() ? 
nullptr : 
map->getComm().getRawPtr();
 
  348  auto whichVecs              = getMultiVectorWhichVectors(
X);
 
  349  const bool isConstantStride = 
X.isConstantStride();
 
  350  const bool isDistributed    = 
X.isDistributed();
 
  354    using view_type    = 
typename dual_view_type::t_host;
 
  355    using array_layout = 
typename view_type::array_layout;
 
  356    using device_type  = 
typename view_type::device_type;
 
  358    auto X_lcl = 
X.getLocalViewHost(Access::ReadOnly);
 
  359    normImpl<val_type, array_layout, device_type,
 
  361                       isConstantStride, isDistributed, comm);
 
  363    using view_type    = 
typename dual_view_type::t_dev;
 
  364    using array_layout = 
typename view_type::array_layout;
 
  365    using device_type  = 
typename view_type::device_type;
 
  367    auto X_lcl = 
X.getLocalViewDevice(Access::ReadOnly);
 
  368    normImpl<val_type, array_layout, device_type,
 
  370                       isConstantStride, isDistributed, comm);
 
  378template <
typename DstView, 
typename SrcView>
 
  379struct AddAssignFunctor {
 
  387  operator()(
const size_t k)
 const { tgt(
k) += src(
k); }
 
  394template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  400template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  405template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  418template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  424  , whichVectors_(
source.whichVectors_) {
 
  427      "MultiVector(const MultiVector&, " 
  428      "const Teuchos::DataAccess): ";
 
  436    this->
view_         = cpy.view_;
 
  441        true, std::invalid_argument,
 
  442        "Second argument 'copyOrView' has an " 
  446            << Teuchos::Copy << 
" and Teuchos::View = " 
  447            << Teuchos::View << 
".");
 
 
  451template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  463                                        std::invalid_argument,
 
  464                                        "Kokkos::DualView does not match Map. " 
  465                                        "map->getLocalNumElements() = " 
  468                                            << 
", and getStride() = " << 
LDA << 
".");
 
  470  using ::Tpetra::Details::Behavior;
 
  471  const bool debug = Behavior::debug();
 
  473    using ::Tpetra::Details::checkGlobalDualViewValidity;
 
  475    const bool verbose = Behavior::verbose();
 
  476    const auto comm    = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
  478        checkGlobalDualViewValidity(&
gblErrStrm, view, verbose,
 
  484template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  496                                        std::invalid_argument,
 
  497                                        "Kokkos::DualView does not match Map. " 
  498                                        "map->getLocalNumElements() = " 
  501                                            << 
", and getStride() = " << 
LDA << 
".");
 
  503  using ::Tpetra::Details::Behavior;
 
  504  const bool debug = Behavior::debug();
 
  506    using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
 
  508    const bool verbose = Behavior::verbose();
 
  509    const auto comm    = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
  511        checkGlobalWrappedDualViewValidity(&
gblErrStrm, view, verbose,
 
  517template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  520                const typename dual_view_type::t_dev& 
d_view)
 
  522  using Teuchos::ArrayRCP;
 
  531                                        "Map does not match " 
  532                                        "Kokkos::View.  map->getLocalNumElements() = " 
  534                                            << 
", View's column stride = " << 
LDA 
  535                                            << 
", and View's extent(0) = " << 
d_view.extent(0) << 
".");
 
  541  using ::Tpetra::Details::Behavior;
 
  542  const bool debug = Behavior::debug();
 
  544    using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
 
  546    const bool verbose = Behavior::verbose();
 
  547    const auto comm    = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
  559template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  572      "The input Kokkos::DualView's " 
  573      "column stride LDA = " 
  575          << 
".  This may also mean that the input origView's first dimension (number " 
  577          << 
origView.extent(0) << 
") does not not match the number " 
  578                                   "of entries in the Map on the calling process.");
 
  580  using ::Tpetra::Details::Behavior;
 
  581  const bool debug = Behavior::debug();
 
  583    using ::Tpetra::Details::checkGlobalDualViewValidity;
 
  585    const bool verbose = Behavior::verbose();
 
  586    const auto comm    = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
  588        checkGlobalDualViewValidity(&
gblErrStrm, view, verbose,
 
  598template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  607  using Kokkos::subview;
 
  608  const char tfecfFuncName[] = 
"MultiVector(map,view,whichVectors): ";
 
  610  using ::Tpetra::Details::Behavior;
 
  611  const bool debug = Behavior::debug();
 
  613    using ::Tpetra::Details::checkGlobalDualViewValidity;
 
  615    const bool verbose = Behavior::verbose();
 
  616    const auto comm    = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
  618        checkGlobalDualViewValidity(&
gblErrStrm, view, verbose,
 
  631      view.extent(1) != 0 && 
static_cast<size_t>(view.extent(0)) < 
lclNumRows,
 
  632      std::invalid_argument, 
"view.extent(0) = " << view.extent(0) << 
" < map->getLocalNumElements() = " << 
lclNumRows << 
".");
 
  635        view.extent(1) != 0 && view.extent(1) == 0,
 
  636        std::invalid_argument,
 
  637        "view.extent(1) = 0, but whichVectors.size()" 
  641    typedef Teuchos::ArrayView<const size_t>::size_type size_type;
 
  644          whichVectors[
k] == Teuchos::OrdinalTraits<size_t>::invalid(),
 
  645          std::invalid_argument, 
"whichVectors[" << 
k << 
"] = " 
  646                                                         "Teuchos::OrdinalTraits<size_t>::invalid().");
 
  650        view.extent(1) != 0 && 
static_cast<size_t>(view.extent(1)) <= 
maxColInd,
 
  651        std::invalid_argument, 
"view.extent(1) = " << view.extent(1) << 
" <= max(whichVectors) = " << 
maxColInd << 
".");
 
  658                                        "LDA = " << 
LDA << 
" < this->getLocalLength() = " << 
lclNumRows << 
".");
 
  674    whichVectors_.clear();
 
  678template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  687  using Kokkos::subview;
 
  688  const char tfecfFuncName[] = 
"MultiVector(map,view,whichVectors): ";
 
  690  using ::Tpetra::Details::Behavior;
 
  691  const bool debug = Behavior::debug();
 
  693    using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
 
  695    const bool verbose = Behavior::verbose();
 
  696    const auto comm    = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
  698        checkGlobalWrappedDualViewValidity(&
gblErrStrm, view, verbose,
 
  711      view.extent(1) != 0 && 
static_cast<size_t>(view.extent(0)) < 
lclNumRows,
 
  712      std::invalid_argument, 
"view.extent(0) = " << view.extent(0) << 
" < map->getLocalNumElements() = " << 
lclNumRows << 
".");
 
  715        view.extent(1) != 0 && view.extent(1) == 0,
 
  716        std::invalid_argument,
 
  717        "view.extent(1) = 0, but whichVectors.size()" 
  721    typedef Teuchos::ArrayView<const size_t>::size_type size_type;
 
  724          whichVectors[
k] == Teuchos::OrdinalTraits<size_t>::invalid(),
 
  725          std::invalid_argument, 
"whichVectors[" << 
k << 
"] = " 
  726                                                         "Teuchos::OrdinalTraits<size_t>::invalid().");
 
  730        view.extent(1) != 0 && 
static_cast<size_t>(view.extent(1)) <= 
maxColInd,
 
  731        std::invalid_argument, 
"view.extent(1) = " << view.extent(1) << 
" <= max(whichVectors) = " << 
maxColInd << 
".");
 
  738                                        "LDA = " << 
LDA << 
" < this->getLocalLength() = " << 
lclNumRows << 
".");
 
  754    whichVectors_.clear();
 
  758template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  768  using Kokkos::subview;
 
  769  const char tfecfFuncName[] = 
"MultiVector(map,view,origView,whichVectors): ";
 
  771  using ::Tpetra::Details::Behavior;
 
  772  const bool debug = Behavior::debug();
 
  774    using ::Tpetra::Details::checkGlobalDualViewValidity;
 
  776    const bool verbose = Behavior::verbose();
 
  777    const auto comm    = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
  779        checkGlobalDualViewValidity(&
gblErrStrm, view, verbose,
 
  796      view.extent(1) != 0 && 
static_cast<size_t>(view.extent(0)) < 
lclNumRows,
 
  797      std::invalid_argument, 
"view.extent(0) = " << view.extent(0) << 
" < map->getLocalNumElements() = " << 
lclNumRows << 
".");
 
  800        view.extent(1) != 0 && view.extent(1) == 0,
 
  801        std::invalid_argument,
 
  802        "view.extent(1) = 0, but whichVectors.size()" 
  806    typedef Teuchos::ArrayView<const size_t>::size_type size_type;
 
  809          whichVectors[
k] == Teuchos::OrdinalTraits<size_t>::invalid(),
 
  810          std::invalid_argument, 
"whichVectors[" << 
k << 
"] = " 
  811                                                         "Teuchos::OrdinalTraits<size_t>::invalid().");
 
  815        view.extent(1) != 0 && 
static_cast<size_t>(view.extent(1)) <= 
maxColInd,
 
  816        std::invalid_argument, 
"view.extent(1) = " << view.extent(1) << 
" <= max(whichVectors) = " << 
maxColInd << 
".");
 
  823                                        "Map and DualView origView " 
  824                                        "do not match.  LDA = " 
  825                                            << 
LDA << 
" < this->getLocalLength() = " << 
lclNumRows << 
".  origView.extent(0) = " << 
origView.extent(0)
 
  826                                            << 
", origView.stride(1) = " << 
origView.view_device().stride(1) << 
".");
 
  846template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  849                const Teuchos::ArrayView<const Scalar>& 
data,
 
  855  const char tfecfFuncName[] = 
"MultiVector(map,data,LDA,numVecs): ";
 
  864                                                                                                    "map->getLocalNumElements() = " 
  869                                          std::invalid_argument,
 
  870                                          "Input Teuchos::ArrayView does not have enough " 
  871                                          "entries, given the input Map and number of vectors in the MultiVector." 
  873                                              << 
data.size() << 
" < (LDA*(numVecs-1)) + " 
  874                                                                "map->getLocalNumElements () = " 
  880  auto X_out = this->getLocalViewDevice(Access::OverwriteAll);
 
  890               Kokkos::MemoryUnmanaged>
 
  906    typedef decltype(Kokkos::subview(
X_out, Kokkos::ALL(), 0))
 
  908    typedef decltype(Kokkos::subview(
X_in, Kokkos::ALL(), 0))
 
  919template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  922                const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& 
ArrayOfPtrs,
 
  928  const char tfecfFuncName[] = 
"MultiVector(map,ArrayOfPtrs,numVecs): ";
 
  934                                        std::runtime_error, 
"Either numVecs (= " << 
numVecs << 
") < 1, or " 
  935                                                                                               "ArrayOfPtrs.size() (= " 
  941        std::invalid_argument, 
"ArrayOfPtrs[" << 
j << 
"].size() = " << 
X_j_av.size() << 
" < map->getLocalNumElements() = " << 
lclNumRows << 
".");
 
  949  using array_layout        = 
typename decltype(
X_out)::array_layout;
 
  953                                                    Kokkos::MemoryUnmanaged>;
 
  957    Teuchos::ArrayView<const IST> 
X_j_av =
 
  958        Teuchos::av_reinterpret_cast<const IST>(
ArrayOfPtrs[
j]);
 
 
  966template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  969  return whichVectors_.empty();
 
 
  972template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  976  if (this->getMap().
is_null()) {  
 
  977    return static_cast<size_t>(0);
 
  979    return this->getMap()->getLocalNumElements();
 
 
  983template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
  987  if (this->getMap().
is_null()) {  
 
  988    return static_cast<size_t>(0);
 
  990    return this->getMap()->getGlobalNumElements();
 
  994template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1001template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1005  auto thisData    = view_.getDualView().view_host().data();
 
 1007  size_t thisSpan  = view_.getDualView().view_host().span();
 
 
 1012template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1019  const MV* src = 
dynamic_cast<const MV*
>(&
sourceObj);
 
 1020  if (src == 
nullptr) {
 
 1029    return src->getNumVectors() == this->getNumVectors();
 
 
 1033template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1037  return this->getNumVectors();
 
 
 1040template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1043                   const size_t numSameIDs,
 
 1044                   const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& 
permuteToLIDs,
 
 1045                   const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& 
permuteFromLIDs,
 
 1048  using Kokkos::Compat::create_const_view;
 
 1049  using KokkosRefactor::Details::permute_array_multi_column;
 
 1050  using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
 
 1052  using ::Tpetra::Details::Behavior;
 
 1053  using ::Tpetra::Details::getDualViewCopyFromArrayView;
 
 1054  using ::Tpetra::Details::ProfilingRegion;
 
 1061  if (importsAreAliased() && (this->constantNumberOfPackets() != 0) && Behavior::enableGranularTransfers()) {
 
 1066    copyOnHost = !Behavior::assumeMpiIsGPUAware();
 
 1071  const char longFuncNameHost[]   = 
"Tpetra::MultiVector::copyAndPermute[Host]";
 
 1076  const bool verbose = Behavior::verbose();
 
 1077  std::unique_ptr<std::string> 
prefix;
 
 1079    auto map         = this->getMap();
 
 1080    auto comm        = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
 1081    const int myRank = comm.is_null() ? -1 : comm->getRank();
 
 1082    std::ostringstream 
os;
 
 1083    os << 
"Proc " << 
myRank << 
": MV::copyAndPermute: ";
 
 1084    prefix = std::unique_ptr<std::string>(
new std::string(
os.str()));
 
 1086    std::cerr << 
os.str();
 
 1090                                        std::logic_error, 
"permuteToLIDs.extent(0) = " << 
permuteToLIDs.extent(0) << 
" != permuteFromLIDs.extent(0) = " << 
permuteFromLIDs.extent(0) << 
".");
 
 1091  const size_t numCols = this->getNumVectors();
 
 1097                                        "Input MultiVector needs sync to both host " 
 1100    std::ostringstream 
os;
 
 1102    std::cerr << 
os.str();
 
 1106    std::ostringstream 
os;
 
 1107    os << *
prefix << 
"Copy first " << numSameIDs << 
" elements" << 
endl;
 
 1108    std::cerr << 
os.str();
 
 1133  if (numSameIDs > 0) {
 
 1134    const std::pair<size_t, size_t> 
rows(0, numSameIDs);
 
 1136      ProfilingRegion 
regionC(
"Tpetra::MultiVector::copy[Host]");
 
 1137      auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
 
 1140      for (
size_t j = 0; 
j < numCols; ++
j) {
 
 1141        const size_t tgtCol = isConstantStride() ? 
j : whichVectors_[
j];
 
 1150              Kokkos::RangePolicy<execution_space, size_t>;
 
 1152          Tpetra::Details::AddAssignFunctor<
decltype(
tgt_j), 
decltype(
src_j)>
 
 1154          Kokkos::parallel_for(
rp, 
aaf);
 
 1159          space.fence(
"Tpetra::MultiVector::copyAndPermute-1");
 
 1163      ProfilingRegion 
regionC(
"Tpetra::MultiVector::copy[Device]");
 
 1164      auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
 
 1167      for (
size_t j = 0; 
j < numCols; ++
j) {
 
 1168        const size_t tgtCol = isConstantStride() ? 
j : whichVectors_[
j];
 
 1177              Kokkos::RangePolicy<execution_space, size_t>;
 
 1179          Tpetra::Details::AddAssignFunctor<
decltype(
tgt_j), 
decltype(
src_j)>
 
 1181          Kokkos::parallel_for(
rp, 
aaf);
 
 1186          space.fence(
"Tpetra::MultiVector::copyAndPermute-2");
 
 1205      std::ostringstream 
os;
 
 1207      std::cerr << 
os.str();
 
 1211  ProfilingRegion 
regionP(
"Tpetra::MultiVector::permute");
 
 1214    std::ostringstream 
os;
 
 1216    std::cerr << 
os.str();
 
 1222      !this->isConstantStride() || !
sourceMV.isConstantStride();
 
 1225    std::ostringstream 
os;
 
 1228    std::cerr << 
os.str();
 
 1235  Kokkos::DualView<const size_t*, device_type> 
srcWhichVecs;
 
 1236  Kokkos::DualView<const size_t*, device_type> 
tgtWhichVecs;
 
 1238    if (this->whichVectors_.size() == 0) {
 
 1239      Kokkos::DualView<size_t*, device_type> 
tmpTgt(
"tgtWhichVecs", numCols);
 
 1241      for (
size_t j = 0; 
j < numCols; ++
j) {
 
 1249      Teuchos::ArrayView<const size_t> 
tgtWhichVecsT = this->whichVectors_();
 
 1257                                          std::logic_error, 
"tgtWhichVecs.extent(0) = " << 
tgtWhichVecs.extent(0) << 
" != this->getNumVectors() = " << 
this->getNumVectors() << 
".");
 
 1259    if (
sourceMV.whichVectors_.size() == 0) {
 
 1260      Kokkos::DualView<size_t*, device_type> 
tmpSrc(
"srcWhichVecs", numCols);
 
 1262      for (
size_t j = 0; 
j < numCols; ++
j) {
 
 1281                                                                      << 
" != sourceMV.getNumVectors() = " << 
sourceMV.getNumVectors()
 
 1287      std::ostringstream 
os;
 
 1288      os << *
prefix << 
"Get permute LIDs on host" << std::endl;
 
 1289      std::cerr << 
os.str();
 
 1291    auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
 
 1301      std::ostringstream 
os;
 
 1303      std::cerr << 
os.str();
 
 1313        using op_type = KokkosRefactor::Details::AddOp;
 
 1314        permute_array_multi_column_variable_stride(
tgt_h, 
src_h,
 
 1321        using op_type = KokkosRefactor::Details::InsertOp;
 
 1322        permute_array_multi_column_variable_stride(
tgt_h, 
src_h,
 
 1331        using op_type = KokkosRefactor::Details::AddOp;
 
 1335        using op_type = KokkosRefactor::Details::InsertOp;
 
 1342      std::ostringstream 
os;
 
 1344      std::cerr << 
os.str();
 
 1346    auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
 
 1356      std::ostringstream 
os;
 
 1366        using op_type = KokkosRefactor::Details::AddOp;
 
 1374        using op_type = KokkosRefactor::Details::InsertOp;
 
 1375        permute_array_multi_column_variable_stride(space, 
tgt_d, 
src_d,
 
 1384        using op_type = KokkosRefactor::Details::AddOp;
 
 1388        using op_type = KokkosRefactor::Details::InsertOp;
 
 1396    std::ostringstream 
os;
 
 1398    std::cerr << 
os.str();
 
 1402template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1405    const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& 
permuteToLIDs,
 
 1406    const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& 
permuteFromLIDs,
 
 
 1412template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1415                   const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& 
exportLIDs,
 
 1416                   Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
 
 1417                   Kokkos::DualView<size_t*, buffer_device_type> ,
 
 1420  using Kokkos::Compat::create_const_view;
 
 1421  using Kokkos::Compat::getKokkosViewDeepCopy;
 
 1423  using ::Tpetra::Details::Behavior;
 
 1424  using ::Tpetra::Details::ProfilingRegion;
 
 1425  using ::Tpetra::Details::reallocDualViewIfNeeded;
 
 1432  const char longFuncNameHost[]   = 
"Tpetra::MultiVector::packAndPrepare[Host]";
 
 1450  std::unique_ptr<std::string> 
prefix;
 
 1453    auto comm        = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
 1454    const int myRank = comm.is_null() ? -1 : comm->getRank();
 
 1456    os << 
"Proc " << 
myRank << 
": MV::packAndPrepare: ";
 
 1457    prefix = std::unique_ptr<std::string>(
new std::string(
os.str()));
 
 1459    std::cerr << 
os.str();
 
 1462  const size_t numCols = 
sourceMV.getNumVectors();
 
 1473      std::ostringstream 
os;
 
 1474      os << *
prefix << 
"No exports on this proc, DONE" << std::endl;
 
 1475      std::cerr << 
os.str();
 
 1498    std::ostringstream 
os;
 
 1501       << 
", exports.extent(0): " << exports.extent(0)
 
 1503    std::cerr << 
os.str();
 
 1511                                        "Input MultiVector needs sync to both host " 
 1514    std::ostringstream 
os;
 
 1516    std::cerr << 
os.str();
 
 1528    exports.clear_sync_state();
 
 1529    exports.modify_host();
 
 1536    exports.clear_sync_state();
 
 1537    exports.modify_device();
 
 1554      using KokkosRefactor::Details::pack_array_single_column;
 
 1556        std::ostringstream 
os;
 
 1557        os << *
prefix << 
"Pack numCols=1 const stride" << 
endl;
 
 1558        std::cerr << 
os.str();
 
 1562        pack_array_single_column(exports.view_host(),
 
 1569        pack_array_single_column(exports.view_device(),
 
 1577      using KokkosRefactor::Details::pack_array_single_column;
 
 1579        std::ostringstream 
os;
 
 1581        std::cerr << 
os.str();
 
 1585        pack_array_single_column(exports.view_host(),
 
 1592        pack_array_single_column(exports.view_device(),
 
 1602      using KokkosRefactor::Details::pack_array_multi_column;
 
 1604        std::ostringstream 
os;
 
 1605        os << *
prefix << 
"Pack numCols=" << numCols << 
" const stride" << 
endl;
 
 1606        std::cerr << 
os.str();
 
 1610        pack_array_multi_column(exports.view_host(),
 
 1617        pack_array_multi_column(exports.view_device(),
 
 1625      using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
 
 1628        os << *
prefix << 
"Pack numCols=" << numCols << 
" nonconst stride" 
 1635      using IST                                  = impl_scalar_type;
 
 1636      using DV                                   = Kokkos::DualView<IST*, device_type>;
 
 1637      using HES                                  = 
typename DV::t_host::execution_space;
 
 1638      using DES                                  = 
typename DV::t_dev::execution_space;
 
 1642        pack_array_multi_column_variable_stride(exports.view_host(),
 
 1650        pack_array_multi_column_variable_stride(exports.view_device(),
 
 1661    std::ostringstream 
os;
 
 1663    std::cerr << 
os.str();
 
 1667template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1670                   const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& 
exportLIDs,
 
 1671                   Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
 
 1677template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1679typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
 
 1680                                     typename NO::device_type::memory_space>::value,
 
 1685                               const std::string* 
prefix,
 
 1686                               const bool areRemoteLIDsContiguous,
 
 1694    std::ostringstream 
os;
 
 1695    os << *
prefix << 
"Realloc (if needed) imports_ from " 
 1696       << this->imports_.extent(0) << 
" to " << 
newSize << std::endl;
 
 1697    std::cerr << 
os.str();
 
 1702  using IST = impl_scalar_type;
 
 1703  using DV  = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
 
 1715  if ((std::is_same_v<typename dual_view_type::t_dev::device_type, typename dual_view_type::t_host::device_type> ||
 
 1718      areRemoteLIDsContiguous &&
 
 1720      (getNumVectors() == 1) &&
 
 1723    reallocated   = this->imports_.view_device().data() != view_.getDualView().view_device().data() + 
offset;
 
 1725      typedef std::pair<size_t, size_t> range_type;
 
 1726      this->imports_ = 
DV(view_.getDualView(),
 
 1727                          range_type(
offset, getLocalLength()),
 
 1731        std::ostringstream 
os;
 
 1732        os << *
prefix << 
"Aliased imports_ to MV.view_" << std::endl;
 
 1733        std::cerr << 
os.str();
 
 1739    using ::Tpetra::Details::reallocDualViewIfNeeded;
 
 1741        reallocDualViewIfNeeded(this->unaliased_imports_, 
newSize, 
"imports");
 
 1743      std::ostringstream 
os;
 
 1744      os << *
prefix << 
"Finished realloc'ing unaliased_imports_" << std::endl;
 
 1745      std::cerr << 
os.str();
 
 1747    this->imports_ = this->unaliased_imports_;
 
 1752template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1754typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
 
 1755                                      typename NO::device_type::memory_space>::value,
 
 1760                               const std::string* 
prefix,
 
 1761                               const bool areRemoteLIDsContiguous,
 
 1766template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1770                           const std::string* 
prefix,
 
 1771                           const bool areRemoteLIDsContiguous,
 
 
 1777template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1780  return (this->imports_.view_device().data() + 
this->imports_.view_device().extent(0) ==
 
 1781          view_.getDualView().view_device().data() + view_.getDualView().view_device().extent(0));
 
 1784template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 1787                     Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
 
 1788                     Kokkos::DualView<size_t*, buffer_device_type> ,
 
 1791                     const execution_space& space) {
 
 1792  using Kokkos::Compat::getKokkosViewDeepCopy;
 
 1793  using KokkosRefactor::Details::unpack_array_multi_column;
 
 1794  using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
 
 1796  using ::Tpetra::Details::Behavior;
 
 1797  using ::Tpetra::Details::ProfilingRegion;
 
 1815  std::unique_ptr<std::string> 
prefix;
 
 1817    auto map         = this->getMap();
 
 1818    auto comm        = 
map.is_null() ? Teuchos::null : 
map->getComm();
 
 1819    const int myRank = comm.is_null() ? -1 : comm->getRank();
 
 1820    std::ostringstream 
os;
 
 1822    prefix = std::unique_ptr<std::string>(
new std::string(
os.str()));
 
 1824    std::cerr << 
os.str();
 
 1830      std::ostringstream 
os;
 
 1832      std::cerr << 
os.str();
 
 1838  if (importsAreAliased()) {
 
 1840      std::ostringstream 
os;
 
 1841      os << *
prefix << 
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << 
endl;
 
 1842      std::cerr << 
os.str();
 
 1854                                          "imports.extent(0) = " << imports.extent(0)
 
 1855                                                                 << 
" != getNumVectors() * importLIDs.extent(0) = " << 
numVecs 
 1860                                          "constantNumPackets input argument must be nonzero.");
 
 1864                                          std::runtime_error, 
"constantNumPackets must equal numVecs.");
 
 1873    if (this->imports_.need_sync_host()) this->imports_.sync_host();
 
 1875    if (this->imports_.need_sync_device()) this->imports_.sync_device();
 
 1879    std::ostringstream 
os;
 
 1882    std::cerr << 
os.str();
 
 1892  Kokkos::DualView<size_t*, device_type> 
whichVecs;
 
 1893  if (!isConstantStride()) {
 
 1894    Kokkos::View<
const size_t*, Kokkos::HostSpace,
 
 1895                 Kokkos::MemoryUnmanaged>
 
 1921    using dev_exec_space  = 
typename dual_view_type::t_dev::execution_space;
 
 1922    using host_exec_space = 
typename dual_view_type::t_host::execution_space;
 
 1928      std::ostringstream 
os;
 
 1930      std::cerr << 
os.str();
 
 1937      using op_type = KokkosRefactor::Details::InsertOp;
 
 1938      if (isConstantStride()) {
 
 1940          auto X_h = this->getLocalViewHost(Access::ReadWrite);
 
 1941          unpack_array_multi_column(host_exec_space(),
 
 1948          auto X_d = this->getLocalViewDevice(Access::ReadWrite);
 
 1949          unpack_array_multi_column(space,
 
 1957          auto X_h = this->getLocalViewHost(Access::ReadWrite);
 
 1958          unpack_array_multi_column_variable_stride(host_exec_space(),
 
 1967          auto X_d = this->getLocalViewDevice(Access::ReadWrite);
 
 1968          unpack_array_multi_column_variable_stride(space,
 
 1979      using op_type = KokkosRefactor::Details::AddOp;
 
 1980      if (isConstantStride()) {
 
 1982          auto X_h = this->getLocalViewHost(Access::ReadWrite);
 
 1983          unpack_array_multi_column(host_exec_space(),
 
 1989          auto X_d = this->getLocalViewDevice(Access::ReadWrite);
 
 1990          unpack_array_multi_column(space,
 
 1998          auto X_h = this->getLocalViewHost(Access::ReadWrite);
 
 1999          unpack_array_multi_column_variable_stride(host_exec_space(),
 
 2008          auto X_d = this->getLocalViewDevice(Access::ReadWrite);
 
 2009          unpack_array_multi_column_variable_stride(space,
 
 2020      using op_type = KokkosRefactor::Details::AbsMaxOp;
 
 2021      if (isConstantStride()) {
 
 2023          auto X_h = this->getLocalViewHost(Access::ReadWrite);
 
 2024          unpack_array_multi_column(host_exec_space(),
 
 2030          auto X_d = this->getLocalViewDevice(Access::ReadWrite);
 
 2031          unpack_array_multi_column(space,
 
 2039          auto X_h = this->getLocalViewHost(Access::ReadWrite);
 
 2040          unpack_array_multi_column_variable_stride(host_exec_space(),
 
 2049          auto X_d = this->getLocalViewDevice(Access::ReadWrite);
 
 2050          unpack_array_multi_column_variable_stride(space,
 
 2065      std::ostringstream 
os;
 
 2067      std::cerr << 
os.str();
 
 2072    std::ostringstream 
os;
 
 2074    std::cerr << 
os.str();
 
 2078template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2081                     Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
 
 2088template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2092  if (isConstantStride()) {
 
 2093    return static_cast<size_t>(view_.extent(1));
 
 2095    return static_cast<size_t>(whichVectors_.size());
 
 
 2103                const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
 
 2105  using Teuchos::REDUCE_MAX;
 
 2106  using Teuchos::REDUCE_SUM;
 
 2107  using Teuchos::reduceAll;
 
 2108  typedef typename RV::non_const_value_type 
dot_type;
 
 2128    const int nv               = 
static_cast<int>(
numVecs);
 
 2135      typename RV::non_const_type 
lclDots(Kokkos::ViewAllocateWithoutInitializing(
"tmp"), 
numVecs);
 
 2149template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2152        const Kokkos::View<dot_type*, Kokkos::HostSpace>& 
dots)
 const {
 
 2153  using Kokkos::subview;
 
 2154  using Teuchos::Comm;
 
 2155  using Teuchos::null;
 
 2157  using ::Tpetra::Details::Behavior;
 
 2159  typedef Kokkos::View<dot_type*, Kokkos::HostSpace> 
RV;
 
 2160  typedef typename dual_view_type::t_dev_const 
XMV;
 
 2165  const size_t numVecs = this->getNumVectors();
 
 2169  const size_t lclNumRows = this->getLocalLength();
 
 2171  const bool debug        = Behavior::debug();
 
 2174    const bool compat = this->getMap()->isCompatible(*(
A.getMap()));
 
 2176                                          "'*this' MultiVector is not " 
 2177                                          "compatible with the input MultiVector A.  We only test for this " 
 2189      lclNumRows != 
A.getLocalLength(), std::runtime_error,
 
 2190      "MultiVectors do not have the same local length.  " 
 2191      "this->getLocalLength() = " 
 2193                           "A.getLocalLength() = " 
 2194          << 
A.getLocalLength() << 
".");
 
 2196      numVecs != 
A.getNumVectors(), std::runtime_error,
 
 2197      "MultiVectors must have the same number of columns (vectors).  " 
 2198      "this->getNumVectors() = " 
 2200                        "A.getNumVectors() = " 
 2201          << 
A.getNumVectors() << 
".");
 
 2204      "The output array 'dots' must have the same number of entries as the " 
 2205      "number of columns (vectors) in *this and A.  dots.extent(0) = " 
 2212  auto thisView = this->getLocalViewDevice(Access::ReadOnly);
 
 2213  auto A_view   = 
A.getLocalViewDevice(Access::ReadOnly);
 
 2216                                     this->whichVectors_.getRawPtr(),
 
 2217                                     A.whichVectors_.getRawPtr(),
 
 2218                                     this->isConstantStride(), 
A.isConstantStride());
 
 2233template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2237  using ::Tpetra::Details::ProfilingRegion;
 
 2239  using dot_type = 
typename MV::dot_type;
 
 2240  ProfilingRegion 
region(
"Tpetra::multiVectorSingleColumnDot");
 
 2242  auto map = 
x.getMap();
 
 2243  Teuchos::RCP<const Teuchos::Comm<int> > comm =
 
 2244      map.is_null() ? Teuchos::null : 
map->getComm();
 
 2245  if (comm.is_null()) {
 
 2246#if KOKKOS_VERSION >= 40799 
 2247    return KokkosKernels::ArithTraits<dot_type>::zero();
 
 2249    return Kokkos::ArithTraits<dot_type>::zero();
 
 2256    const LO 
lclNumRows = 
static_cast<LO
>(std::min(
x.getLocalLength(),
 
 2257                                                   y.getLocalLength()));
 
 2259#if KOKKOS_VERSION >= 40799 
 2260    dot_type lclDot = KokkosKernels::ArithTraits<dot_type>::zero();
 
 2262    dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero();
 
 2264#if KOKKOS_VERSION >= 40799 
 2274    auto x_2d = 
x.getLocalViewDevice(Access::ReadOnly);
 
 2276    auto y_2d = 
y.getLocalViewDevice(Access::ReadOnly);
 
 2278    lclDot    = KokkosBlas::dot(
x_1d, 
y_1d);
 
 2280    if (
x.isDistributed()) {
 
 2281      using Teuchos::outArg;
 
 2282      using Teuchos::REDUCE_SUM;
 
 2283      using Teuchos::reduceAll;
 
 2293template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2296        const Teuchos::ArrayView<dot_type>& 
dots)
 const {
 
 2300  const size_t numVecs    = this->getNumVectors();
 
 2301  const size_t lclNumRows = this->getLocalLength();
 
 2302  const size_t numDots    = 
static_cast<size_t>(
dots.size());
 
 2313                                        "MultiVectors do not have the same local length.  " 
 2314                                        "this->getLocalLength() = " 
 2316                                                             "A.getLocalLength() = " 
 2317                                            << 
A.getLocalLength() << 
".");
 
 2319                                        "MultiVectors must have the same number of columns (vectors).  " 
 2320                                        "this->getNumVectors() = " 
 2322                                                          "A.getNumVectors() = " 
 2323                                            << 
A.getNumVectors() << 
".");
 
 2325                                        "The output array 'dots' must have the same number of entries as the " 
 2326                                        "number of columns (vectors) in *this and A.  dots.extent(0) = " 
 2329  if (
numVecs == 1 && this->isConstantStride() && 
A.isConstantStride()) {
 
 2333    this->dot(
A, Kokkos::View<dot_type*, Kokkos::HostSpace>(
dots.getRawPtr(), 
numDots));
 
 2337template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2339    norm2(
const Teuchos::ArrayView<mag_type>& 
norms)
 const {
 
 2341  using ::Tpetra::Details::NORM_TWO;
 
 2342  using ::Tpetra::Details::ProfilingRegion;
 
 2343  ProfilingRegion 
region(
"Tpetra::MV::norm2 (host output)");
 
 2346  MV& 
X = 
const_cast<MV&
>(*this);
 
 
 2350template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2352    norm2(
const Kokkos::View<mag_type*, Kokkos::HostSpace>& 
norms)
 const {
 
 
 2357template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2359    norm1(
const Teuchos::ArrayView<mag_type>& 
norms)
 const {
 
 2361  using ::Tpetra::Details::NORM_ONE;
 
 2362  using ::Tpetra::Details::ProfilingRegion;
 
 2363  ProfilingRegion 
region(
"Tpetra::MV::norm1 (host output)");
 
 2366  MV& 
X = 
const_cast<MV&
>(*this);
 
 
 2370template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2372    norm1(
const Kokkos::View<mag_type*, Kokkos::HostSpace>& 
norms)
 const {
 
 
 2377template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2379    normInf(
const Teuchos::ArrayView<mag_type>& 
norms)
 const {
 
 2381  using ::Tpetra::Details::NORM_INF;
 
 2382  using ::Tpetra::Details::ProfilingRegion;
 
 2383  ProfilingRegion 
region(
"Tpetra::MV::normInf (host output)");
 
 2386  MV& 
X = 
const_cast<MV&
>(*this);
 
 2390template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2392    normInf(
const Kokkos::View<mag_type*, Kokkos::HostSpace>& 
norms)
 const {
 
 
 2397template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2399    meanValue(
const Teuchos::ArrayView<impl_scalar_type>& 
means)
 const {
 
 2402  using Kokkos::subview;
 
 2403  using Teuchos::Comm;
 
 2405  using Teuchos::REDUCE_SUM;
 
 2406  using Teuchos::reduceAll;
 
 2407#if KOKKOS_VERSION >= 40799 
 2408  typedef KokkosKernels::ArithTraits<impl_scalar_type> 
ATS;
 
 2410  typedef Kokkos::ArithTraits<impl_scalar_type> 
ATS;
 
 2413  const size_t lclNumRows = this->getLocalLength();
 
 2414  const size_t numVecs    = this->getNumVectors();
 
 2419      "Tpetra::MultiVector::meanValue: means.size() = " << 
numMeans 
 2420                                                        << 
" != this->getNumVectors() = " << 
numVecs << 
".");
 
 2430                       typename local_view_type::host_mirror_type::array_layout,
 
 2432                       Kokkos::MemoryTraits<Kokkos::Unmanaged> >
 
 2436  RCP<const Comm<int> > comm = this->getMap().is_null() ? Teuchos::null : this->getMap()->getComm();
 
 2442    auto X_lcl = 
subview(getLocalViewHost(Access::ReadOnly),
 
 2445    Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> 
lclSums(
"MV::meanValue tmp", 
numVecs);
 
 2446    if (isConstantStride()) {
 
 2450        const size_t col = whichVectors_[
j];
 
 2458    if (!comm.is_null() && 
this->isDistributed()) {
 
 2467    auto X_lcl = 
subview(this->getLocalViewDevice(Access::ReadOnly),
 
 2471    Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> 
lclSums(
"MV::meanValue tmp", 
numVecs);
 
 2472    if (isConstantStride()) {
 
 2476        const size_t col = whichVectors_[
j];
 
 2493    if (!comm.is_null() && 
this->isDistributed()) {
 
 2506      ATS::one() / 
static_cast<mag_type>(this->getGlobalLength());
 
 
 2512template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2516#if KOKKOS_VERSION >= 40799 
 2517  typedef KokkosKernels::ArithTraits<IST> 
ATS;
 
 2519  typedef Kokkos::ArithTraits<IST> 
ATS;
 
 2521  typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> 
pool_type;
 
 2524  const IST 
max = Kokkos::rand<generator_type, IST>::max();
 
 2525  const IST 
min = ATS::is_signed ? IST(-
max) : ATS::zero();
 
 
 2530template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2534  typedef Tpetra::Details::Static_Random_XorShift64_Pool<typename device_type::execution_space> 
tpetra_pool_type;
 
 2535  typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> 
pool_type;
 
 2538  if (!tpetra_pool_type::isSet())
 
 2539    tpetra_pool_type::resetPool(this->getMap()->getComm()->getRank());
 
 2542  const IST 
max        = 
static_cast<IST
>(maxVal);
 
 2543  const IST 
min        = 
static_cast<IST
>(minVal);
 
 2545  auto thisView = this->getLocalViewDevice(Access::OverwriteAll);
 
 2547  if (isConstantStride()) {
 
 2550    const size_t numVecs = getNumVectors();
 
 2552      const size_t col = whichVectors_[
k];
 
 2553      auto X_k         = Kokkos::subview(
thisView, Kokkos::ALL(), col);
 
 
 2559template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2562  using ::Tpetra::Details::ProfilingRegion;
 
 2563  using ::Tpetra::Details::Blas::fill;
 
 2564  using DES = 
typename dual_view_type::t_dev::execution_space;
 
 2565  using HES = 
typename dual_view_type::t_host::execution_space;
 
 2567  ProfilingRegion 
region(
"Tpetra::MultiVector::putScalar");
 
 2572  const LO 
lclNumRows             = 
static_cast<LO
>(this->getLocalLength());
 
 2573  const LO 
numVecs                = 
static_cast<LO
>(this->getNumVectors());
 
 2581    auto X = this->getLocalViewDevice(Access::OverwriteAll);
 
 2582    if (this->isConstantStride()) {
 
 2586           this->whichVectors_.getRawPtr());
 
 2589    auto X = this->getLocalViewHost(Access::OverwriteAll);
 
 2590    if (this->isConstantStride()) {
 
 2594           this->whichVectors_.getRawPtr());
 
 
 2599template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2602  using Teuchos::ArrayRCP;
 
 2603  using Teuchos::Comm;
 
 2614      !this->isConstantStride(), std::logic_error,
 
 2615      "Tpetra::MultiVector::replaceMap: This method does not currently work " 
 2616      "if the MultiVector is a column view of another MultiVector (that is, if " 
 2617      "isConstantStride() == false).");
 
 2647  if (this->getMap().
is_null()) {  
 
 2653        newMap.is_null(), std::invalid_argument,
 
 2654        "Tpetra::MultiVector::replaceMap: both current and new Maps are null.  " 
 2655        "This probably means that the input Map is incorrect.");
 
 2661    const size_t numCols     = this->getNumVectors();
 
 2666  } 
else if (
newMap.is_null()) {  
 
 2669    const size_t newNumRows = 
static_cast<size_t>(0);
 
 2670    const size_t numCols    = this->getNumVectors();
 
 
 2677template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2684#if KOKKOS_VERSION >= 40799 
 2685  if (
theAlpha == KokkosKernels::ArithTraits<IST>::one()) {
 
 2687  if (
theAlpha == Kokkos::ArithTraits<IST>::one()) {
 
 2692  const size_t numVecs    = getNumVectors();
 
 2704    auto Y_lcl = Kokkos::subview(getLocalViewHost(Access::ReadWrite), 
rowRng, 
ALL());
 
 2705    if (isConstantStride()) {
 
 2709        const size_t Y_col = whichVectors_[
k];
 
 2715    auto Y_lcl = Kokkos::subview(getLocalViewDevice(Access::ReadWrite), 
rowRng, 
ALL());
 
 2716    if (isConstantStride()) {
 
 2720        const size_t Y_col = isConstantStride() ? 
k : whichVectors_[
k];
 
 
 2728template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2730    scale(
const Teuchos::ArrayView<const Scalar>& 
alphas) {
 
 2731  const size_t numVecs   = this->getNumVectors();
 
 2735      "Tpetra::MultiVector::" 
 2736      "scale: alphas.size() = " 
 2737          << 
numAlphas << 
" != this->getNumVectors() = " 
 2741  using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
 
 2749  this->scale(
k_alphas.view_device());
 
 
 2752template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2754    scale(
const Kokkos::View<const impl_scalar_type*, device_type>& 
alphas) {
 
 2756  using Kokkos::subview;
 
 2758  const size_t lclNumRows = this->getLocalLength();
 
 2759  const size_t numVecs    = this->getNumVectors();
 
 2762      std::invalid_argument,
 
 2763      "Tpetra::MultiVector::scale(alphas): " 
 2764      "alphas.extent(0) = " 
 2766          << 
" != this->getNumVectors () = " << 
numVecs << 
".");
 
 2787    if (isConstantStride()) {
 
 2791        const size_t Y_col = this->isConstantStride() ? 
k : this->whichVectors_[
k];
 
 2800    if (isConstantStride()) {
 
 2812        const size_t Y_col = this->isConstantStride() ? 
k : this->whichVectors_[
k];
 
 
 2820template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2825  using Kokkos::subview;
 
 2829  const size_t numVecs    = getNumVectors();
 
 2832      lclNumRows != 
A.getLocalLength(), std::invalid_argument,
 
 2833      "this->getLocalLength() = " << 
lclNumRows << 
" != A.getLocalLength() = " 
 2834                                  << 
A.getLocalLength() << 
".");
 
 2836      numVecs != 
A.getNumVectors(), std::invalid_argument,
 
 2837      "this->getNumVectors() = " << 
numVecs << 
" != A.getNumVectors() = " 
 2838                                 << 
A.getNumVectors() << 
".");
 
 2844  auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
 
 2845  auto X_lcl_orig = 
A.getLocalViewDevice(Access::ReadOnly);
 
 2849  if (isConstantStride() && 
A.isConstantStride()) {
 
 2854      const size_t Y_col = this->isConstantStride() ? 
k : this->whichVectors_[
k];
 
 2855      const size_t X_col = 
A.isConstantStride() ? 
k : 
A.whichVectors_[
k];
 
 
 2864template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2870      getLocalLength() != 
A.getLocalLength(), std::runtime_error,
 
 2871      "MultiVectors do not have the same local length.  " 
 2872      "this->getLocalLength() = " 
 2874          << 
" != A.getLocalLength() = " << 
A.getLocalLength() << 
".");
 
 2876      A.getNumVectors() != 
this->getNumVectors(), std::runtime_error,
 
 2877      ": MultiVectors do not have the same number of columns (vectors).  " 
 2878      "this->getNumVectors() = " 
 2880          << 
" != A.getNumVectors() = " << 
A.getNumVectors() << 
".");
 
 2882  const size_t numVecs = getNumVectors();
 
 2884  auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
 
 2885  auto A_view_dev    = 
A.getLocalViewDevice(Access::ReadOnly);
 
 2887  if (isConstantStride() && 
A.isConstantStride()) {
 
 2891    using Kokkos::subview;
 
 2893      const size_t this_col = isConstantStride() ? 
k : whichVectors_[
k];
 
 2895      const size_t A_col    = isConstantStride() ? 
k : 
A.whichVectors_[
k];
 
 
 2902template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2908      getLocalLength() != 
A.getLocalLength(), std::runtime_error,
 
 2909      ": MultiVectors do not have the same local length.  " 
 2910      "this->getLocalLength() = " 
 2912          << 
" != A.getLocalLength() = " << 
A.getLocalLength() << 
".");
 
 2914      A.getNumVectors() != 
this->getNumVectors(), std::runtime_error,
 
 2915      ": MultiVectors do not have the same number of columns (vectors).  " 
 2916      "this->getNumVectors() = " 
 2918          << 
" != A.getNumVectors() = " << 
A.getNumVectors() << 
".");
 
 2919  const size_t numVecs = getNumVectors();
 
 2921  auto this_view_dev = this->getLocalViewDevice(Access::ReadWrite);
 
 2922  auto A_view_dev    = 
A.getLocalViewDevice(Access::ReadOnly);
 
 2924  if (isConstantStride() && 
A.isConstantStride()) {
 
 2928    using Kokkos::subview;
 
 2931      const size_t this_col = isConstantStride() ? 
k : whichVectors_[
k];
 
 2933      const size_t A_col    = isConstantStride() ? 
k : 
A.whichVectors_[
k];
 
 
 2940template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2947  using Kokkos::subview;
 
 2952  const size_t numVecs    = getNumVectors();
 
 2956        lclNumRows != 
A.getLocalLength(), std::invalid_argument,
 
 2957        "this->getLocalLength() = " << 
lclNumRows << 
" != A.getLocalLength() = " 
 2958                                    << 
A.getLocalLength() << 
".");
 
 2960        numVecs != 
A.getNumVectors(), std::invalid_argument,
 
 2961        "this->getNumVectors() = " << 
numVecs << 
" != A.getNumVectors() = " 
 2962                                   << 
A.getNumVectors() << 
".");
 
 2970  auto Y_lcl_orig = this->getLocalViewDevice(Access::ReadWrite);
 
 2972  auto X_lcl_orig = 
A.getLocalViewDevice(Access::ReadOnly);
 
 2976  if (isConstantStride() && 
A.isConstantStride()) {
 
 2981      const size_t Y_col = this->isConstantStride() ? 
k : this->whichVectors_[
k];
 
 2982      const size_t X_col = 
A.isConstantStride() ? 
k : 
A.whichVectors_[
k];
 
 
 2991template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 2999  using Kokkos::subview;
 
 3001  const char tfecfFuncName[] = 
"update(alpha,A,beta,B,gamma): ";
 
 3005  const size_t lclNumRows = this->getLocalLength();
 
 3006  const size_t numVecs    = getNumVectors();
 
 3010        lclNumRows != 
A.getLocalLength(), std::invalid_argument,
 
 3011        "The input MultiVector A has " << 
A.getLocalLength() << 
" local " 
 3012                                                                "row(s), but this MultiVector has " 
 3016        lclNumRows != 
B.getLocalLength(), std::invalid_argument,
 
 3017        "The input MultiVector B has " << 
B.getLocalLength() << 
" local " 
 3018                                                                "row(s), but this MultiVector has " 
 3022        A.getNumVectors() != 
numVecs, std::invalid_argument,
 
 3023        "The input MultiVector A has " << 
A.getNumVectors() << 
" column(s), " 
 3024                                                               "but this MultiVector has " 
 3027        B.getNumVectors() != 
numVecs, std::invalid_argument,
 
 3028        "The input MultiVector B has " << 
B.getNumVectors() << 
" column(s), " 
 3029                                                               "but this MultiVector has " 
 3047  if (isConstantStride() && 
A.isConstantStride() && 
B.isConstantStride()) {
 
 3053      const size_t this_col = isConstantStride() ? 
k : whichVectors_[
k];
 
 3054      const size_t A_col    = 
A.isConstantStride() ? 
k : 
A.whichVectors_[
k];
 
 3055      const size_t B_col    = 
B.isConstantStride() ? 
k : 
B.whichVectors_[
k];
 
 
 3063template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3064Teuchos::ArrayRCP<const Scalar>
 
 3076  auto hostView    = getLocalViewHost(Access::ReadOnly);
 
 3077  const size_t col = isConstantStride() ? 
j : whichVectors_[
j];
 
 3080      Kokkos::Compat::persistingView(
hostView_j, 0, getLocalLength());
 
 3085                                        "hostView_j.extent(0) = " << 
hostView_j.extent(0)
 
 3086                                                                  << 
" < dataAsArcp.size() = " << 
dataAsArcp.size() << 
".  " 
 3087                                                                                                                       "Please report this bug to the Tpetra developers.");
 
 3089  return Teuchos::arcp_reinterpret_cast<const Scalar>(
dataAsArcp);
 
 
 3092template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3093Teuchos::ArrayRCP<Scalar>
 
 3097  using Kokkos::subview;
 
 3101  auto hostView    = getLocalViewHost(Access::ReadWrite);
 
 3102  const size_t col = isConstantStride() ? 
j : whichVectors_[
j];
 
 3105      Kokkos::Compat::persistingView(
hostView_j, 0, getLocalLength());
 
 3110                                        "hostView_j.extent(0) = " << 
hostView_j.extent(0)
 
 3111                                                                  << 
" < dataAsArcp.size() = " << 
dataAsArcp.size() << 
".  " 
 3112                                                                                                                       "Please report this bug to the Tpetra developers.");
 
 3114  return Teuchos::arcp_reinterpret_cast<Scalar>(
dataAsArcp);
 
 
 3117template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3118Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3120    subCopy(
const Teuchos::ArrayView<const size_t>& 
cols)
 const {
 
 3129    if (
cols[
j] != 
cols[
j - 1] + 
static_cast<size_t>(1)) {
 
 
 3144template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3145Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3152  RCP<MV> Y(
new MV(this->getMap(), 
static_cast<size_t>(
colRng.size()), 
false));
 
 
 3157template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3161  return view_.origExtent(0);
 
 
 3164template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3168  return view_.origExtent(1);
 
 
 3171template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 3174                const Teuchos::RCP<const map_type>& 
subMap,
 
 3178  using Kokkos::subview;
 
 3180  using Teuchos::outArg;
 
 3183  using Teuchos::REDUCE_MIN;
 
 3184  using Teuchos::reduceAll;
 
 3186  const char prefix[] = 
"Tpetra::MultiVector constructor (offsetView): ";
 
 3187  const char suffix[] = 
"Please report this bug to the Tpetra developers.";
 
 3190  std::unique_ptr<std::ostringstream> 
errStrm;
 
 3197  const auto comm = 
subMap->getComm();
 
 3199  const int myRank = comm->getRank();
 
 3202  const LO numCols          = 
static_cast<LO
>(
X.getNumVectors());
 
 3205    std::ostringstream 
os;
 
 3208       << 
", origLclNumRows: " << 
X.getOrigNumLocalRows()
 
 3209       << 
", numCols: " << numCols << 
"}, " 
 3211    std::cerr << 
os.str();
 
 3219    errStrm = std::unique_ptr<std::ostringstream>(
new std::ostringstream);
 
 3220    *
errStrm << 
"  Proc " << 
myRank << 
": subMap->getLocalNumElements() (=" 
 3222             << 
") > X.getOrigNumLocalRows() (=" << 
X.getOrigNumLocalRows()
 
 3240  using range_type = std::pair<LO, LO>;
 
 3252      newView.extent(1) != 
X.view_.extent(1)) {
 
 3264      if (
errStrm.get() == 
nullptr) {
 
 3265        errStrm = std::unique_ptr<std::ostringstream>(
new std::ostringstream);
 
 3274                                "dimensions on one or more processes:" 
 3286    std::ostringstream 
os;
 
 3288    std::cerr << 
os.str();
 
 3294    std::ostringstream 
os;
 
 3296    std::cerr << 
os.str();
 
 3300template <
class Scalar, 
class LO, 
class GO, 
class Node>
 
 3308template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3309Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3312               const size_t offset)
 const {
 
 
 3317template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3318Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3326template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3327Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3329    subView(
const Teuchos::ArrayView<const size_t>& 
cols)
 const {
 
 3330  using Teuchos::Array;
 
 3337      "Tpetra::MultiVector::subView" 
 3338      "(const Teuchos::ArrayView<const size_t>&): The input array cols must " 
 3339      "contain at least one entry, but cols.size() = " 
 3347    if (
cols[
j] != 
cols[
j - 1] + 
static_cast<size_t>(1)) {
 
 3362  if (isConstantStride()) {
 
 3363    return rcp(
new MV(this->getMap(), view_, 
cols));
 
 3369    return rcp(
new MV(this->getMap(), view_, 
newcols()));
 
 
 3373template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3374Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3378  using Kokkos::subview;
 
 3379  using Teuchos::Array;
 
 3382  using ::Tpetra::Details::Behavior;
 
 3386  const size_t lclNumRows = this->getLocalLength();
 
 3387  const size_t numVecs    = this->getNumVectors();
 
 3392      static_cast<size_t>(
colRng.size()) > 
numVecs, std::runtime_error,
 
 3393      "colRng.size() = " << 
colRng.size() << 
" > this->getNumVectors() = " 
 3397          (
colRng.lbound() < 
static_cast<Teuchos::Ordinal
>(0) ||
 
 3399      std::invalid_argument, 
"Nonempty input range [" << 
colRng.lbound() << 
"," << 
colRng.ubound() << 
"] exceeds the valid range of column indices " 
 3414  if (
colRng.size() == 0) {
 
 3415    const std::pair<size_t, size_t> 
cols(0, 0);  
 
 3420    if (isConstantStride()) {
 
 3421      const std::pair<size_t, size_t> 
cols(
colRng.lbound(),
 
 3426      if (
static_cast<size_t>(
colRng.size()) == 
static_cast<size_t>(1)) {
 
 3429        const std::pair<size_t, size_t> col(whichVectors_[0] + 
colRng.lbound(),
 
 3430                                            whichVectors_[0] + 
colRng.ubound() + 1);
 
 3435                            whichVectors_.begin() + 
colRng.ubound() + 1);
 
 3441  const bool debug = Behavior::debug();
 
 3443    using Teuchos::Comm;
 
 3444    using Teuchos::outArg;
 
 3445    using Teuchos::REDUCE_MIN;
 
 3446    using Teuchos::reduceAll;
 
 3448    RCP<const Comm<int> > comm = this->getMap().is_null() ? Teuchos::null : this->getMap()->getComm();
 
 3449    if (!comm.is_null()) {
 
 3453      if (
X_ret.is_null()) {
 
 3458                                            "X_ret (the subview of this " 
 3459                                            "MultiVector; the return value of this method) is null on some MPI " 
 3460                                            "process in this MultiVector's communicator.  This should never " 
 3461                                            "happen.  Please report this bug to the Tpetra developers.");
 
 3462      if (!
X_ret.is_null() &&
 
 3463          X_ret->getNumVectors() != 
static_cast<size_t>(
colRng.size())) {
 
 3469                                            "X_ret->getNumVectors() != " 
 3470                                            "colRng.size(), on at least one MPI process in this MultiVector's " 
 3471                                            "communicator.  This should never happen.  " 
 3472                                            "Please report this bug to the Tpetra developers.");
 
 
 3478template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3479Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3483  return Teuchos::rcp_const_cast<MV>(this->subView(
cols));
 
 
 3486template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3487Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3491  return Teuchos::rcp_const_cast<MV>(this->subView(
colRng));
 
 
 3494template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3499  using Kokkos::subview;
 
 3500  typedef std::pair<size_t, size_t> range_type;
 
 3501  const char tfecfFuncName[] = 
"MultiVector(const MultiVector&, const size_t): ";
 
 3503  const size_t numCols = 
X.getNumVectors();
 
 3505  const size_t jj = 
X.isConstantStride() ? 
static_cast<size_t>(
j) : 
static_cast<size_t>(
X.whichVectors_[
j]);
 
 3517    const size_t newSize = 
X.imports_.extent(0) / numCols;
 
 3526    const size_t newSize = 
X.exports_.extent(0) / numCols;
 
 
 3541template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3542Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3546  return Teuchos::rcp(
new V(*
this, 
j));
 
 
 3549template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3550Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
 
 3554  return Teuchos::rcp(
new V(*
this, 
j));
 
 
 3557template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3559    get1dCopy(
const Teuchos::ArrayView<Scalar>& 
A, 
const size_t LDA)
 const {
 
 3561  using input_view_type      = Kokkos::View<IST**, Kokkos::LayoutLeft,
 
 3563                                       Kokkos::MemoryUnmanaged>;
 
 3566  const size_t numRows = this->getLocalLength();
 
 3567  const size_t numCols = this->getNumVectors();
 
 3570                                        "LDA = " << 
LDA << 
" < numRows = " << 
numRows << 
".");
 
 3574                                        "A.size() = " << 
A.size() << 
", but its size must be at least " 
 3575                                                      << (
LDA * (numCols - 1) + 
numRows) << 
" to hold all the entries.");
 
 3578  const std::pair<size_t, size_t> 
colRange(0, numCols);
 
 3580  input_view_type 
A_view_orig(
reinterpret_cast<IST*
>(
A.getRawPtr()),
 
 3595  if (this->need_sync_host() && this->need_sync_device()) {
 
 3598    throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
 
 3600    const bool useHostView = view_.host_view_use_count() >= view_.device_view_use_count();
 
 3601    if (this->isConstantStride()) {
 
 3603        auto srcView_host = this->getLocalViewHost(Access::ReadOnly);
 
 3607        auto srcView_device = this->getLocalViewDevice(Access::ReadOnly);
 
 3612      for (
size_t j = 0; 
j < numCols; ++
j) {
 
 3613        const size_t srcCol = this->whichVectors_[
j];
 
 3617          auto srcView_host    = this->getLocalViewHost(Access::ReadOnly);
 
 3622          auto srcView_device    = this->getLocalViewDevice(Access::ReadOnly);
 
 
 3632template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3637  const size_t numRows       = this->getLocalLength();
 
 3638  const size_t numCols       = this->getNumVectors();
 
 3641      static_cast<size_t>(
ArrayOfPtrs.size()) != numCols,
 
 3643      "Input array of pointers must contain as many " 
 3644      "entries (arrays) as the MultiVector has columns.  ArrayOfPtrs.size() = " 
 3645          << 
ArrayOfPtrs.size() << 
" != getNumVectors() = " << numCols << 
".");
 
 3647  if (
numRows != 0 && numCols != 0) {
 
 3649    for (
size_t j = 0; 
j < numCols; ++
j) {
 
 3652          dstLen < 
numRows, std::invalid_argument, 
"Array j = " << 
j << 
" of " 
 3653                                                                        "the input array of arrays is not long enough to fit all entries in " 
 3654                                                                        "that column of the MultiVector.  ArrayOfPtrs[j].size() = " 
 3659    for (
size_t j = 0; 
j < numCols; ++
j) {
 
 3660      Teuchos::RCP<const V> 
X_j = this->getVector(
j);
 
 
 3667template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3668Teuchos::ArrayRCP<const Scalar>
 
 3671  if (getLocalLength() == 0 || getNumVectors() == 0) {
 
 3672    return Teuchos::null;
 
 3675        !isConstantStride(), std::runtime_error,
 
 3676        "Tpetra::MultiVector::" 
 3677        "get1dView: This MultiVector does not have constant stride, so it is " 
 3678        "not possible to view its data as a single array.  You may check " 
 3679        "whether a MultiVector has constant stride by calling " 
 3680        "isConstantStride().");
 
 3684    auto X_lcl = getLocalViewHost(Access::ReadOnly);
 
 3685    Teuchos::ArrayRCP<const impl_scalar_type> 
dataAsArcp =
 
 3686        Kokkos::Compat::persistingView(
X_lcl);
 
 3687    return Teuchos::arcp_reinterpret_cast<const Scalar>(
dataAsArcp);
 
 
 3691template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3692Teuchos::ArrayRCP<Scalar>
 
 3695  if (this->getLocalLength() == 0 || this->getNumVectors() == 0) {
 
 3696    return Teuchos::null;
 
 3699                               "Tpetra::MultiVector::" 
 3700                               "get1dViewNonConst: This MultiVector does not have constant stride, " 
 3701                               "so it is not possible to view its data as a single array.  You may " 
 3702                               "check whether a MultiVector has constant stride by calling " 
 3703                               "isConstantStride().");
 
 3704    auto X_lcl = getLocalViewHost(Access::ReadWrite);
 
 3705    Teuchos::ArrayRCP<impl_scalar_type> 
dataAsArcp =
 
 3706        Kokkos::Compat::persistingView(
X_lcl);
 
 3707    return Teuchos::arcp_reinterpret_cast<Scalar>(
dataAsArcp);
 
 
 3711template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3712Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
 
 3715  auto X_lcl = getLocalViewHost(Access::ReadWrite);
 
 3721  const size_t myNumRows = this->getLocalLength();
 
 3722  const size_t numCols   = this->getNumVectors();
 
 3725  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > 
views(numCols);
 
 3726  for (
size_t j = 0; 
j < numCols; ++
j) {
 
 3727    const size_t col = this->isConstantStride() ? 
j : this->whichVectors_[
j];
 
 3730        Kokkos::Compat::persistingView(
X_lcl_j);
 
 
 3736template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3740  return view_.getHostView(
s);
 
 
 3743template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3747  return view_.getHostView(
s);
 
 
 3750template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3754  return view_.getHostView(
s);
 
 
 3757template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3761  return view_.getDeviceView(
s);
 
 
 3764template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3768  return view_.getDeviceView(
s);
 
 
 3771template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3775  return view_.getDeviceView(
s);
 
 
 3778template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3785template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3786Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
 
 3792  auto X_lcl = getLocalViewHost(Access::ReadOnly);
 
 3798  const size_t myNumRows = this->getLocalLength();
 
 3799  const size_t numCols   = this->getNumVectors();
 
 3802  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > 
views(numCols);
 
 3803  for (
size_t j = 0; 
j < numCols; ++
j) {
 
 3804    const size_t col = this->isConstantStride() ? 
j : this->whichVectors_[
j];
 
 3806    Teuchos::ArrayRCP<const impl_scalar_type> 
X_lcl_j_arcp =
 
 3807        Kokkos::Compat::persistingView(
X_lcl_j);
 
 
 3813template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 3822  using Teuchos::CONJ_TRANS;
 
 3823  using Teuchos::NO_TRANS;
 
 3826  using Teuchos::rcpFromRef;
 
 3827  using Teuchos::TRANS;
 
 3828  using ::Tpetra::Details::ProfilingRegion;
 
 3829#if KOKKOS_VERSION >= 40799 
 3830  using ATS = KokkosKernels::ArithTraits<impl_scalar_type>;
 
 3832  using ATS = Kokkos::ArithTraits<impl_scalar_type>;
 
 3835  using STS                  = Teuchos::ScalarTraits<Scalar>;
 
 3838  ProfilingRegion 
region(
"Tpetra::MV::multiply");
 
 3870  const bool C_is_local = !this->isDistributed();
 
 3880    auto myMap = this->getMap();
 
 3881    if (!
myMap.is_null() && !
myMap->getComm().is_null()) {
 
 3882      using Teuchos::outArg;
 
 3883      using Teuchos::REDUCE_MIN;
 
 3884      using Teuchos::reduceAll;
 
 3886      auto comm = 
myMap->getComm();
 
 3899      const int myRank = comm->getRank();
 
 3901      if (this->getLocalLength() != 
A_nrows) {
 
 3903                << this->getLocalLength() << 
" != A_nrows=" << 
A_nrows 
 3904                << 
"." << std::endl;
 
 3906      if (this->getNumVectors() != 
B_ncols) {
 
 3908                << this->getNumVectors() << 
" != B_ncols=" << 
B_ncols 
 3909                << 
"." << std::endl;
 
 3914                << 
"." << std::endl;
 
 3921        std::ostringstream 
os;
 
 3925                                              "Inconsistent local dimensions on at " 
 3926                                              "least one process in this object's communicator." 
 3929                                                  << 
"C(" << (
C_is_local ? 
"local" : 
"distr") << 
") = " 
 3931                                                  << (
transA == Teuchos::TRANS ? 
"^T" : (
transA == Teuchos::CONJ_TRANS ? 
"^H" : 
""))
 
 3932                                                  << 
"(" << (
A_is_local ? 
"local" : 
"distr") << 
") * " 
 3934                                                  << (
transA == Teuchos::TRANS ? 
"^T" : (
transA == Teuchos::CONJ_TRANS ? 
"^H" : 
""))
 
 3935                                                  << 
"(" << (
B_is_local ? 
"local" : 
"distr") << 
")." << std::endl
 
 3936                                                  << 
"Global dimensions: C(" << this->getGlobalLength() << 
", " 
 3937                                                  << this->getNumVectors() << 
"), A(" << 
A.getGlobalLength()
 
 3938                                                  << 
", " << 
A.getNumVectors() << 
"), B(" << 
B.getGlobalLength()
 
 3939                                                  << 
", " << 
B.getNumVectors() << 
")." << std::endl
 
 3957                                        "Multiplication of op(A) and op(B) into *this is not a " 
 3958                                        "supported use case.");
 
 3966    const int myRank = this->getMap()->getComm()->getRank();
 
 3977  if (!isConstantStride()) {
 
 3978    C_tmp = 
rcp(
new MV(*
this, Teuchos::Copy));  
 
 3984  if (!
A.isConstantStride()) {
 
 3991  if (!
B.isConstantStride()) {
 
 3998                                            !
A_tmp->isConstantStride(),
 
 4000                                        "Failed to make temporary constant-stride copies of MultiVectors.");
 
 4005    auto A_lcl            = 
A_tmp->getLocalViewDevice(Access::ReadOnly);
 
 4006    auto A_sub            = Kokkos::subview(A_lcl,
 
 4012    auto B_lcl            = 
B_tmp->getLocalViewDevice(Access::ReadOnly);
 
 4013    auto B_sub            = Kokkos::subview(B_lcl,
 
 4020    auto C_lcl         = 
C_tmp->getLocalViewDevice(Access::ReadWrite);
 
 4024    const char ctransA = (
transA == Teuchos::NO_TRANS ? 
'N' : (
transA == Teuchos::TRANS ? 
'T' : 
'C'));
 
 4025    const char ctransB = (
transB == Teuchos::NO_TRANS ? 
'N' : (
transB == Teuchos::TRANS ? 
'T' : 
'C'));
 
 4028    ProfilingRegion 
regionGemm(
"Tpetra::MV::multiply-call-gemm");
 
 4034  if (!isConstantStride()) {
 
 4039  A_tmp = Teuchos::null;
 
 4040  B_tmp = Teuchos::null;
 
 
 4048template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4055  using Kokkos::subview;
 
 4058  const size_t lclNumRows = this->getLocalLength();
 
 4059  const size_t numVecs    = this->getNumVectors();
 
 4062                                        std::runtime_error, 
"MultiVectors do not have the same local length.");
 
 4064      numVecs != 
B.getNumVectors(), std::runtime_error,
 
 4065      "this->getNumVectors" 
 4067          << 
numVecs << 
" != B.getNumVectors() = " << 
B.getNumVectors()
 
 4070  auto this_view = this->getLocalViewDevice(Access::ReadWrite);
 
 4071  auto A_view    = 
A.getLocalViewDevice(Access::ReadOnly);
 
 4072  auto B_view    = 
B.getLocalViewDevice(Access::ReadOnly);
 
 4074  if (isConstantStride() && 
B.isConstantStride()) {
 
 4087      const size_t C_col = isConstantStride() ? 
j : whichVectors_[
j];
 
 4088      const size_t B_col = 
B.isConstantStride() ? 
j : 
B.whichVectors_[
j];
 
 
 4098template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4101  using ::Tpetra::Details::allReduceView;
 
 4102  using ::Tpetra::Details::ProfilingRegion;
 
 4103  ProfilingRegion 
region(
"Tpetra::MV::reduce");
 
 4105  const auto map = this->getMap();
 
 4106  if (
map.get() == 
nullptr) {
 
 4109  const auto comm = 
map->getComm();
 
 4110  if (comm.get() == 
nullptr) {
 
 4121    Kokkos::fence(
"MultiVector::reduce");  
 
 4122    auto X_lcl = this->getLocalViewDevice(Access::ReadWrite);
 
 4125    auto X_lcl = this->getLocalViewHost(Access::ReadWrite);
 
 
 4130template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4141        "Tpetra::MultiVector::replaceLocalValue: row index " << 
lclRow 
 4142                                                             << 
" is invalid.  The range of valid row indices on this process " 
 4143                                                             << this->getMap()->getComm()->getRank() << 
" is [" << 
minLocalIndex 
 4146        vectorIndexOutOfRange(col),
 
 4148        "Tpetra::MultiVector::replaceLocalValue: vector index " << col
 
 4149                                                                << 
" of the multivector is invalid.");
 
 4152  auto view            = this->getLocalViewHost(Access::ReadWrite);
 
 4153  const size_t colInd  = isConstantStride() ? col : whichVectors_[col];
 
 
 4157template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4162                      const bool atomic) {
 
 4169        "Tpetra::MultiVector::sumIntoLocalValue: row index " << 
lclRow 
 4170                                                             << 
" is invalid.  The range of valid row indices on this process " 
 4171                                                             << this->getMap()->getComm()->getRank() << 
" is [" << 
minLocalIndex 
 4174        vectorIndexOutOfRange(col),
 
 4176        "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
 
 4177                                                                << 
" of the multivector is invalid.");
 
 4180  const size_t colInd = isConstantStride() ? col : whichVectors_[col];
 
 4182  auto view = this->getLocalViewHost(Access::ReadWrite);
 
 
 4190template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4203                                          "Global row index " << 
gblRow << 
" is not present on this process " 
 4204                                                              << this->getMap()->getComm()->getRank() << 
".");
 
 4206                                          "Vector index " << col << 
" of the MultiVector is invalid.");
 
 
 4212template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4217                       const bool atomic) {
 
 4224        lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
 
 4226        "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << 
globalRow 
 4227                                                                     << 
" is not present on this process " 
 4228                                                                     << this->getMap()->getComm()->getRank() << 
".");
 
 4230        vectorIndexOutOfRange(col),
 
 4232        "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
 
 4233                                                                 << 
" of the multivector is invalid.");
 
 4236  this->sumIntoLocalValue(
lclRow, col, value, atomic);
 
 
 4239template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4246                           typename dual_view_type::array_layout,
 
 4249  const size_t col = isConstantStride() ? 
j : whichVectors_[
j];
 
 4251      Kokkos::subview(view_, Kokkos::ALL(), col);
 
 4252  return Kokkos::Compat::persistingView(
X_col.view_device());
 
 
 4255template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4258  return view_.need_sync_host();
 
 
 4261template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4264  return view_.need_sync_device();
 
 
 4267template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4271  using Teuchos::TypeNameTraits;
 
 4273  std::ostringstream 
out;
 
 4278      << 
", Node" << Node::name()
 
 4283  out << 
", numRows: " << this->getGlobalLength();
 
 4285    out << 
", numCols: " << this->getNumVectors()
 
 4286        << 
", isConstantStride: " << this->isConstantStride();
 
 4288  if (this->isConstantStride()) {
 
 4289    out << 
", columnStride: " << this->getStride();
 
 
 4296template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4300  return this->descriptionImpl(
"Tpetra::MultiVector");
 
 
 4304template <
typename ViewType>
 
 4305void print_vector(Teuchos::FancyOStream& 
out, 
const char* 
prefix, 
const ViewType& 
v) {
 
 4307  static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
 
 4308                "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
 
 4309  static_assert(ViewType::rank == 2,
 
 4310                "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
 
 4313  out << 
"Values(" << 
prefix << 
"): " << std::endl
 
 4315  const size_t numRows = 
v.extent(0);
 
 4316  const size_t numCols = 
v.extent(1);
 
 4326      for (
size_t j = 0; 
j < numCols; ++
j) {
 
 4328        if (
j + 1 < numCols) {
 
 4341template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4348  if (
vl <= Teuchos::VERB_LOW) {
 
 4349    return std::string();
 
 4351  auto map = this->getMap();
 
 4352  if (
map.is_null()) {
 
 4353    return std::string();
 
 4355  auto outStringP            = Teuchos::rcp(
new std::ostringstream());
 
 4357  Teuchos::FancyOStream& 
out = *
outp;
 
 4358  auto comm                  = 
map->getComm();
 
 4359  const int myRank           = comm->getRank();
 
 4360  const int numProcs         = comm->getSize();
 
 4366  const LO 
lclNumRows = 
static_cast<LO
>(this->getLocalLength());
 
 4369  if (
vl > Teuchos::VERB_MEDIUM) {
 
 4372    if (this->getNumVectors() != 
static_cast<size_t>(1)) {
 
 4373      out << 
"isConstantStride: " << this->isConstantStride() << 
endl;
 
 4375    if (this->isConstantStride()) {
 
 4376      out << 
"Column stride: " << this->getStride() << 
endl;
 
 4387      auto X_dev  = view_.getDeviceCopy();
 
 4388      auto X_host = view_.getHostCopy();
 
 4392        Details::print_vector(
out, 
"unified", 
X_host);
 
 4394        Details::print_vector(
out, 
"host", 
X_host);
 
 4395        Details::print_vector(
out, 
"dev", 
X_dev);
 
 
 4403template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4407                 const Teuchos::EVerbosityLevel 
verbLevel)
 const {
 
 4409  using Teuchos::TypeNameTraits;
 
 4410  using Teuchos::VERB_DEFAULT;
 
 4411  using Teuchos::VERB_LOW;
 
 4412  using Teuchos::VERB_NONE;
 
 4413  const Teuchos::EVerbosityLevel 
vl =
 
 4423  auto map = this->getMap();
 
 4424  if (
map.is_null()) {
 
 4427  auto comm = 
map->getComm();
 
 4428  if (comm.is_null()) {
 
 4432  const int myRank = comm->getRank();
 
 4441  Teuchos::RCP<Teuchos::OSTab> 
tab0, 
tab1;
 
 4445    tab0 = Teuchos::rcp(
new Teuchos::OSTab(
out));
 
 4447    tab1 = Teuchos::rcp(
new Teuchos::OSTab(
out));
 
 4449      out << 
"Template parameters:" << 
endl;
 
 4454          << 
"Node: " << Node::name() << 
endl;
 
 4459    out << 
"Global number of rows: " << this->getGlobalLength() << 
endl;
 
 4461      out << 
"Number of columns: " << this->getNumVectors() << 
endl;
 
 4469    const std::string 
lclStr = this->localDescribeToString(
vl);
 
 
 4474template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4477             const Teuchos::EVerbosityLevel 
verbLevel)
 const {
 
 4478  this->describeImpl(
out, 
"Tpetra::MultiVector", 
verbLevel);
 
 
 4481template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4487template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4490  using ::Tpetra::Details::localDeepCopy;
 
 4491  const char prefix[] = 
"Tpetra::MultiVector::assign: ";
 
 4494                                 this->getNumVectors() != src.getNumVectors(),
 
 4495                             std::invalid_argument,
 
 4496                             prefix << 
"Global dimensions of the two Tpetra::MultiVector " 
 4497                                       "objects do not match.  src has dimensions [" 
 4498                                    << src.getGlobalLength()
 
 4499                                    << 
"," << src.getNumVectors() << 
"], and *this has dimensions [" 
 4500                                    << 
this->getGlobalLength() << 
"," << 
this->getNumVectors() << 
"].");
 
 4503                             prefix << 
"The local row counts of the two Tpetra::MultiVector " 
 4504                                       "objects do not match.  src has " 
 4505                                    << src.getLocalLength() << 
" row(s) " 
 4506                                    << 
" and *this has " << 
this->getLocalLength() << 
" row(s).");
 
 4521    localDeepCopy(this->getLocalViewHost(Access::ReadWrite),
 
 4522                  src.getLocalViewHost(Access::ReadOnly),
 
 4523                  this->isConstantStride(),
 
 4524                  src.isConstantStride(),
 
 4525                  this->whichVectors_(),
 
 4526                  src.whichVectors_());
 
 4528    localDeepCopy(this->getLocalViewDevice(Access::ReadWrite),
 
 4529                  src.getLocalViewDevice(Access::ReadOnly),
 
 4530                  this->isConstantStride(),
 
 4531                  src.isConstantStride(),
 
 4532                  this->whichVectors_(),
 
 4533                  src.whichVectors_());
 
 
 4537template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4539Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
 
 4545                                                                                  this->getNumVectors(),
 
 
 4551template <
class Scalar, 
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
 4554  using ::Tpetra::Details::PackTraits;
 
 4556  const size_t l1 = this->getLocalLength();
 
 4557  const size_t l2 = 
vec.getLocalLength();
 
 4558  if ((
l1 != 
l2) || (this->getNumVectors() != 
vec.getNumVectors())) {
 
 
 4565template <
class ST, 
class LO, 
class GO, 
class NT>
 
 4568  std::swap(
mv.map_, 
this->map_);
 
 4569  std::swap(
mv.view_, 
this->view_);
 
 4570  std::swap(
mv.whichVectors_, 
this->whichVectors_);
 
 
 4573#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS 
 4574template <
class ST, 
class LO, 
class GO, 
class NT>
 
 4576               const Teuchos::SerialDenseMatrix<int, ST>& src) {
 
 4577  using ::Tpetra::Details::localDeepCopy;
 
 4579  using IST = 
typename MV::impl_scalar_type;
 
 4580  using input_view_type =
 
 4581      Kokkos::View<
const IST**, Kokkos::LayoutLeft,
 
 4582                   Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
 
 4583  using pair_type = std::pair<LO, LO>;
 
 4585  const LO 
numRows = 
static_cast<LO
>(src.numRows());
 
 4586  const LO numCols = 
static_cast<LO
>(src.numCols());
 
 4589                                 numCols != 
static_cast<LO
>(dst.getNumVectors()),
 
 4590                             std::invalid_argument, 
"Tpetra::deep_copy: On Process " << dst.getMap()->getComm()->getRank() << 
", dst is " << dst.getLocalLength() << 
" x " << dst.getNumVectors() << 
", but src is " << 
numRows << 
" x " << numCols << 
".");
 
 4592  const IST* 
src_raw = 
reinterpret_cast<const IST*
>(src.values());
 
 4599  localDeepCopy(dst.getLocalViewHost(Access::ReadWrite),
 
 4601                dst.isConstantStride(),
 
 4603                getMultiVectorWhichVectors(dst),
 
 4607template <
class ST, 
class LO, 
class GO, 
class NT>
 
 4608void deep_copy(Teuchos::SerialDenseMatrix<int, ST>& dst,
 
 4610  using ::Tpetra::Details::localDeepCopy;
 
 4612  using IST = 
typename MV::impl_scalar_type;
 
 4613  using output_view_type =
 
 4614      Kokkos::View<IST**, Kokkos::LayoutLeft,
 
 4615                   Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
 
 4616  using pair_type = std::pair<LO, LO>;
 
 4618  const LO 
numRows = 
static_cast<LO
>(dst.numRows());
 
 4619  const LO numCols = 
static_cast<LO
>(dst.numCols());
 
 4623                             std::invalid_argument, 
"Tpetra::deep_copy: On Process " << src.
getMap()->getComm()->getRank() << 
", src is " << src.
getLocalLength() << 
" x " << src.
getNumVectors() << 
", but dst is " << 
numRows << 
" x " << numCols << 
".");
 
 4625  IST* 
dst_raw = 
reinterpret_cast<IST*
>(dst.values());
 
 4639                getMultiVectorWhichVectors(src));
 
 4643template <
class Scalar, 
class LO, 
class GO, 
class NT>
 
 4644Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
 
 4651template <
class ST, 
class LO, 
class GO, 
class NT>
 
 4655  MV 
cpy(src.getMap(), src.getNumVectors(), 
false);
 
 
 4668#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS 
 4669#define TPETRA_MULTIVECTOR_INSTANT(SCALAR, LO, GO, NODE)                                                                                             \ 
 4670  template class MultiVector<SCALAR, LO, GO, NODE>;                                                                                                  \ 
 4671  template MultiVector<SCALAR, LO, GO, NODE> createCopy(const MultiVector<SCALAR, LO, GO, NODE>& src);                                               \ 
 4672  template Teuchos::RCP<MultiVector<SCALAR, LO, GO, NODE> > createMultiVector(const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \ 
 4673  template void deep_copy(MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src);                               \ 
 4674  template void deep_copy(Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src); 
 4677#define TPETRA_MULTIVECTOR_INSTANT(SCALAR, LO, GO, NODE) \ 
 4678  template class MultiVector<SCALAR, LO, GO, NODE>;      \ 
 4679  template MultiVector<SCALAR, LO, GO, NODE> createCopy(const MultiVector<SCALAR, LO, GO, NODE>& src); 
 4683#define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO, SI, LO, GO, NODE) \ 
 4685  template Teuchos::RCP<MultiVector<SO, LO, GO, NODE> >          \ 
 4686  MultiVector<SI, LO, GO, NODE>::convert<SO>() const; 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Functions for initializing and finalizing Tpetra.
 
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
 
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
 
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
 
All-reduce a 1-D or 2-D Kokkos::View.
 
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
 
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
 
Declaration of a function that prints strings from each process.
 
Declaration of Tpetra::Details::isInterComm.
 
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
 
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
 
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
 
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
 
Stand-alone utility functions and macros.
 
Struct that holds views of the contents of a CrsMatrix.
 
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
 
static bool debug()
Whether Tpetra is in debug mode.
 
static bool verbose()
Whether Tpetra is in verbose mode.
 
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
 
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
 
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Number of packets to receive for each receive operation.
 
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
 
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
 
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Number of packets to send for each send operation.
 
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
 
A parallel distribution of indices over processes.
 
One or more distributed dense vectors.
 
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,...
 
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
 
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
 
size_t getStride() const
Stride between columns in the multivector.
 
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
 
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool areRemoteLIDsContiguous=false, const CombineMode CM=INSERT) override
Reallocate imports_ if needed.
 
void reduce()
Sum values of a locally replicated multivector across all processes.
 
virtual bool checkSizes(const SrcDistObject &sourceObj) override
Whether data redistribution between sourceObj and this object is legal.
 
void randomize()
Set all values in the multivector to pseudorandom numbers.
 
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
 
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
 
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
 
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
 
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
 
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
 
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Map and their communicator.
 
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
 
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
 
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
 
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
 
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
 
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
 
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
 
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
 
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using local (row) index.
 
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Swap contents of mv with contents of *this.
 
size_t getLocalLength() const
Local number of rows on the calling process.
 
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
 
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
 
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
 
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
 
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
 
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM, const execution_space &space) override
Same as copyAndPermute, but do operations in space.
 
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
 
global_size_t getGlobalLength() const
Global number of rows in the multivector.
 
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
 
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
 
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using global row index.
 
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
 
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
 
size_t getNumVectors() const
Number of columns in the multivector.
 
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
 
virtual size_t constantNumberOfPackets() const override
Number of packets to send per LID.
 
wrapped_dual_view_type getWrappedDualView() const
Return the wrapped dual view holding this MultiVector's local data.
 
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using local row index.
 
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
 
Teuchos::RCP< MultiVector< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another MultiVector with the same entries, but converted to a different Scalar type T.
 
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
 
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
 
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
 
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this)
 
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
 
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with the given verbosity level to a FancyOStream.
 
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
 
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using global row index.
 
bool aliases(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &other) const
Whether this multivector's memory might alias other. This is conservative: if either this or other is...
 
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
 
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
 
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
 
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
 
bool isConstantStride() const
Whether this multivector has constant stride between columns.
 
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
 
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
 
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
 
virtual std::string description() const override
A simple one-line description of this object.
 
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
 
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string.
 
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
 
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
 
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
 
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
 
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
 
Abstract base class for objects that can be the source of an Import or Export operation.
 
Implementation details of Tpetra.
 
int local_ordinal_type
Default value of Scalar template parameter.
 
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
 
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator.
 
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
 
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
 
Namespace Tpetra contains the class and methods constituting the Tpetra library.
 
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
 
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
 
size_t global_size_t
Global size_t object.
 
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
 
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
 
CombineMode
Rule for combining data in an Import or Export.
 
@ REPLACE
Replace existing values with new values.
 
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
 
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes)
 
@ INSERT
Insert new values that don't currently exist.