Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockMultiVector_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
11#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
12
13#include "Tpetra_BlockMultiVector_decl.hpp"
15#include "Tpetra_BlockView.hpp"
16#include "Teuchos_OrdinalTraits.hpp"
17
18namespace Tpetra {
19
20template <class Scalar, class LO, class GO, class Node>
26
27template <class Scalar, class LO, class GO, class Node>
31 return mv_;
32}
33
34template <class Scalar, class LO, class GO, class Node>
35Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
36BlockMultiVector<Scalar, LO, GO, Node>::
37 getBlockMultiVectorFromSrcDistObject(const Tpetra::SrcDistObject& src) {
38 typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
39 const BMV* src_bmv = dynamic_cast<const BMV*>(&src);
40 TEUCHOS_TEST_FOR_EXCEPTION(
41 src_bmv == nullptr, std::invalid_argument,
42 "Tpetra::"
43 "BlockMultiVector: The source object of an Import or Export to a "
44 "BlockMultiVector, must also be a BlockMultiVector.");
45 return Teuchos::rcp(src_bmv, false); // nonowning RCP
46}
47
48template <class Scalar, class LO, class GO, class Node>
51 const Teuchos::DataAccess copyOrView)
53 , meshMap_(in.meshMap_)
54 , pointMap_(in.pointMap_)
55 , mv_(in.mv_, copyOrView)
56 , blockSize_(in.blockSize_) {}
57
58template <class Scalar, class LO, class GO, class Node>
61 const LO blockSize,
62 const LO numVecs)
64 , // shallow copy
65 meshMap_(meshMap)
66 , pointMap_(makePointMapRCP(meshMap, blockSize))
67 , mv_(pointMap_, numVecs)
68 , // nonowning RCP is OK, since pointMap_ won't go away
69 blockSize_(blockSize) {}
70
71template <class Scalar, class LO, class GO, class Node>
74 const map_type& pointMap,
75 const LO blockSize,
76 const LO numVecs)
78 , // shallow copy
79 meshMap_(meshMap)
80 , pointMap_(new map_type(pointMap))
81 , mv_(pointMap_, numVecs)
82 , blockSize_(blockSize) {}
83
84template <class Scalar, class LO, class GO, class Node>
87 const map_type& meshMap,
88 const LO blockSize)
90 , // shallow copy
91 meshMap_(meshMap)
92 , blockSize_(blockSize) {
93 using Teuchos::RCP;
94
95 if (X_mv.getCopyOrView() == Teuchos::View) {
96 // The input MultiVector has view semantics, so assignment just
97 // does a shallow copy.
98 mv_ = X_mv;
99 } else if (X_mv.getCopyOrView() == Teuchos::Copy) {
100 // The input MultiVector has copy semantics. We can't change
101 // that, but we can make a view of the input MultiVector and
102 // change the view to have view semantics. (That sounds silly;
103 // shouldn't views always have view semantics? but remember that
104 // "view semantics" just governs the default behavior of the copy
105 // constructor and assignment operator.)
107 const size_t numCols = X_mv.getNumVectors();
108 if (numCols == 0) {
109 Teuchos::Array<size_t> cols(0); // view will have zero columns
110 X_view_const = X_mv.subView(cols());
111 } else { // Range1D is an inclusive range
112 X_view_const = X_mv.subView(Teuchos::Range1D(0, numCols - 1));
113 }
115 X_view_const.is_null(), std::logic_error,
116 "Tpetra::"
117 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
118 "should never happen. Please report this bug to the Tpetra developers.");
119
120 // It's perfectly OK to cast away const here. Those view methods
121 // should be marked const anyway, because views can't change the
122 // allocation (just the entries themselves).
123 RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type>(X_view_const);
125 X_view->getCopyOrView() != Teuchos::View, std::logic_error,
126 "Tpetra::"
127 "BlockMultiVector constructor: We just set a MultiVector "
128 "to have view semantics, but it claims that it doesn't have view "
129 "semantics. This should never happen. "
130 "Please report this bug to the Tpetra developers.");
131 mv_ = *X_view; // MultiVector::operator= does a shallow copy here
132 }
133
134 // At this point, mv_ has been assigned, so we can ignore X_mv.
135 Teuchos::RCP<const map_type> pointMap = mv_.getMap();
136 pointMap_ = pointMap;
137}
138
139template <class Scalar, class LO, class GO, class Node>
142 const map_type& newMeshMap,
143 const map_type& newPointMap,
144 const size_t offset)
146 , // shallow copy
147 meshMap_(newMeshMap)
148 , pointMap_(new map_type(newPointMap))
149 , mv_(X.mv_, newPointMap, offset * X.getBlockSize())
150 , // MV "offset view" constructor
151 blockSize_(X.getBlockSize()) {}
152
153template <class Scalar, class LO, class GO, class Node>
156 const map_type& newMeshMap,
157 const size_t offset)
159 , // shallow copy
160 meshMap_(newMeshMap)
161 , pointMap_(makePointMapRCP(newMeshMap, X.getBlockSize()))
162 , mv_(X.mv_, pointMap_, offset * X.getBlockSize())
163 , // MV "offset view" constructor
164 blockSize_(X.getBlockSize()) {}
165
166template <class Scalar, class LO, class GO, class Node>
171
172template <class Scalar, class LO, class GO, class Node>
175 makePointMap(const map_type& meshMap, const LO blockSize) {
177 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
178
179 const GST gblNumMeshMapInds =
180 static_cast<GST>(meshMap.getGlobalNumElements());
181 const size_t lclNumMeshMapIndices =
182 static_cast<size_t>(meshMap.getLocalNumElements());
183 const GST gblNumPointMapInds =
184 gblNumMeshMapInds * static_cast<GST>(blockSize);
185 const size_t lclNumPointMapInds =
186 lclNumMeshMapIndices * static_cast<size_t>(blockSize);
187 const GO indexBase = meshMap.getIndexBase();
188
189 if (meshMap.isContiguous()) {
191 meshMap.getComm());
192 } else {
193 // "Hilbert's Hotel" trick: multiply each process' GIDs by
194 // blockSize, and fill in. That ensures correctness even if the
195 // mesh Map is overlapping.
196 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList();
197 const size_type lclNumMeshGblInds = lclMeshGblInds.size();
198 Teuchos::Array<GO> lclPointGblInds(lclNumPointMapInds);
199 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
200 const GO meshGid = lclMeshGblInds[g];
201 const GO pointGidStart = indexBase +
202 (meshGid - indexBase) * static_cast<GO>(blockSize);
203 const size_type offset = g * static_cast<size_type>(blockSize);
204 for (LO k = 0; k < blockSize; ++k) {
205 const GO pointGid = pointGidStart + static_cast<GO>(k);
206 lclPointGblInds[offset + static_cast<size_type>(k)] = pointGid;
207 }
208 }
210 meshMap.getComm());
211 }
212}
213
214template <class Scalar, class LO, class GO, class Node>
215Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
217 makePointMapRCP(const map_type& meshMap, const LO blockSize) {
219 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
220
221 const GST gblNumMeshMapInds =
222 static_cast<GST>(meshMap.getGlobalNumElements());
223 const size_t lclNumMeshMapIndices =
224 static_cast<size_t>(meshMap.getLocalNumElements());
225 const GST gblNumPointMapInds =
226 gblNumMeshMapInds * static_cast<GST>(blockSize);
227 const size_t lclNumPointMapInds =
228 lclNumMeshMapIndices * static_cast<size_t>(blockSize);
229 const GO indexBase = meshMap.getIndexBase();
230
231 if (meshMap.isContiguous()) {
233 meshMap.getComm()));
234 } else {
235 // "Hilbert's Hotel" trick: multiply each process' GIDs by
236 // blockSize, and fill in. That ensures correctness even if the
237 // mesh Map is overlapping.
238 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList();
239 const size_type lclNumMeshGblInds = lclMeshGblInds.size();
240 Teuchos::Array<GO> lclPointGblInds(lclNumPointMapInds);
241 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
242 const GO meshGid = lclMeshGblInds[g];
243 const GO pointGidStart = indexBase +
244 (meshGid - indexBase) * static_cast<GO>(blockSize);
245 const size_type offset = g * static_cast<size_type>(blockSize);
246 for (LO k = 0; k < blockSize; ++k) {
247 const GO pointGid = pointGidStart + static_cast<GO>(k);
248 lclPointGblInds[offset + static_cast<size_type>(k)] = pointGid;
249 }
250 }
251 return Teuchos::rcp(new map_type(gblNumPointMapInds, lclPointGblInds(), indexBase,
252 meshMap.getComm()));
253 }
254}
255
256template <class Scalar, class LO, class GO, class Node>
259 const LO colIndex,
260 const Scalar vals[]) {
261 auto X_dst = getLocalBlockHost(localRowIndex, colIndex, Access::ReadWrite);
262 typename const_little_vec_type::host_mirror_type::const_type X_src(reinterpret_cast<const impl_scalar_type*>(vals),
263 getBlockSize());
264 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
265 using exec_space = typename device_type::execution_space;
266 Kokkos::deep_copy(exec_space(), X_dst, X_src);
267}
268
269template <class Scalar, class LO, class GO, class Node>
272 const LO colIndex,
273 const Scalar vals[]) {
274 if (!meshMap_.isNodeLocalElement(localRowIndex)) {
275 return false;
276 } else {
277 replaceLocalValuesImpl(localRowIndex, colIndex, vals);
278 return true;
279 }
280}
281
282template <class Scalar, class LO, class GO, class Node>
285 const LO colIndex,
286 const Scalar vals[]) {
287 const LO localRowIndex = meshMap_.getLocalElement(globalRowIndex);
288 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid()) {
289 return false;
290 } else {
291 replaceLocalValuesImpl(localRowIndex, colIndex, vals);
292 return true;
293 }
294}
295
296template <class Scalar, class LO, class GO, class Node>
299 const LO colIndex,
300 const Scalar vals[]) {
301 auto X_dst = getLocalBlockHost(localRowIndex, colIndex, Access::ReadWrite);
302 typename const_little_vec_type::host_mirror_type::const_type X_src(reinterpret_cast<const impl_scalar_type*>(vals),
303 getBlockSize());
304 AXPY(static_cast<impl_scalar_type>(STS::one()), X_src, X_dst);
305}
306
307template <class Scalar, class LO, class GO, class Node>
310 const LO colIndex,
311 const Scalar vals[]) {
312 if (!meshMap_.isNodeLocalElement(localRowIndex)) {
313 return false;
314 } else {
315 sumIntoLocalValuesImpl(localRowIndex, colIndex, vals);
316 return true;
317 }
318}
319
320template <class Scalar, class LO, class GO, class Node>
323 const LO colIndex,
324 const Scalar vals[]) {
325 const LO localRowIndex = meshMap_.getLocalElement(globalRowIndex);
326 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid()) {
327 return false;
328 } else {
329 sumIntoLocalValuesImpl(localRowIndex, colIndex, vals);
330 return true;
331 }
332}
333
334template <class Scalar, class LO, class GO, class Node>
335typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
338 const LO colIndex,
339 const Access::ReadOnlyStruct) const {
340 if (!isValidLocalMeshIndex(localRowIndex)) {
341 return const_little_host_vec_type();
342 } else {
343 const size_t blockSize = getBlockSize();
344 auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
345 LO startRow = localRowIndex * blockSize;
346 LO endRow = startRow + blockSize;
347 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
348 colIndex);
349 }
350}
351
352template <class Scalar, class LO, class GO, class Node>
353typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
356 const LO colIndex,
357 const Access::OverwriteAllStruct) {
358 if (!isValidLocalMeshIndex(localRowIndex)) {
359 return little_host_vec_type();
360 } else {
361 const size_t blockSize = getBlockSize();
362 auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
365 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
366 colIndex);
367 }
368}
369
370template <class Scalar, class LO, class GO, class Node>
371typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
374 const LO colIndex,
375 const Access::ReadWriteStruct) {
376 if (!isValidLocalMeshIndex(localRowIndex)) {
377 return little_host_vec_type();
378 } else {
379 const size_t blockSize = getBlockSize();
380 auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
381 LO startRow = localRowIndex * blockSize;
382 LO endRow = startRow + blockSize;
383 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
384 colIndex);
385 }
386}
387
388template <class Scalar, class LO, class GO, class Node>
389Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
390BlockMultiVector<Scalar, LO, GO, Node>::
391 getMultiVectorFromSrcDistObject(const Tpetra::SrcDistObject& src) {
392 using Teuchos::rcp;
393 using Teuchos::rcpFromRef;
394
395 // The source object of an Import or Export must be a
396 // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
397 // them in that order; one must succeed. Note that the target of
398 // the Import or Export calls checkSizes in DistObject's doTransfer.
399 typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
400 const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*>(&src);
401 if (srcBlkVec == nullptr) {
402 const mv_type* srcMultiVec = dynamic_cast<const mv_type*>(&src);
403 if (srcMultiVec == nullptr) {
404 // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
405 // currently does a shallow copy by default. This is why we
406 // return by RCP, rather than by value.
407 return rcp(new mv_type());
408 } else { // src is a MultiVector
409 return rcp(srcMultiVec, false); // nonowning RCP
410 }
411 } else { // src is a BlockMultiVector
412 return rcpFromRef(srcBlkVec->mv_); // nonowning RCP
413 }
414}
415
416template <class Scalar, class LO, class GO, class Node>
419 return !getMultiVectorFromSrcDistObject(src).is_null();
420}
421
422template <class Scalar, class LO, class GO, class Node>
425 const size_t numSameIDs,
426 const Kokkos::DualView<const local_ordinal_type*,
428 const Kokkos::DualView<const local_ordinal_type*,
430 const CombineMode CM) {
431 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
432 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
433 "instead, create a point importer using makePointMap function.");
434}
435
436template <class Scalar, class LO, class GO, class Node>
439 const Kokkos::DualView<const local_ordinal_type*,
441 Kokkos::DualView<packet_type*,
442 buffer_device_type>& exports,
443 Kokkos::DualView<size_t*,
446 size_t& constantNumPackets) {
447 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
448 "Tpetra::BlockMultiVector::packAndPrepare: Do NOT use this; "
449 "instead, create a point importer using makePointMap function.");
450}
451
452template <class Scalar, class LO, class GO, class Node>
454 unpackAndCombine(const Kokkos::DualView<const local_ordinal_type*,
456 Kokkos::DualView<packet_type*,
458 imports,
459 Kokkos::DualView<size_t*,
462 const size_t constantNumPackets,
463 const CombineMode combineMode) {
464 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
465 "Tpetra::BlockMultiVector::unpackAndCombine: Do NOT use this; "
466 "instead, create a point importer using makePointMap function.");
467}
468
469template <class Scalar, class LO, class GO, class Node>
472 return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid() &&
473 meshMap_.isNodeLocalElement(meshLocalIndex);
474}
475
476template <class Scalar, class LO, class GO, class Node>
478 putScalar(const Scalar& val) {
479 mv_.putScalar(val);
480}
481
482template <class Scalar, class LO, class GO, class Node>
484 scale(const Scalar& val) {
485 mv_.scale(val);
486}
487
488template <class Scalar, class LO, class GO, class Node>
490 update(const Scalar& alpha,
492 const Scalar& beta) {
493 mv_.update(alpha, X.mv_, beta);
494}
495
496namespace Impl {
497// Y := alpha * D * X
498template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
499struct BlockWiseMultiply {
500 typedef typename ViewD::size_type Size;
501
502 private:
503 typedef typename ViewD::device_type Device;
504 typedef typename ViewD::non_const_value_type ImplScalar;
505 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
506
507 template <typename View>
508 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
509 typename View::device_type, Unmanaged>;
510 typedef UnmanagedView<ViewY> UnMViewY;
511 typedef UnmanagedView<ViewD> UnMViewD;
512 typedef UnmanagedView<ViewX> UnMViewX;
513
514 const Size block_size_;
515 Scalar alpha_;
516 UnMViewY Y_;
517 UnMViewD D_;
518 UnMViewX X_;
519
520 public:
521 BlockWiseMultiply(const Size block_size, const Scalar& alpha,
522 const ViewY& Y, const ViewD& D, const ViewX& X)
523 : block_size_(block_size)
524 , alpha_(alpha)
525 , Y_(Y)
526 , D_(D)
527 , X_(X) {}
528
529 KOKKOS_INLINE_FUNCTION
530 void operator()(const Size k) const {
531#if KOKKOS_VERSION >= 40799
532 const auto zero = KokkosKernels::ArithTraits<Scalar>::zero();
533#else
534 const auto zero = Kokkos::ArithTraits<Scalar>::zero();
535#endif
536 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL(), Kokkos::ALL());
537 const auto num_vecs = X_.extent(1);
538 for (Size i = 0; i < num_vecs; ++i) {
539 Kokkos::pair<Size, Size> kslice(k * block_size_, (k + 1) * block_size_);
540 auto X_curBlk = Kokkos::subview(X_, kslice, i);
541 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
542 // Y_curBlk := alpha * D_curBlk * X_curBlk.
543 // Recall that GEMV does an update (+=) of the last argument.
544 Tpetra::FILL(Y_curBlk, zero);
545 Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
546 }
547 }
548};
549
550template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
551inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
552createBlockWiseMultiply(const int block_size, const Scalar& alpha,
553 const ViewY& Y, const ViewD& D, const ViewX& X) {
554 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
555}
556
557template <typename ViewY,
558 typename Scalar,
559 typename ViewD,
560 typename ViewZ,
561 typename LO = typename ViewY::size_type>
562class BlockJacobiUpdate {
563 private:
564 typedef typename ViewD::device_type Device;
565 typedef typename ViewD::non_const_value_type ImplScalar;
566 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
567
568 template <typename ViewType>
569 using UnmanagedView = Kokkos::View<typename ViewType::data_type,
570 typename ViewType::array_layout,
571 typename ViewType::device_type,
572 Unmanaged>;
573 typedef UnmanagedView<ViewY> UnMViewY;
574 typedef UnmanagedView<ViewD> UnMViewD;
575 typedef UnmanagedView<ViewZ> UnMViewZ;
576
577 const LO blockSize_;
578 UnMViewY Y_;
579 const Scalar alpha_;
580 UnMViewD D_;
581 UnMViewZ Z_;
582 const Scalar beta_;
583
584 public:
585 BlockJacobiUpdate(const ViewY& Y,
586 const Scalar& alpha,
587 const ViewD& D,
588 const ViewZ& Z,
589 const Scalar& beta)
590 : blockSize_(D.extent(1))
591 ,
592 // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
593 Y_(Y)
594 , alpha_(alpha)
595 , D_(D)
596 , Z_(Z)
597 , beta_(beta) {
598 static_assert(static_cast<int>(ViewY::rank) == 1,
599 "Y must have rank 1.");
600 static_assert(static_cast<int>(ViewD::rank) == 3, "D must have rank 3.");
601 static_assert(static_cast<int>(ViewZ::rank) == 1,
602 "Z must have rank 1.");
603 // static_assert (static_cast<int> (ViewZ::rank) ==
604 // static_cast<int> (ViewY::rank),
605 // "Z must have the same rank as Y.");
606 }
607
608 KOKKOS_INLINE_FUNCTION void
609 operator()(const LO& k) const {
610 using Kokkos::ALL;
611 using Kokkos::subview;
612 typedef Kokkos::pair<LO, LO> range_type;
613#if KOKKOS_VERSION >= 40799
614 typedef KokkosKernels::ArithTraits<Scalar> KAT;
615#else
616 typedef Kokkos::ArithTraits<Scalar> KAT;
617#endif
618
619 // We only have to implement the alpha != 0 case.
620
621 auto D_curBlk = subview(D_, k, ALL(), ALL());
622 const range_type kslice(k * blockSize_, (k + 1) * blockSize_);
623
624 // Z.update (STS::one (), X, -STS::one ()); // assume this is done
625
626 auto Z_curBlk = subview(Z_, kslice);
627 auto Y_curBlk = subview(Y_, kslice);
628 // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
629 if (beta_ == KAT::zero()) {
630 Tpetra::FILL(Y_curBlk, KAT::zero());
631 } else if (beta_ != KAT::one()) {
632 Tpetra::SCAL(beta_, Y_curBlk);
633 }
634 Tpetra::GEMV(alpha_, D_curBlk, Z_curBlk, Y_curBlk);
635 }
636};
637
638template <class ViewY,
639 class Scalar,
640 class ViewD,
641 class ViewZ,
642 class LO = typename ViewD::size_type>
643void blockJacobiUpdate(const ViewY& Y,
644 const Scalar& alpha,
645 const ViewD& D,
646 const ViewZ& Z,
647 const Scalar& beta) {
648 static_assert(Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
649 static_assert(Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
650 static_assert(Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
651 static_assert(static_cast<int>(ViewY::rank) == static_cast<int>(ViewZ::rank),
652 "Y and Z must have the same rank.");
653 static_assert(static_cast<int>(ViewD::rank) == 3, "D must have rank 3.");
654
655 const auto lclNumMeshRows = D.extent(0);
656
657#ifdef HAVE_TPETRA_DEBUG
658 // D.extent(0) is the (local) number of mesh rows.
659 // D.extent(1) is the block size. Thus, their product should be
660 // the local number of point rows, that is, the number of rows in Y.
661 const auto blkSize = D.extent(1);
662 const auto lclNumPtRows = lclNumMeshRows * blkSize;
663 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(0) != lclNumPtRows, std::invalid_argument,
664 "blockJacobiUpdate: Y.extent(0) = " << Y.extent(0) << " != "
665 "D.extent(0)*D.extent(1) = "
666 << lclNumMeshRows << " * " << blkSize
667 << " = " << lclNumPtRows << ".");
668 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(0) != Z.extent(0), std::invalid_argument,
669 "blockJacobiUpdate: Y.extent(0) = " << Y.extent(0) << " != "
670 "Z.extent(0) = "
671 << Z.extent(0) << ".");
672 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(1) != Z.extent(1), std::invalid_argument,
673 "blockJacobiUpdate: Y.extent(1) = " << Y.extent(1) << " != "
674 "Z.extent(1) = "
675 << Z.extent(1) << ".");
676#endif // HAVE_TPETRA_DEBUG
677
678 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor(Y, alpha, D, Z, beta);
679 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
680 // lclNumMeshRows must fit in LO, else the Map would not be correct.
681 range_type range(0, static_cast<LO>(lclNumMeshRows));
682 Kokkos::parallel_for(range, functor);
683}
684
685} // namespace Impl
686
687template <class Scalar, class LO, class GO, class Node>
690 const Kokkos::View<const impl_scalar_type***,
691 device_type, Kokkos::MemoryUnmanaged>& D,
693 using Kokkos::ALL;
694 typedef typename device_type::execution_space exec_space;
695 const LO lclNumMeshRows = meshMap_.getLocalNumElements();
696
697 if (alpha == STS::zero()) {
698 this->putScalar(STS::zero());
699 } else { // alpha != 0
700 const LO blockSize = this->getBlockSize();
701 const impl_scalar_type alphaImpl = static_cast<impl_scalar_type>(alpha);
702 auto X_lcl = X.mv_.getLocalViewDevice(Access::ReadOnly);
703 auto Y_lcl = this->mv_.getLocalViewDevice(Access::ReadWrite);
704 auto bwm = Impl::createBlockWiseMultiply(blockSize, alphaImpl, Y_lcl, D, X_lcl);
705
706 // Use an explicit RangePolicy with the desired execution space.
707 // Otherwise, if you just use a number, it runs on the default
708 // execution space. For example, if the default execution space
709 // is Cuda but the current execution space is Serial, using just a
710 // number would incorrectly run with Cuda.
711 Kokkos::RangePolicy<exec_space, LO> range(0, lclNumMeshRows);
712 Kokkos::parallel_for(range, bwm);
713 }
714}
715
716template <class Scalar, class LO, class GO, class Node>
719 const Kokkos::View<const impl_scalar_type***,
720 device_type, Kokkos::MemoryUnmanaged>& D,
723 const Scalar& beta) {
724 using Kokkos::ALL;
725 using Kokkos::subview;
726 typedef impl_scalar_type IST;
727
728 const IST alphaImpl = static_cast<IST>(alpha);
729 const IST betaImpl = static_cast<IST>(beta);
730 const LO numVecs = mv_.getNumVectors();
731
732 if (alpha == STS::zero()) { // Y := beta * Y
733 this->scale(beta);
734 } else { // alpha != 0
735 Z.update(STS::one(), X, -STS::one());
736 for (LO j = 0; j < numVecs; ++j) {
737 auto Y_lcl = this->mv_.getLocalViewDevice(Access::ReadWrite);
738 auto Z_lcl = Z.mv_.getLocalViewDevice(Access::ReadWrite);
739 auto Y_lcl_j = subview(Y_lcl, ALL(), j);
740 auto Z_lcl_j = subview(Z_lcl, ALL(), j);
741 Impl::blockJacobiUpdate(Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
742 }
743 }
744}
745
746} // namespace Tpetra
747
748//
749// Explicit instantiation macro
750//
751// Must be expanded from within the Tpetra namespace!
752//
753#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S, LO, GO, NODE) \
754 template class BlockMultiVector<S, LO, GO, NODE>;
755
756#endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
const mv_type & getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
LO local_ordinal_type
The type of local indices.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Struct that holds views of the contents of a CrsMatrix.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Abstract base class for objects that can be the source of an Import or Export operation.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
CombineMode
Rule for combining data in an Import or Export.