Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockMultiVector_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
11#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
12
13#include "Tpetra_BlockMultiVector_decl.hpp"
15#include "Tpetra_BlockView.hpp"
16#include "Teuchos_OrdinalTraits.hpp"
17
18namespace Tpetra {
19
20template <class Scalar, class LO, class GO, class Node>
26
27template <class Scalar, class LO, class GO, class Node>
31 return mv_;
32}
33
34template <class Scalar, class LO, class GO, class Node>
35Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
36BlockMultiVector<Scalar, LO, GO, Node>::
37 getBlockMultiVectorFromSrcDistObject(const Tpetra::SrcDistObject& src) {
38 typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
39 const BMV* src_bmv = dynamic_cast<const BMV*>(&src);
40 TEUCHOS_TEST_FOR_EXCEPTION(
41 src_bmv == nullptr, std::invalid_argument,
42 "Tpetra::"
43 "BlockMultiVector: The source object of an Import or Export to a "
44 "BlockMultiVector, must also be a BlockMultiVector.");
45 return Teuchos::rcp(src_bmv, false); // nonowning RCP
46}
47
48template <class Scalar, class LO, class GO, class Node>
51 const Teuchos::DataAccess copyOrView)
53 , meshMap_(in.meshMap_)
54 , pointMap_(in.pointMap_)
55 , mv_(in.mv_, copyOrView)
56 , blockSize_(in.blockSize_) {}
57
58template <class Scalar, class LO, class GO, class Node>
61 const LO blockSize,
62 const LO numVecs)
64 , // shallow copy
65 meshMap_(meshMap)
66 , pointMap_(makePointMapRCP(meshMap, blockSize))
67 , mv_(pointMap_, numVecs)
68 , // nonowning RCP is OK, since pointMap_ won't go away
69 blockSize_(blockSize) {}
70
71template <class Scalar, class LO, class GO, class Node>
74 const map_type& pointMap,
75 const LO blockSize,
76 const LO numVecs)
78 , // shallow copy
79 meshMap_(meshMap)
80 , pointMap_(new map_type(pointMap))
81 , mv_(pointMap_, numVecs)
82 , blockSize_(blockSize) {}
83
84template <class Scalar, class LO, class GO, class Node>
87 const map_type& meshMap,
88 const LO blockSize)
90 , // shallow copy
91 meshMap_(meshMap)
92 , blockSize_(blockSize) {
93 using Teuchos::RCP;
94
95 if (X_mv.getCopyOrView() == Teuchos::View) {
96 // The input MultiVector has view semantics, so assignment just
97 // does a shallow copy.
98 mv_ = X_mv;
99 } else if (X_mv.getCopyOrView() == Teuchos::Copy) {
100 // The input MultiVector has copy semantics. We can't change
101 // that, but we can make a view of the input MultiVector and
102 // change the view to have view semantics. (That sounds silly;
103 // shouldn't views always have view semantics? but remember that
104 // "view semantics" just governs the default behavior of the copy
105 // constructor and assignment operator.)
107 const size_t numCols = X_mv.getNumVectors();
108 if (numCols == 0) {
109 Teuchos::Array<size_t> cols(0); // view will have zero columns
110 X_view_const = X_mv.subView(cols());
111 } else { // Range1D is an inclusive range
112 X_view_const = X_mv.subView(Teuchos::Range1D(0, numCols - 1));
113 }
115 X_view_const.is_null(), std::logic_error,
116 "Tpetra::"
117 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
118 "should never happen. Please report this bug to the Tpetra developers.");
119
120 // It's perfectly OK to cast away const here. Those view methods
121 // should be marked const anyway, because views can't change the
122 // allocation (just the entries themselves).
123 RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type>(X_view_const);
125 X_view->getCopyOrView() != Teuchos::View, std::logic_error,
126 "Tpetra::"
127 "BlockMultiVector constructor: We just set a MultiVector "
128 "to have view semantics, but it claims that it doesn't have view "
129 "semantics. This should never happen. "
130 "Please report this bug to the Tpetra developers.");
131 mv_ = *X_view; // MultiVector::operator= does a shallow copy here
132 }
133
134 // At this point, mv_ has been assigned, so we can ignore X_mv.
135 Teuchos::RCP<const map_type> pointMap = mv_.getMap();
136 pointMap_ = pointMap;
137}
138
139template <class Scalar, class LO, class GO, class Node>
142 const map_type& newMeshMap,
143 const map_type& newPointMap,
144 const size_t offset)
146 , // shallow copy
147 meshMap_(newMeshMap)
148 , pointMap_(new map_type(newPointMap))
149 , mv_(X.mv_, newPointMap, offset * X.getBlockSize())
150 , // MV "offset view" constructor
151 blockSize_(X.getBlockSize()) {}
152
153template <class Scalar, class LO, class GO, class Node>
156 const map_type& newMeshMap,
157 const size_t offset)
159 , // shallow copy
160 meshMap_(newMeshMap)
161 , pointMap_(makePointMapRCP(newMeshMap, X.getBlockSize()))
162 , mv_(X.mv_, pointMap_, offset * X.getBlockSize())
163 , // MV "offset view" constructor
164 blockSize_(X.getBlockSize()) {}
165
166template <class Scalar, class LO, class GO, class Node>
171
172template <class Scalar, class LO, class GO, class Node>
175 makePointMap(const map_type& meshMap, const LO blockSize) {
177 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
178
179 const GST gblNumMeshMapInds =
180 static_cast<GST>(meshMap.getGlobalNumElements());
181 const size_t lclNumMeshMapIndices =
182 static_cast<size_t>(meshMap.getLocalNumElements());
183 const GST gblNumPointMapInds =
184 gblNumMeshMapInds * static_cast<GST>(blockSize);
185 const size_t lclNumPointMapInds =
186 lclNumMeshMapIndices * static_cast<size_t>(blockSize);
187 const GO indexBase = meshMap.getIndexBase();
188
189 if (meshMap.isContiguous()) {
191 meshMap.getComm());
192 } else {
193 // "Hilbert's Hotel" trick: multiply each process' GIDs by
194 // blockSize, and fill in. That ensures correctness even if the
195 // mesh Map is overlapping.
196 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList();
197 const size_type lclNumMeshGblInds = lclMeshGblInds.size();
198 Teuchos::Array<GO> lclPointGblInds(lclNumPointMapInds);
199 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
200 const GO meshGid = lclMeshGblInds[g];
201 const GO pointGidStart = indexBase +
202 (meshGid - indexBase) * static_cast<GO>(blockSize);
203 const size_type offset = g * static_cast<size_type>(blockSize);
204 for (LO k = 0; k < blockSize; ++k) {
205 const GO pointGid = pointGidStart + static_cast<GO>(k);
206 lclPointGblInds[offset + static_cast<size_type>(k)] = pointGid;
207 }
208 }
210 meshMap.getComm());
211 }
212}
213
214template <class Scalar, class LO, class GO, class Node>
215Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
217 makePointMapRCP(const map_type& meshMap, const LO blockSize) {
219 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
220
221 const GST gblNumMeshMapInds =
222 static_cast<GST>(meshMap.getGlobalNumElements());
223 const size_t lclNumMeshMapIndices =
224 static_cast<size_t>(meshMap.getLocalNumElements());
225 const GST gblNumPointMapInds =
226 gblNumMeshMapInds * static_cast<GST>(blockSize);
227 const size_t lclNumPointMapInds =
228 lclNumMeshMapIndices * static_cast<size_t>(blockSize);
229 const GO indexBase = meshMap.getIndexBase();
230
231 if (meshMap.isContiguous()) {
233 meshMap.getComm()));
234 } else {
235 // "Hilbert's Hotel" trick: multiply each process' GIDs by
236 // blockSize, and fill in. That ensures correctness even if the
237 // mesh Map is overlapping.
238 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList();
239 const size_type lclNumMeshGblInds = lclMeshGblInds.size();
240 Teuchos::Array<GO> lclPointGblInds(lclNumPointMapInds);
241 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
242 const GO meshGid = lclMeshGblInds[g];
243 const GO pointGidStart = indexBase +
244 (meshGid - indexBase) * static_cast<GO>(blockSize);
245 const size_type offset = g * static_cast<size_type>(blockSize);
246 for (LO k = 0; k < blockSize; ++k) {
247 const GO pointGid = pointGidStart + static_cast<GO>(k);
248 lclPointGblInds[offset + static_cast<size_type>(k)] = pointGid;
249 }
250 }
251 return Teuchos::rcp(new map_type(gblNumPointMapInds, lclPointGblInds(), indexBase,
252 meshMap.getComm()));
253 }
254}
255
256template <class Scalar, class LO, class GO, class Node>
259 const LO colIndex,
260 const Scalar vals[]) {
261 auto X_dst = getLocalBlockHost(localRowIndex, colIndex, Access::ReadWrite);
262 typename const_little_vec_type::host_mirror_type::const_type X_src(reinterpret_cast<const impl_scalar_type*>(vals),
263 getBlockSize());
264 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
265 using exec_space = typename device_type::execution_space;
266 Kokkos::deep_copy(exec_space(), X_dst, X_src);
267}
268
269template <class Scalar, class LO, class GO, class Node>
272 const LO colIndex,
273 const Scalar vals[]) {
274 if (!meshMap_.isNodeLocalElement(localRowIndex)) {
275 return false;
276 } else {
277 replaceLocalValuesImpl(localRowIndex, colIndex, vals);
278 return true;
279 }
280}
281
282template <class Scalar, class LO, class GO, class Node>
285 const LO colIndex,
286 const Scalar vals[]) {
287 const LO localRowIndex = meshMap_.getLocalElement(globalRowIndex);
288 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid()) {
289 return false;
290 } else {
291 replaceLocalValuesImpl(localRowIndex, colIndex, vals);
292 return true;
293 }
294}
295
296template <class Scalar, class LO, class GO, class Node>
299 const LO colIndex,
300 const Scalar vals[]) {
301 auto X_dst = getLocalBlockHost(localRowIndex, colIndex, Access::ReadWrite);
302 typename const_little_vec_type::host_mirror_type::const_type X_src(reinterpret_cast<const impl_scalar_type*>(vals),
303 getBlockSize());
304 AXPY(static_cast<impl_scalar_type>(STS::one()), X_src, X_dst);
305}
306
307template <class Scalar, class LO, class GO, class Node>
310 const LO colIndex,
311 const Scalar vals[]) {
312 if (!meshMap_.isNodeLocalElement(localRowIndex)) {
313 return false;
314 } else {
315 sumIntoLocalValuesImpl(localRowIndex, colIndex, vals);
316 return true;
317 }
318}
319
320template <class Scalar, class LO, class GO, class Node>
323 const LO colIndex,
324 const Scalar vals[]) {
325 const LO localRowIndex = meshMap_.getLocalElement(globalRowIndex);
326 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid()) {
327 return false;
328 } else {
329 sumIntoLocalValuesImpl(localRowIndex, colIndex, vals);
330 return true;
331 }
332}
333
334template <class Scalar, class LO, class GO, class Node>
335typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
338 const LO colIndex,
339 const Access::ReadOnlyStruct) const {
340 if (!isValidLocalMeshIndex(localRowIndex)) {
341 return const_little_host_vec_type();
342 } else {
343 const size_t blockSize = getBlockSize();
344 auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
345 LO startRow = localRowIndex * blockSize;
346 LO endRow = startRow + blockSize;
347 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
348 colIndex);
349 }
350}
351
352template <class Scalar, class LO, class GO, class Node>
353typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
356 const LO colIndex,
357 const Access::OverwriteAllStruct) {
358 if (!isValidLocalMeshIndex(localRowIndex)) {
359 return little_host_vec_type();
360 } else {
361 const size_t blockSize = getBlockSize();
362 auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
365 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
366 colIndex);
367 }
368}
369
370template <class Scalar, class LO, class GO, class Node>
371typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
374 const LO colIndex,
375 const Access::ReadWriteStruct) {
376 if (!isValidLocalMeshIndex(localRowIndex)) {
377 return little_host_vec_type();
378 } else {
379 const size_t blockSize = getBlockSize();
380 auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
381 LO startRow = localRowIndex * blockSize;
382 LO endRow = startRow + blockSize;
383 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
384 colIndex);
385 }
386}
387
388template <class Scalar, class LO, class GO, class Node>
389Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
390BlockMultiVector<Scalar, LO, GO, Node>::
391 getMultiVectorFromSrcDistObject(const Tpetra::SrcDistObject& src) {
392 using Teuchos::rcp;
393 using Teuchos::rcpFromRef;
394
395 // The source object of an Import or Export must be a
396 // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
397 // them in that order; one must succeed. Note that the target of
398 // the Import or Export calls checkSizes in DistObject's doTransfer.
399 typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
400 const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*>(&src);
401 if (srcBlkVec == nullptr) {
402 const mv_type* srcMultiVec = dynamic_cast<const mv_type*>(&src);
403 if (srcMultiVec == nullptr) {
404 // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
405 // currently does a shallow copy by default. This is why we
406 // return by RCP, rather than by value.
407 return rcp(new mv_type());
408 } else { // src is a MultiVector
409 return rcp(srcMultiVec, false); // nonowning RCP
410 }
411 } else { // src is a BlockMultiVector
412 return rcpFromRef(srcBlkVec->mv_); // nonowning RCP
413 }
414}
415
416template <class Scalar, class LO, class GO, class Node>
419 return !getMultiVectorFromSrcDistObject(src).is_null();
420}
421
422template <class Scalar, class LO, class GO, class Node>
425 const size_t numSameIDs,
426 const Kokkos::DualView<const local_ordinal_type*,
428 const Kokkos::DualView<const local_ordinal_type*,
430 const CombineMode CM) {
431 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
432 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
433 "instead, create a point importer using makePointMap function.");
434}
435
436template <class Scalar, class LO, class GO, class Node>
439 const Kokkos::DualView<const local_ordinal_type*,
441 Kokkos::DualView<packet_type*,
442 buffer_device_type>& exports,
443 Kokkos::DualView<size_t*,
446 size_t& constantNumPackets) {
447 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
448 "Tpetra::BlockMultiVector::packAndPrepare: Do NOT use this; "
449 "instead, create a point importer using makePointMap function.");
450}
451
452template <class Scalar, class LO, class GO, class Node>
454 unpackAndCombine(const Kokkos::DualView<const local_ordinal_type*,
456 Kokkos::DualView<packet_type*,
458 imports,
459 Kokkos::DualView<size_t*,
462 const size_t constantNumPackets,
463 const CombineMode combineMode) {
464 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
465 "Tpetra::BlockMultiVector::unpackAndCombine: Do NOT use this; "
466 "instead, create a point importer using makePointMap function.");
467}
468
469template <class Scalar, class LO, class GO, class Node>
472 return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid() &&
473 meshMap_.isNodeLocalElement(meshLocalIndex);
474}
475
476template <class Scalar, class LO, class GO, class Node>
478 putScalar(const Scalar& val) {
479 mv_.putScalar(val);
480}
481
482template <class Scalar, class LO, class GO, class Node>
484 scale(const Scalar& val) {
485 mv_.scale(val);
486}
487
488template <class Scalar, class LO, class GO, class Node>
490 update(const Scalar& alpha,
492 const Scalar& beta) {
493 mv_.update(alpha, X.mv_, beta);
494}
495
496namespace Impl {
497// Y := alpha * D * X
498template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
499struct BlockWiseMultiply {
500 typedef typename ViewD::size_type Size;
501
502 private:
503 typedef typename ViewD::device_type Device;
504 typedef typename ViewD::non_const_value_type ImplScalar;
505 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
506
507 template <typename View>
508 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
509 typename View::device_type, Unmanaged>;
510 typedef UnmanagedView<ViewY> UnMViewY;
511 typedef UnmanagedView<ViewD> UnMViewD;
512 typedef UnmanagedView<ViewX> UnMViewX;
513
514 const Size block_size_;
515 Scalar alpha_;
516 UnMViewY Y_;
517 UnMViewD D_;
518 UnMViewX X_;
519
520 public:
521 BlockWiseMultiply(const Size block_size, const Scalar& alpha,
522 const ViewY& Y, const ViewD& D, const ViewX& X)
523 : block_size_(block_size)
524 , alpha_(alpha)
525 , Y_(Y)
526 , D_(D)
527 , X_(X) {}
528
529 KOKKOS_INLINE_FUNCTION
530 void operator()(const Size k) const {
531 const auto zero = KokkosKernels::ArithTraits<Scalar>::zero();
532 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL(), Kokkos::ALL());
533 const auto num_vecs = X_.extent(1);
534 for (Size i = 0; i < num_vecs; ++i) {
535 Kokkos::pair<Size, Size> kslice(k * block_size_, (k + 1) * block_size_);
536 auto X_curBlk = Kokkos::subview(X_, kslice, i);
537 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
538 // Y_curBlk := alpha * D_curBlk * X_curBlk.
539 // Recall that GEMV does an update (+=) of the last argument.
540 Tpetra::FILL(Y_curBlk, zero);
541 Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
542 }
543 }
544};
545
546template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
547inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
548createBlockWiseMultiply(const int block_size, const Scalar& alpha,
549 const ViewY& Y, const ViewD& D, const ViewX& X) {
550 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
551}
552
553template <typename ViewY,
554 typename Scalar,
555 typename ViewD,
556 typename ViewZ,
557 typename LO = typename ViewY::size_type>
558class BlockJacobiUpdate {
559 private:
560 typedef typename ViewD::device_type Device;
561 typedef typename ViewD::non_const_value_type ImplScalar;
562 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
563
564 template <typename ViewType>
565 using UnmanagedView = Kokkos::View<typename ViewType::data_type,
566 typename ViewType::array_layout,
567 typename ViewType::device_type,
568 Unmanaged>;
569 typedef UnmanagedView<ViewY> UnMViewY;
570 typedef UnmanagedView<ViewD> UnMViewD;
571 typedef UnmanagedView<ViewZ> UnMViewZ;
572
573 const LO blockSize_;
574 UnMViewY Y_;
575 const Scalar alpha_;
576 UnMViewD D_;
577 UnMViewZ Z_;
578 const Scalar beta_;
579
580 public:
581 BlockJacobiUpdate(const ViewY& Y,
582 const Scalar& alpha,
583 const ViewD& D,
584 const ViewZ& Z,
585 const Scalar& beta)
586 : blockSize_(D.extent(1))
587 ,
588 // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
589 Y_(Y)
590 , alpha_(alpha)
591 , D_(D)
592 , Z_(Z)
593 , beta_(beta) {
594 static_assert(static_cast<int>(ViewY::rank) == 1,
595 "Y must have rank 1.");
596 static_assert(static_cast<int>(ViewD::rank) == 3, "D must have rank 3.");
597 static_assert(static_cast<int>(ViewZ::rank) == 1,
598 "Z must have rank 1.");
599 // static_assert (static_cast<int> (ViewZ::rank) ==
600 // static_cast<int> (ViewY::rank),
601 // "Z must have the same rank as Y.");
602 }
603
604 KOKKOS_INLINE_FUNCTION void
605 operator()(const LO& k) const {
606 using Kokkos::ALL;
607 using Kokkos::subview;
608 typedef Kokkos::pair<LO, LO> range_type;
609 typedef KokkosKernels::ArithTraits<Scalar> KAT;
610
611 // We only have to implement the alpha != 0 case.
612
613 auto D_curBlk = subview(D_, k, ALL(), ALL());
614 const range_type kslice(k * blockSize_, (k + 1) * blockSize_);
615
616 // Z.update (STS::one (), X, -STS::one ()); // assume this is done
617
618 auto Z_curBlk = subview(Z_, kslice);
619 auto Y_curBlk = subview(Y_, kslice);
620 // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
621 if (beta_ == KAT::zero()) {
622 Tpetra::FILL(Y_curBlk, KAT::zero());
623 } else if (beta_ != KAT::one()) {
624 Tpetra::SCAL(beta_, Y_curBlk);
625 }
626 Tpetra::GEMV(alpha_, D_curBlk, Z_curBlk, Y_curBlk);
627 }
628};
629
630template <class ViewY,
631 class Scalar,
632 class ViewD,
633 class ViewZ,
634 class LO = typename ViewD::size_type>
635void blockJacobiUpdate(const ViewY& Y,
636 const Scalar& alpha,
637 const ViewD& D,
638 const ViewZ& Z,
639 const Scalar& beta) {
640 static_assert(Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
641 static_assert(Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
642 static_assert(Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
643 static_assert(static_cast<int>(ViewY::rank) == static_cast<int>(ViewZ::rank),
644 "Y and Z must have the same rank.");
645 static_assert(static_cast<int>(ViewD::rank) == 3, "D must have rank 3.");
646
647 const auto lclNumMeshRows = D.extent(0);
648
649#ifdef HAVE_TPETRA_DEBUG
650 // D.extent(0) is the (local) number of mesh rows.
651 // D.extent(1) is the block size. Thus, their product should be
652 // the local number of point rows, that is, the number of rows in Y.
653 const auto blkSize = D.extent(1);
654 const auto lclNumPtRows = lclNumMeshRows * blkSize;
655 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(0) != lclNumPtRows, std::invalid_argument,
656 "blockJacobiUpdate: Y.extent(0) = " << Y.extent(0) << " != "
657 "D.extent(0)*D.extent(1) = "
658 << lclNumMeshRows << " * " << blkSize
659 << " = " << lclNumPtRows << ".");
660 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(0) != Z.extent(0), std::invalid_argument,
661 "blockJacobiUpdate: Y.extent(0) = " << Y.extent(0) << " != "
662 "Z.extent(0) = "
663 << Z.extent(0) << ".");
664 TEUCHOS_TEST_FOR_EXCEPTION(Y.extent(1) != Z.extent(1), std::invalid_argument,
665 "blockJacobiUpdate: Y.extent(1) = " << Y.extent(1) << " != "
666 "Z.extent(1) = "
667 << Z.extent(1) << ".");
668#endif // HAVE_TPETRA_DEBUG
669
670 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor(Y, alpha, D, Z, beta);
671 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
672 // lclNumMeshRows must fit in LO, else the Map would not be correct.
673 range_type range(0, static_cast<LO>(lclNumMeshRows));
674 Kokkos::parallel_for(range, functor);
675}
676
677} // namespace Impl
678
679template <class Scalar, class LO, class GO, class Node>
682 const Kokkos::View<const impl_scalar_type***,
683 device_type, Kokkos::MemoryUnmanaged>& D,
685 using Kokkos::ALL;
686 typedef typename device_type::execution_space exec_space;
687 const LO lclNumMeshRows = meshMap_.getLocalNumElements();
688
689 if (alpha == STS::zero()) {
690 this->putScalar(STS::zero());
691 } else { // alpha != 0
692 const LO blockSize = this->getBlockSize();
693 const impl_scalar_type alphaImpl = static_cast<impl_scalar_type>(alpha);
694 auto X_lcl = X.mv_.getLocalViewDevice(Access::ReadOnly);
695 auto Y_lcl = this->mv_.getLocalViewDevice(Access::ReadWrite);
696 auto bwm = Impl::createBlockWiseMultiply(blockSize, alphaImpl, Y_lcl, D, X_lcl);
697
698 // Use an explicit RangePolicy with the desired execution space.
699 // Otherwise, if you just use a number, it runs on the default
700 // execution space. For example, if the default execution space
701 // is Cuda but the current execution space is Serial, using just a
702 // number would incorrectly run with Cuda.
703 Kokkos::RangePolicy<exec_space, LO> range(0, lclNumMeshRows);
704 Kokkos::parallel_for(range, bwm);
705 }
706}
707
708template <class Scalar, class LO, class GO, class Node>
711 const Kokkos::View<const impl_scalar_type***,
712 device_type, Kokkos::MemoryUnmanaged>& D,
715 const Scalar& beta) {
716 using Kokkos::ALL;
717 using Kokkos::subview;
718 typedef impl_scalar_type IST;
719
720 const IST alphaImpl = static_cast<IST>(alpha);
721 const IST betaImpl = static_cast<IST>(beta);
722 const LO numVecs = mv_.getNumVectors();
723
724 if (alpha == STS::zero()) { // Y := beta * Y
725 this->scale(beta);
726 } else { // alpha != 0
727 Z.update(STS::one(), X, -STS::one());
728 for (LO j = 0; j < numVecs; ++j) {
729 auto Y_lcl = this->mv_.getLocalViewDevice(Access::ReadWrite);
730 auto Z_lcl = Z.mv_.getLocalViewDevice(Access::ReadWrite);
731 auto Y_lcl_j = subview(Y_lcl, ALL(), j);
732 auto Z_lcl_j = subview(Z_lcl, ALL(), j);
733 Impl::blockJacobiUpdate(Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
734 }
735 }
736}
737
738} // namespace Tpetra
739
740//
741// Explicit instantiation macro
742//
743// Must be expanded from within the Tpetra namespace!
744//
745#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S, LO, GO, NODE) \
746 template class BlockMultiVector<S, LO, GO, NODE>;
747
748#endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
const mv_type & getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
LO local_ordinal_type
The type of local indices.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Struct that holds views of the contents of a CrsMatrix.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Abstract base class for objects that can be the source of an Import or Export operation.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
CombineMode
Rule for combining data in an Import or Export.