Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockView.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_BLOCKVIEW_HPP
11#define TPETRA_BLOCKVIEW_HPP
12
20
21#include "TpetraCore_config.h"
22#if KOKKOS_VERSION >= 40799
23#include "KokkosKernels_ArithTraits.hpp"
24#else
25#include "Kokkos_ArithTraits.hpp"
26#endif
27#include "Kokkos_Complex.hpp"
28
29namespace Tpetra {
30
35
36namespace Impl {
37
44template <class ViewType1,
45 class ViewType2,
46 const int rank1 = ViewType1::rank>
47struct AbsMax {
48 static KOKKOS_INLINE_FUNCTION void
49 run(const ViewType2& Y, const ViewType1& X);
50};
51
56template <class ViewType1,
57 class ViewType2>
61 static KOKKOS_INLINE_FUNCTION void
62 run(const ViewType2& Y, const ViewType1& X) {
63 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
64 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
65 typedef typename std::remove_reference<decltype(Y(0, 0))>::type STY;
66 static_assert(!std::is_const<STY>::value,
67 "AbsMax: The type of each entry of Y must be nonconst.");
68 typedef typename std::decay<decltype(X(0, 0))>::type STX;
69 static_assert(std::is_same<STX, STY>::value,
70 "AbsMax: The type of each entry of X and Y must be the same.");
71#if KOKKOS_VERSION >= 40799
72 typedef KokkosKernels::ArithTraits<STY> KAT;
73#else
74 typedef Kokkos::ArithTraits<STY> KAT;
75#endif
76
77 const int numCols = Y.extent(1);
78 const int numRows = Y.extent(0);
79 for (int j = 0; j < numCols; ++j) {
80 for (int i = 0; i < numRows; ++i) {
81 STY& Y_ij = Y(i, j); // use ref here to avoid 2nd op() call on Y
82 const STX X_ij = X(i, j);
83 // NOTE: no std::max (not a CUDA __device__ function); must
84 // cast back up to complex.
85 const auto Y_ij_abs = KAT::abs(Y_ij);
86 const auto X_ij_abs = KAT::abs(X_ij);
87 Y_ij = (Y_ij_abs >= X_ij_abs) ? static_cast<STY>(Y_ij_abs) : static_cast<STY>(X_ij_abs);
88 }
89 }
90 }
91};
92
97template <class ViewType1,
98 class ViewType2>
102 static KOKKOS_INLINE_FUNCTION void
103 run(const ViewType2& Y, const ViewType1& X) {
104 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
105 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
106
107 typedef typename std::remove_reference<decltype(Y(0))>::type STY;
108 static_assert(!std::is_const<STY>::value,
109 "AbsMax: The type of each entry of Y must be nonconst.");
110 typedef typename std::remove_const<typename std::remove_reference<decltype(X(0))>::type>::type STX;
111 static_assert(std::is_same<STX, STY>::value,
112 "AbsMax: The type of each entry of X and Y must be the same.");
113#if KOKKOS_VERSION >= 40799
114 typedef KokkosKernels::ArithTraits<STY> KAT;
115#else
116 typedef Kokkos::ArithTraits<STY> KAT;
117#endif
118
119 const int numRows = Y.extent(0);
120 for (int i = 0; i < numRows; ++i) {
121 STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
122 const STX X_i = X(i);
123 // NOTE: no std::max (not a CUDA __device__ function); must
124 // cast back up to complex.
125 const auto Y_i_abs = KAT::abs(Y_i);
126 const auto X_i_abs = KAT::abs(X_i);
127 Y_i = (Y_i_abs >= X_i_abs) ? static_cast<STY>(Y_i_abs) : static_cast<STY>(X_i_abs);
128 }
129 }
130};
131
138template <class ViewType1, class ViewType2, const int rank = ViewType1::rank>
140absMax(const ViewType2& Y, const ViewType1& X) {
141 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
142 "absMax: ViewType1 and ViewType2 must have the same rank.");
144}
145
150template <class ViewType,
151 class CoefficientType,
152 class IndexType = int,
153 const bool is_contiguous = false,
154 const int rank = ViewType::rank>
155struct SCAL {
156 static KOKKOS_INLINE_FUNCTION void
157 run(const CoefficientType& alpha, const ViewType& x);
158};
159
162template <class ViewType,
163 class CoefficientType,
164 class IndexType>
167 static KOKKOS_INLINE_FUNCTION void
169 const IndexType numRows = static_cast<IndexType>(x.extent(0));
170
172#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
173#pragma unroll
174#endif
175 for (IndexType i = 0; i < numRows; ++i)
176 x(i) = alpha * x(i);
177 }
178};
181template <class ViewType,
182 class CoefficientType,
183 class IndexType>
186 static KOKKOS_INLINE_FUNCTION void
188 const IndexType numRows = static_cast<IndexType>(A.extent(0));
189 const IndexType numCols = static_cast<IndexType>(A.extent(1));
190
191 for (IndexType j = 0; j < numCols; ++j)
192 for (IndexType i = 0; i < numRows; ++i)
193 A(i, j) = alpha * A(i, j);
194 }
195};
196template <class ViewType,
197 class CoefficientType,
198 class IndexType,
199 const int rank>
202 static KOKKOS_INLINE_FUNCTION void
203 run(const CoefficientType& alpha, const ViewType& x) {
204 using x_value_type = typename std::decay<decltype(*x.data())>::type;
205 const IndexType span = static_cast<IndexType>(x.span());
207#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
208#pragma unroll
209#endif
210 for (IndexType i = 0; i < span; ++i)
211 x_ptr[i] = alpha * x_ptr[i];
212 }
213};
214
219template <class ViewType,
220 class InputType,
221 class IndexType = int,
222 const bool is_contiguous = false,
223 const int rank = ViewType::rank>
224struct FILL {
225 static KOKKOS_INLINE_FUNCTION void
226 run(const ViewType& x, const InputType& val);
227};
228
231template <class ViewType,
232 class InputType,
233 class IndexType>
235 static KOKKOS_INLINE_FUNCTION void
236 run(const ViewType& x, const InputType& val) {
237 const IndexType numRows = static_cast<IndexType>(x.extent(0));
238 for (IndexType i = 0; i < numRows; ++i)
239 x(i) = val;
240 }
241};
244template <class ViewType,
245 class InputType,
246 class IndexType>
248 static KOKKOS_INLINE_FUNCTION void
249 run(const ViewType& X, const InputType& val) {
250 const IndexType numRows = static_cast<IndexType>(X.extent(0));
251 const IndexType numCols = static_cast<IndexType>(X.extent(1));
252 for (IndexType j = 0; j < numCols; ++j)
253 for (IndexType i = 0; i < numRows; ++i)
254 X(i, j) = val;
255 }
256};
257template <class ViewType,
258 class InputType,
259 class IndexType,
260 const int rank>
262 static KOKKOS_INLINE_FUNCTION void
263 run(const ViewType& x, const InputType& val) {
264 const IndexType span = static_cast<IndexType>(x.span());
265 auto x_ptr = x.data();
266#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
267#pragma unroll
268#endif
269 for (IndexType i = 0; i < span; ++i)
270 x_ptr[i] = val;
271 }
272};
273
278template <class CoefficientType,
279 class ViewType1,
280 class ViewType2,
281 class IndexType = int,
282 const bool is_contiguous = false,
283 const int rank = ViewType1::rank>
284struct AXPY {
285 static KOKKOS_INLINE_FUNCTION void
286 run(const CoefficientType& alpha,
287 const ViewType1& x,
288 const ViewType2& y);
289};
290
293template <class CoefficientType,
294 class ViewType1,
295 class ViewType2,
296 class IndexType>
299 static KOKKOS_INLINE_FUNCTION void
301 const ViewType1& x,
302 const ViewType2& y) {
303#if KOKKOS_VERSION >= 40799
304 using KokkosKernels::ArithTraits;
305#else
306 using Kokkos::ArithTraits;
307#endif
308 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
309 "AXPY: x and y must have the same rank.");
310
311 const IndexType numRows = static_cast<IndexType>(y.extent(0));
314 for (IndexType i = 0; i < numRows; ++i)
315 y(i) += alpha * x(i);
316 }
317 }
318};
319
322template <class CoefficientType,
323 class ViewType1,
324 class ViewType2,
325 class IndexType>
328 static KOKKOS_INLINE_FUNCTION void
330 const ViewType1& X,
331 const ViewType2& Y) {
332#if KOKKOS_VERSION >= 40799
333 using KokkosKernels::ArithTraits;
334#else
335 using Kokkos::ArithTraits;
336#endif
337 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
338 "AXPY: X and Y must have the same rank.");
339 const IndexType numRows = static_cast<IndexType>(Y.extent(0));
340 const IndexType numCols = static_cast<IndexType>(Y.extent(1));
341
343 for (IndexType j = 0; j < numCols; ++j)
344 for (IndexType i = 0; i < numRows; ++i)
345 Y(i, j) += alpha * X(i, j);
346 }
347 }
348};
349
350template <class CoefficientType,
351 class ViewType1,
352 class ViewType2,
353 class IndexType,
354 const int rank>
357 static KOKKOS_INLINE_FUNCTION void
358 run(const CoefficientType& alpha,
359 const ViewType1& x,
360 const ViewType2& y) {
361#if KOKKOS_VERSION >= 40799
362 using KokkosKernels::ArithTraits;
363#else
364 using Kokkos::ArithTraits;
365#endif
366 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
367 "AXPY: x and y must have the same rank.");
368
370 using x_value_type = typename std::decay<decltype(*x.data())>::type;
371 using y_value_type = typename std::decay<decltype(*y.data())>::type;
372 const IndexType span = static_cast<IndexType>(y.span());
373 const x_value_type* __restrict x_ptr(x.data());
375#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
376#pragma unroll
377#endif
378 for (IndexType i = 0; i < span; ++i)
379 y_ptr[i] += alpha * x_ptr[i];
380 }
381 }
382};
383
388template <class ViewType1,
389 class ViewType2,
390 class IndexType = int,
391 const bool is_contiguous = false,
392 const int rank = ViewType1::rank>
393struct COPY {
394 static KOKKOS_INLINE_FUNCTION void
395 run(const ViewType1& x, const ViewType2& y);
396};
397
400template <class ViewType1,
401 class ViewType2,
402 class IndexType>
405 static KOKKOS_INLINE_FUNCTION void
406 run(const ViewType1& x, const ViewType2& y) {
407 const IndexType numRows = static_cast<IndexType>(x.extent(0));
409 for (IndexType i = 0; i < numRows; ++i)
410 y(i) = x(i);
411 }
412};
413
416template <class ViewType1,
417 class ViewType2,
418 class IndexType>
421 static KOKKOS_INLINE_FUNCTION void
422 run(const ViewType1& X, const ViewType2& Y) {
423 const IndexType numRows = static_cast<IndexType>(Y.extent(0));
424 const IndexType numCols = static_cast<IndexType>(Y.extent(1));
426 for (IndexType j = 0; j < numCols; ++j)
427 for (IndexType i = 0; i < numRows; ++i)
428 Y(i, j) = X(i, j);
429 }
430};
431
432template <class ViewType1,
433 class ViewType2,
434 class IndexType,
435 const int rank>
438 static KOKKOS_INLINE_FUNCTION void
439 run(const ViewType1& x, const ViewType2& y) {
440 const IndexType span = static_cast<IndexType>(x.span());
441 using x_value_type = typename std::decay<decltype(*x.data())>::type;
442 using y_value_type = typename std::decay<decltype(*y.data())>::type;
443 const x_value_type* __restrict x_ptr(x.data());
445
446#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
447#pragma unroll
448#endif
449 for (IndexType i = 0; i < span; ++i)
450 y_ptr[i] = x_ptr[i];
451 }
452};
453
454template <class VecType1,
455 class BlkType,
456 class VecType2,
457 class CoeffType,
458 class IndexType = int,
459 bool is_contiguous = false,
460 class BlkLayoutType = typename BlkType::array_layout>
461struct GEMV {
462 static KOKKOS_INLINE_FUNCTION void
463 run(const CoeffType& alpha,
464 const BlkType& A,
465 const VecType1& x,
466 const VecType2& y) {
467 static_assert(VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
468 static_assert(BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
469 static_assert(VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
470
471 const IndexType numRows = static_cast<IndexType>(A.extent(0));
472 const IndexType numCols = static_cast<IndexType>(A.extent(1));
473
475 for (IndexType i = 0; i < numRows; ++i)
476 for (IndexType j = 0; j < numCols; ++j)
477 y(i) += alpha * A(i, j) * x(j);
478 }
479};
480
481template <class VecType1,
482 class BlkType,
483 class VecType2,
484 class CoeffType,
485 class IndexType>
486struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, true, Kokkos::LayoutLeft> {
487 static KOKKOS_INLINE_FUNCTION void
488 run(const CoeffType& alpha,
489 const BlkType& A,
490 const VecType1& x,
491 const VecType2& y) {
492 static_assert(VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
493 static_assert(BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
494 static_assert(VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
495
496 using A_value_type = typename std::decay<decltype(A(0, 0))>::type;
497 using x_value_type = typename std::decay<decltype(x(0))>::type;
498 using y_value_type = typename std::decay<decltype(y(0))>::type;
499
500 const IndexType numRows = static_cast<IndexType>(A.extent(0));
501 const IndexType numCols = static_cast<IndexType>(A.extent(1));
502
503 const A_value_type* __restrict A_ptr(A.data());
504 const IndexType as1(A.stride(1));
505 const x_value_type* __restrict x_ptr(x.data());
506 y_value_type* __restrict y_ptr(y.data());
507
508#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
509#pragma unroll
510#endif
511 for (IndexType j = 0; j < numCols; ++j) {
512 const x_value_type x_at_j = alpha * x_ptr[j];
513 const A_value_type* __restrict A_at_j = A_ptr + j * as1;
514#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
515#pragma unroll
516#endif
517 for (IndexType i = 0; i < numRows; ++i)
518 y_ptr[i] += A_at_j[i] * x_at_j;
519 }
520 }
521};
522
523template <class VecType1,
524 class BlkType,
525 class VecType2,
526 class CoeffType,
527 class IndexType>
528struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, true, Kokkos::LayoutRight> {
529 static KOKKOS_INLINE_FUNCTION void
530 run(const CoeffType& alpha,
531 const BlkType& A,
532 const VecType1& x,
533 const VecType2& y) {
534 static_assert(VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
535 static_assert(BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
536 static_assert(VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
537
538 using A_value_type = typename std::decay<decltype(A(0, 0))>::type;
539 using x_value_type = typename std::decay<decltype(x(0))>::type;
540 using y_value_type = typename std::decay<decltype(y(0))>::type;
541
542 const IndexType numRows = static_cast<IndexType>(A.extent(0));
543 const IndexType numCols = static_cast<IndexType>(A.extent(1));
544
545 const A_value_type* __restrict A_ptr(A.data());
546 const IndexType as0(A.stride(0));
547 const x_value_type* __restrict x_ptr(x.data());
548 y_value_type* __restrict y_ptr(y.data());
549
550#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
551#pragma unroll
552#endif
553 for (IndexType i = 0; i < numRows; ++i) {
554 y_value_type y_at_i(0);
555 const auto A_at_i = A_ptr + i * as0;
556#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
557#pragma unroll
558#endif
559 for (IndexType j = 0; j < numCols; ++j)
560 y_at_i += A_at_i[j] * x_ptr[j];
561 y_ptr[i] += alpha * y_at_i;
562 }
563 }
564};
565
566} // namespace Impl
567
570template <class ViewType,
571 class CoefficientType,
572 class IndexType = int,
573 const int rank = ViewType::rank>
574KOKKOS_INLINE_FUNCTION void
576 using LayoutType = typename ViewType::array_layout;
577 constexpr bool is_contiguous = (std::is_same<LayoutType, Kokkos::LayoutLeft>::value ||
578 std::is_same<LayoutType, Kokkos::LayoutRight>::value);
580}
581
583template <class ViewType,
584 class InputType,
585 class IndexType = int,
586 const int rank = ViewType::rank>
587KOKKOS_INLINE_FUNCTION void
588FILL(const ViewType& x, const InputType& val) {
589 using LayoutType = typename ViewType::array_layout;
590 constexpr bool is_contiguous = (std::is_same<LayoutType, Kokkos::LayoutLeft>::value ||
591 std::is_same<LayoutType, Kokkos::LayoutRight>::value);
593}
594
600template <class CoefficientType,
601 class ViewType1,
602 class ViewType2,
603 class IndexType = int,
604 const int rank = ViewType1::rank>
605KOKKOS_INLINE_FUNCTION void
607 const ViewType1& x,
608 const ViewType2& y) {
609 static_assert(static_cast<int>(ViewType1::rank) ==
610 static_cast<int>(ViewType2::rank),
611 "AXPY: x and y must have the same rank.");
612 using LayoutType1 = typename ViewType1::array_layout;
613 using LayoutType2 = typename ViewType2::array_layout;
614 constexpr bool is_layout_same = std::is_same<LayoutType1, LayoutType2>::value;
615 constexpr bool is_x_contiguous = (std::is_same<LayoutType1, Kokkos::LayoutLeft>::value ||
616 std::is_same<LayoutType1, Kokkos::LayoutRight>::value);
617 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
619}
620
629template <class ViewType1,
630 class ViewType2,
631 class IndexType = int,
632 const int rank = ViewType1::rank>
633KOKKOS_INLINE_FUNCTION void
634COPY(const ViewType1& x, const ViewType2& y) {
635 static_assert(static_cast<int>(ViewType1::rank) ==
636 static_cast<int>(ViewType2::rank),
637 "COPY: x and y must have the same rank.");
638 using LayoutType1 = typename ViewType1::array_layout;
639 using LayoutType2 = typename ViewType2::array_layout;
640 constexpr bool is_layout_same = std::is_same<LayoutType1, LayoutType2>::value;
641 constexpr bool is_x_contiguous = (std::is_same<LayoutType1, Kokkos::LayoutLeft>::value ||
642 std::is_same<LayoutType1, Kokkos::LayoutRight>::value);
643 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
645}
646
658template <class VecType1,
659 class BlkType,
660 class VecType2,
661 class CoeffType,
662 class IndexType = int>
663KOKKOS_INLINE_FUNCTION void
665 const BlkType& A,
666 const VecType1& x,
667 const VecType2& y) {
668 constexpr bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
669 std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
670 constexpr bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
671 std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
672 constexpr bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
673 std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
675 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run(alpha, A, x, y);
676}
677
685template <class ViewType1,
686 class ViewType2,
687 class ViewType3,
688 class CoefficientType,
689 class IndexType = int>
690KOKKOS_INLINE_FUNCTION void
691GEMM(const char transA[],
692 const char transB[],
693 const CoefficientType& alpha,
694 const ViewType1& A,
695 const ViewType2& B,
696 const CoefficientType& beta,
697 const ViewType3& C) {
698 // Assert that A, B, and C are in fact matrices
699 static_assert(ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
700 static_assert(ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
701 static_assert(ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
702
703 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
704#if KOKKOS_VERSION >= 40799
705 typedef KokkosKernels::ArithTraits<Scalar> STS;
706#else
707 typedef Kokkos::ArithTraits<Scalar> STS;
708#endif
709 const Scalar ZERO = STS::zero();
710 const Scalar ONE = STS::one();
711
712 // Get the dimensions
713 IndexType m, n, k;
714 if (transA[0] == 'N' || transA[0] == 'n') {
715 m = static_cast<IndexType>(A.extent(0));
716 n = static_cast<IndexType>(A.extent(1));
717 } else {
718 m = static_cast<IndexType>(A.extent(1));
719 n = static_cast<IndexType>(A.extent(0));
720 }
721 k = static_cast<IndexType>(C.extent(1));
722
723 // quick return if possible
724 if (alpha == ZERO && beta == ONE)
725 return;
726
727 // And if alpha equals zero...
728 if (alpha == ZERO) {
729 if (beta == ZERO) {
730 for (IndexType i = 0; i < m; i++) {
731 for (IndexType j = 0; j < k; j++) {
732 C(i, j) = ZERO;
733 }
734 }
735 } else {
736 for (IndexType i = 0; i < m; i++) {
737 for (IndexType j = 0; j < k; j++) {
738 C(i, j) = beta * C(i, j);
739 }
740 }
741 }
742 }
743
744 // Start the operations
745 if (transB[0] == 'n' || transB[0] == 'N') {
746 if (transA[0] == 'n' || transA[0] == 'N') {
747 // Form C = alpha*A*B + beta*C
748 for (IndexType j = 0; j < n; j++) {
749 if (beta == ZERO) {
750 for (IndexType i = 0; i < m; i++) {
751 C(i, j) = ZERO;
752 }
753 } else if (beta != ONE) {
754 for (IndexType i = 0; i < m; i++) {
755 C(i, j) = beta * C(i, j);
756 }
757 }
758 for (IndexType l = 0; l < k; l++) {
759 Scalar temp = alpha * B(l, j);
760 for (IndexType i = 0; i < m; i++) {
761 C(i, j) = C(i, j) + temp * A(i, l);
762 }
763 }
764 }
765 } else {
766 // Form C = alpha*A**T*B + beta*C
767 for (IndexType j = 0; j < n; j++) {
768 for (IndexType i = 0; i < m; i++) {
769 Scalar temp = ZERO;
770 for (IndexType l = 0; l < k; l++) {
771 temp = temp + A(l, i) * B(l, j);
772 }
773 if (beta == ZERO) {
774 C(i, j) = alpha * temp;
775 } else {
776 C(i, j) = alpha * temp + beta * C(i, j);
777 }
778 }
779 }
780 }
781 } else {
782 if (transA[0] == 'n' || transA[0] == 'N') {
783 // Form C = alpha*A*B**T + beta*C
784 for (IndexType j = 0; j < n; j++) {
785 if (beta == ZERO) {
786 for (IndexType i = 0; i < m; i++) {
787 C(i, j) = ZERO;
788 }
789 } else if (beta != ONE) {
790 for (IndexType i = 0; i < m; i++) {
791 C(i, j) = beta * C(i, j);
792 }
793 }
794 for (IndexType l = 0; l < k; l++) {
795 Scalar temp = alpha * B(j, l);
796 for (IndexType i = 0; i < m; i++) {
797 C(i, j) = C(i, j) + temp * A(i, l);
798 }
799 }
800 }
801 } else {
802 // Form C = alpha*A**T*B**T + beta*C
803 for (IndexType j = 0; j < n; j++) {
804 for (IndexType i = 0; i < m; i++) {
805 Scalar temp = ZERO;
806 for (IndexType l = 0; l < k; l++) {
807 temp = temp + A(l, i) * B(j, l);
808 }
809 if (beta == ZERO) {
810 C(i, j) = alpha * temp;
811 } else {
812 C(i, j) = alpha * temp + beta * C(i, j);
813 }
814 }
815 }
816 }
817 }
818}
819
821template <class LittleBlockType,
822 class LittleVectorType>
823KOKKOS_INLINE_FUNCTION void
825 // The type of an entry of ipiv is the index type.
826 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
827 static_assert(std::is_integral<IndexType>::value,
828 "GETF2: The type of each entry of ipiv must be an integer type.");
829 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
830 static_assert(!std::is_const<Scalar>::value,
831 "GETF2: A must not be a const View (or LittleBlock).");
832 static_assert(!std::is_const<std::remove_reference<decltype(ipiv(0))>>::value,
833 "GETF2: ipiv must not be a const View (or LittleBlock).");
834 static_assert(LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
835#if KOKKOS_VERSION >= 40799
836 typedef KokkosKernels::ArithTraits<Scalar> STS;
837#else
838 typedef Kokkos::ArithTraits<Scalar> STS;
839#endif
840 const Scalar ZERO = STS::zero();
841
842 const IndexType numRows = static_cast<IndexType>(A.extent(0));
843 const IndexType numCols = static_cast<IndexType>(A.extent(1));
844 const IndexType pivDim = static_cast<IndexType>(ipiv.extent(0));
845
846 // std::min is not a CUDA device function
847 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
848 if (pivDim < minPivDim) {
849 info = -2;
850 return;
851 }
852
853 // Initialize info
854 info = 0;
855
856 for (IndexType j = 0; j < pivDim; j++) {
857 // Find pivot and test for singularity
858 IndexType jp = j;
859 for (IndexType i = j + 1; i < numRows; i++) {
860 if (STS::abs(A(i, j)) > STS::abs(A(jp, j))) {
861 jp = i;
862 }
863 }
864 ipiv(j) = jp + 1;
865
866 if (A(jp, j) != ZERO) {
867 // Apply the interchange to columns 1:N
868 if (jp != j) {
869 for (IndexType i = 0; i < numCols; i++) {
870 Scalar temp = A(jp, i);
871 A(jp, i) = A(j, i);
872 A(j, i) = temp;
873 }
874 }
875
876 // Compute elements J+1:M of J-th column
877 for (IndexType i = j + 1; i < numRows; i++) {
878 A(i, j) = A(i, j) / A(j, j);
879 }
880 } else if (info == 0) {
881 info = j;
882 }
883
884 // Update trailing submatrix
885 for (IndexType r = j + 1; r < numRows; r++) {
886 for (IndexType c = j + 1; c < numCols; c++) {
887 A(r, c) = A(r, c) - A(r, j) * A(j, c);
888 }
889 }
890 }
891}
892
893namespace Impl {
894
898template <class LittleBlockType,
899 class LittleIntVectorType,
900 class LittleScalarVectorType,
901 const int rank = LittleScalarVectorType::rank>
902struct GETRS {
903 static KOKKOS_INLINE_FUNCTION void
904 run(const char mode[],
905 const LittleBlockType& A,
908 int& info);
909};
910
912template <class LittleBlockType,
916 static KOKKOS_INLINE_FUNCTION void
917 run(const char mode[],
918 const LittleBlockType& A,
921 int& info) {
922 // The type of an entry of ipiv is the index type.
923 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
924 // IndexType must be signed, because this code does a countdown loop
925 // to zero. Unsigned integers are always >= 0, even on underflow.
926 static_assert(std::is_integral<IndexType>::value &&
927 std::is_signed<IndexType>::value,
928 "GETRS: The type of each entry of ipiv must be a signed integer.");
929 typedef typename std::decay<decltype(A(0, 0))>::type Scalar;
930 static_assert(!std::is_const<std::remove_reference<decltype(B(0))>>::value,
931 "GETRS: B must not be a const View (or LittleBlock).");
932 static_assert(LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
933 static_assert(LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
934 static_assert(LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
935
936#if KOKKOS_VERSION >= 40799
937 typedef KokkosKernels::ArithTraits<Scalar> STS;
938#else
939 typedef Kokkos::ArithTraits<Scalar> STS;
940#endif
941 const Scalar ZERO = STS::zero();
942 const IndexType numRows = static_cast<IndexType>(A.extent(0));
943 const IndexType numCols = static_cast<IndexType>(A.extent(1));
944 const IndexType pivDim = static_cast<IndexType>(ipiv.extent(0));
945
946 info = 0;
947
948 // Ensure that the matrix is square
949 if (numRows != numCols) {
950 info = -2;
951 return;
952 }
953
954 // Ensure that the pivot array is sufficiently large
955 if (pivDim < numRows) {
956 info = -3;
957 return;
958 }
959
960 // No transpose case
961 if (mode[0] == 'n' || mode[0] == 'N') {
962 // Apply row interchanges to the RHS
963 for (IndexType i = 0; i < numRows; i++) {
964 if (ipiv(i) != i + 1) {
965 Scalar temp = B(i);
966 B(i) = B(ipiv(i) - 1);
967 B(ipiv(i) - 1) = temp;
968 }
969 }
970
971 // Solve Lx=b, overwriting b with x
972 for (IndexType r = 1; r < numRows; r++) {
973 for (IndexType c = 0; c < r; c++) {
974 B(r) = B(r) - A(r, c) * B(c);
975 }
976 }
977
978 // Solve Ux=b, overwriting b with x
979 for (IndexType r = numRows - 1; r >= 0; r--) {
980 // Check whether U is singular
981 if (A(r, r) == ZERO) {
982 info = r + 1;
983 return;
984 }
985
986 for (IndexType c = r + 1; c < numCols; c++) {
987 B(r) = B(r) - A(r, c) * B(c);
988 }
989 B(r) = B(r) / A(r, r);
990 }
991 }
992 // Transpose case
993 else if (mode[0] == 't' || mode[0] == 'T') {
994 info = -1; // NOT YET IMPLEMENTED
995 return;
996 }
997 // Conjugate transpose case
998 else if (mode[0] == 'c' || mode[0] == 'C') {
999 info = -1; // NOT YET IMPLEMENTED
1000 return;
1001 } else { // invalid mode
1002 info = -1;
1003 return;
1004 }
1005 }
1006};
1007
1009template <class LittleBlockType,
1010 class LittleIntVectorType,
1013 static KOKKOS_INLINE_FUNCTION void
1014 run(const char mode[],
1015 const LittleBlockType& A,
1018 int& info) {
1019 // The type of an entry of ipiv is the index type.
1020 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
1021 static_assert(std::is_integral<IndexType>::value,
1022 "GETRS: The type of each entry of ipiv must be an integer type.");
1023 static_assert(!std::is_const<std::remove_reference<decltype(B(0))>>::value,
1024 "GETRS: B must not be a const View (or LittleBlock).");
1025 static_assert(LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1026 static_assert(LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1027 static_assert(LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1028
1029 // The current implementation iterates over one right-hand side at
1030 // a time. It might be faster to do this differently, but this
1031 // should work for now.
1032 const IndexType numRhs = B.extent(1);
1033 info = 0;
1034
1035 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1036 auto B_cur = Kokkos::subview(B, Kokkos::ALL(), rhs);
1038 if (info != 0) {
1039 return;
1040 }
1041 }
1042 }
1043};
1044
1045} // namespace Impl
1046
1050template <class LittleBlockType,
1051 class LittleIntVectorType,
1054GETRS(const char mode[],
1055 const LittleBlockType& A,
1058 int& info) {
1060 LittleScalarVectorType::rank>::run(mode, A, ipiv, B, info);
1061}
1062
1077template <class LittleBlockType,
1078 class LittleIntVectorType,
1079 class LittleScalarVectorType>
1080KOKKOS_INLINE_FUNCTION void
1084 int& info) {
1085 // The type of an entry of ipiv is the index type.
1086 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
1087 // IndexType must be signed, because this code does a countdown loop
1088 // to zero. Unsigned integers are always >= 0, even on underflow.
1089 static_assert(std::is_integral<IndexType>::value &&
1090 std::is_signed<IndexType>::value,
1091 "GETRI: The type of each entry of ipiv must be a signed integer.");
1092 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
1093 static_assert(!std::is_const<std::remove_reference<decltype(A(0, 0))>>::value,
1094 "GETRI: A must not be a const View (or LittleBlock).");
1095 static_assert(!std::is_const<std::remove_reference<decltype(work(0))>>::value,
1096 "GETRI: work must not be a const View (or LittleBlock).");
1097 static_assert(LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1098#if KOKKOS_VERSION >= 40799
1099 typedef KokkosKernels::ArithTraits<Scalar> STS;
1100#else
1101 typedef Kokkos::ArithTraits<Scalar> STS;
1102#endif
1103 const Scalar ZERO = STS::zero();
1104 const Scalar ONE = STS::one();
1105
1106 const IndexType numRows = static_cast<IndexType>(A.extent(0));
1107 const IndexType numCols = static_cast<IndexType>(A.extent(1));
1108 const IndexType pivDim = static_cast<IndexType>(ipiv.extent(0));
1109 const IndexType workDim = static_cast<IndexType>(work.extent(0));
1110
1111 info = 0;
1112
1113 // Ensure that the matrix is square
1114 if (numRows != numCols) {
1115 info = -1;
1116 return;
1117 }
1118
1119 // Ensure that the pivot array is sufficiently large
1120 if (pivDim < numRows) {
1121 info = -2;
1122 return;
1123 }
1124
1125 // Ensure that the work array is sufficiently large
1126 if (workDim < numRows) {
1127 info = -3;
1128 return;
1129 }
1130
1131 // Form Uinv in place
1132 for (IndexType j = 0; j < numRows; j++) {
1133 if (A(j, j) == ZERO) {
1134 info = j + 1;
1135 return;
1136 }
1137
1138 A(j, j) = ONE / A(j, j);
1139
1140 // Compute elements 1:j-1 of j-th column
1141 for (IndexType r = 0; r < j; r++) {
1142 A(r, j) = A(r, r) * A(r, j);
1143 for (IndexType c = r + 1; c < j; c++) {
1144 A(r, j) = A(r, j) + A(r, c) * A(c, j);
1145 }
1146 }
1147 for (IndexType r = 0; r < j; r++) {
1148 A(r, j) = -A(j, j) * A(r, j);
1149 }
1150 }
1151
1152 // Compute Ainv by solving A\L = Uinv
1153 for (IndexType j = numCols - 2; j >= 0; j--) {
1154 // Copy lower triangular data to work array and replace with 0
1155 for (IndexType r = j + 1; r < numRows; r++) {
1156 work(r) = A(r, j);
1157 A(r, j) = 0;
1158 }
1159
1160 for (IndexType r = 0; r < numRows; r++) {
1161 for (IndexType i = j + 1; i < numRows; i++) {
1162 A(r, j) = A(r, j) - work(i) * A(r, i);
1163 }
1164 }
1165 }
1166
1167 // Apply column interchanges
1168 for (IndexType j = numRows - 1; j >= 0; j--) {
1169 IndexType jp = ipiv(j) - 1;
1170 if (j != jp) {
1171 for (IndexType r = 0; r < numRows; r++) {
1172 Scalar temp = A(r, j);
1173 A(r, j) = A(r, jp);
1174 A(r, jp) = temp;
1175 }
1176 }
1177 }
1178}
1179
1180// mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1181// an implementation for trans != 'N' (the transpose and conjugate
1182// transpose cases).
1183#if 0
1184template<class LittleBlockType,
1185 class LittleVectorTypeX,
1186 class LittleVectorTypeY,
1187 class CoefficientType,
1188 class IndexType = int>
1189KOKKOS_INLINE_FUNCTION void
1190GEMV (const char trans,
1191 const CoefficientType& alpha,
1192 const LittleBlockType& A,
1193 const LittleVectorTypeX& x,
1194 const CoefficientType& beta,
1195 const LittleVectorTypeY& y)
1196{
1197 // y(0) returns a reference to the 0-th entry of y. Remove that
1198 // reference to get the type of each entry of y. It's OK if y has
1199 // zero entries -- this doesn't actually do y(i), it just returns
1200 // the type of that expression.
1201 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1202 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1203 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1204
1205 if (beta == 0.0) {
1206 if (alpha == 0.0) {
1207 for (IndexType i = 0; i < numRows; ++i) {
1208 y(i) = 0.0;
1209 }
1210 }
1211 else {
1212 for (IndexType i = 0; i < numRows; ++i) {
1213 y_value_type y_i = 0.0;
1214 for (IndexType j = 0; j < numCols; ++j) {
1215 y_i += A(i,j) * x(j);
1216 }
1217 y(i) = y_i;
1218 }
1219 }
1220 }
1221 else { // beta != 0
1222 if (alpha == 0.0) {
1223 if (beta == 0.0) {
1224 for (IndexType i = 0; i < numRows; ++i) {
1225 y(i) = 0.0;
1226 }
1227 }
1228 else {
1229 for (IndexType i = 0; i < numRows; ++i) {
1230 y(i) *= beta;
1231 }
1232 }
1233 }
1234 else {
1235 for (IndexType i = 0; i < numRows; ++i) {
1236 y_value_type y_i = beta * y(i);
1237 for (IndexType j = 0; j < numCols; ++j) {
1238 y_i += alpha * A(i,j) * x(j);
1239 }
1240 y(i) = y_i;
1241 }
1242 }
1243 }
1244}
1245
1246#endif // 0
1247
1248} // namespace Tpetra
1249
1250#endif // TPETRA_BLOCKVIEW_HPP
Struct that holds views of the contents of a CrsMatrix.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
@ ZERO
Replace old values with zero.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::COPY function.
Implementation of Tpetra::FILL function.
Computes the solution to Ax=b.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::SCAL function.