Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockView.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_BLOCKVIEW_HPP
11#define TPETRA_BLOCKVIEW_HPP
12
20
21#include "TpetraCore_config.h"
22#include "KokkosKernels_ArithTraits.hpp"
23#include "Kokkos_Complex.hpp"
24
25namespace Tpetra {
26
31
32namespace Impl {
33
40template <class ViewType1,
41 class ViewType2,
42 const int rank1 = ViewType1::rank>
43struct AbsMax {
44 static KOKKOS_INLINE_FUNCTION void
45 run(const ViewType2& Y, const ViewType1& X);
46};
47
52template <class ViewType1,
53 class ViewType2>
57 static KOKKOS_INLINE_FUNCTION void
58 run(const ViewType2& Y, const ViewType1& X) {
59 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
60 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
61 typedef typename std::remove_reference<decltype(Y(0, 0))>::type STY;
62 static_assert(!std::is_const<STY>::value,
63 "AbsMax: The type of each entry of Y must be nonconst.");
64 typedef typename std::decay<decltype(X(0, 0))>::type STX;
65 static_assert(std::is_same<STX, STY>::value,
66 "AbsMax: The type of each entry of X and Y must be the same.");
67 typedef KokkosKernels::ArithTraits<STY> KAT;
68
69 const int numCols = Y.extent(1);
70 const int numRows = Y.extent(0);
71 for (int j = 0; j < numCols; ++j) {
72 for (int i = 0; i < numRows; ++i) {
73 STY& Y_ij = Y(i, j); // use ref here to avoid 2nd op() call on Y
74 const STX X_ij = X(i, j);
75 // NOTE: no std::max (not a CUDA __device__ function); must
76 // cast back up to complex.
77 const auto Y_ij_abs = KAT::abs(Y_ij);
78 const auto X_ij_abs = KAT::abs(X_ij);
79 Y_ij = (Y_ij_abs >= X_ij_abs) ? static_cast<STY>(Y_ij_abs) : static_cast<STY>(X_ij_abs);
80 }
81 }
82 }
83};
84
89template <class ViewType1,
90 class ViewType2>
94 static KOKKOS_INLINE_FUNCTION void
95 run(const ViewType2& Y, const ViewType1& X) {
96 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
97 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
98
99 typedef typename std::remove_reference<decltype(Y(0))>::type STY;
100 static_assert(!std::is_const<STY>::value,
101 "AbsMax: The type of each entry of Y must be nonconst.");
102 typedef typename std::remove_const<typename std::remove_reference<decltype(X(0))>::type>::type STX;
103 static_assert(std::is_same<STX, STY>::value,
104 "AbsMax: The type of each entry of X and Y must be the same.");
105 typedef KokkosKernels::ArithTraits<STY> KAT;
106
107 const int numRows = Y.extent(0);
108 for (int i = 0; i < numRows; ++i) {
109 STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
110 const STX X_i = X(i);
111 // NOTE: no std::max (not a CUDA __device__ function); must
112 // cast back up to complex.
113 const auto Y_i_abs = KAT::abs(Y_i);
114 const auto X_i_abs = KAT::abs(X_i);
115 Y_i = (Y_i_abs >= X_i_abs) ? static_cast<STY>(Y_i_abs) : static_cast<STY>(X_i_abs);
116 }
117 }
118};
119
126template <class ViewType1, class ViewType2, const int rank = ViewType1::rank>
128absMax(const ViewType2& Y, const ViewType1& X) {
129 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
130 "absMax: ViewType1 and ViewType2 must have the same rank.");
132}
133
138template <class ViewType,
139 class CoefficientType,
140 class IndexType = int,
141 const bool is_contiguous = false,
142 const int rank = ViewType::rank>
143struct SCAL {
144 static KOKKOS_INLINE_FUNCTION void
145 run(const CoefficientType& alpha, const ViewType& x);
146};
147
150template <class ViewType,
151 class CoefficientType,
152 class IndexType>
155 static KOKKOS_INLINE_FUNCTION void
157 const IndexType numRows = static_cast<IndexType>(x.extent(0));
158
160#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
161#pragma unroll
162#endif
163 for (IndexType i = 0; i < numRows; ++i)
164 x(i) = alpha * x(i);
165 }
166};
169template <class ViewType,
170 class CoefficientType,
171 class IndexType>
174 static KOKKOS_INLINE_FUNCTION void
176 const IndexType numRows = static_cast<IndexType>(A.extent(0));
177 const IndexType numCols = static_cast<IndexType>(A.extent(1));
178
179 for (IndexType j = 0; j < numCols; ++j)
180 for (IndexType i = 0; i < numRows; ++i)
181 A(i, j) = alpha * A(i, j);
182 }
183};
184template <class ViewType,
185 class CoefficientType,
186 class IndexType,
187 const int rank>
190 static KOKKOS_INLINE_FUNCTION void
191 run(const CoefficientType& alpha, const ViewType& x) {
192 using x_value_type = typename std::decay<decltype(*x.data())>::type;
193 const IndexType span = static_cast<IndexType>(x.span());
195#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
196#pragma unroll
197#endif
198 for (IndexType i = 0; i < span; ++i)
199 x_ptr[i] = alpha * x_ptr[i];
200 }
201};
202
207template <class ViewType,
208 class InputType,
209 class IndexType = int,
210 const bool is_contiguous = false,
211 const int rank = ViewType::rank>
212struct FILL {
213 static KOKKOS_INLINE_FUNCTION void
214 run(const ViewType& x, const InputType& val);
215};
216
219template <class ViewType,
220 class InputType,
221 class IndexType>
223 static KOKKOS_INLINE_FUNCTION void
224 run(const ViewType& x, const InputType& val) {
225 const IndexType numRows = static_cast<IndexType>(x.extent(0));
226 for (IndexType i = 0; i < numRows; ++i)
227 x(i) = val;
228 }
229};
232template <class ViewType,
233 class InputType,
234 class IndexType>
236 static KOKKOS_INLINE_FUNCTION void
237 run(const ViewType& X, const InputType& val) {
238 const IndexType numRows = static_cast<IndexType>(X.extent(0));
239 const IndexType numCols = static_cast<IndexType>(X.extent(1));
240 for (IndexType j = 0; j < numCols; ++j)
241 for (IndexType i = 0; i < numRows; ++i)
242 X(i, j) = val;
243 }
244};
245template <class ViewType,
246 class InputType,
247 class IndexType,
248 const int rank>
250 static KOKKOS_INLINE_FUNCTION void
251 run(const ViewType& x, const InputType& val) {
252 const IndexType span = static_cast<IndexType>(x.span());
253 auto x_ptr = x.data();
254#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
255#pragma unroll
256#endif
257 for (IndexType i = 0; i < span; ++i)
258 x_ptr[i] = val;
259 }
260};
261
266template <class CoefficientType,
267 class ViewType1,
268 class ViewType2,
269 class IndexType = int,
270 const bool is_contiguous = false,
271 const int rank = ViewType1::rank>
272struct AXPY {
273 static KOKKOS_INLINE_FUNCTION void
274 run(const CoefficientType& alpha,
275 const ViewType1& x,
276 const ViewType2& y);
277};
278
281template <class CoefficientType,
282 class ViewType1,
283 class ViewType2,
284 class IndexType>
287 static KOKKOS_INLINE_FUNCTION void
289 const ViewType1& x,
290 const ViewType2& y) {
291 using KokkosKernels::ArithTraits;
292 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
293 "AXPY: x and y must have the same rank.");
294
295 const IndexType numRows = static_cast<IndexType>(y.extent(0));
298 for (IndexType i = 0; i < numRows; ++i)
299 y(i) += alpha * x(i);
300 }
301 }
302};
303
306template <class CoefficientType,
307 class ViewType1,
308 class ViewType2,
309 class IndexType>
312 static KOKKOS_INLINE_FUNCTION void
314 const ViewType1& X,
315 const ViewType2& Y) {
316 using KokkosKernels::ArithTraits;
317 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
318 "AXPY: X and Y must have the same rank.");
319 const IndexType numRows = static_cast<IndexType>(Y.extent(0));
320 const IndexType numCols = static_cast<IndexType>(Y.extent(1));
321
323 for (IndexType j = 0; j < numCols; ++j)
324 for (IndexType i = 0; i < numRows; ++i)
325 Y(i, j) += alpha * X(i, j);
326 }
327 }
328};
329
330template <class CoefficientType,
331 class ViewType1,
332 class ViewType2,
333 class IndexType,
334 const int rank>
337 static KOKKOS_INLINE_FUNCTION void
338 run(const CoefficientType& alpha,
339 const ViewType1& x,
340 const ViewType2& y) {
341 using KokkosKernels::ArithTraits;
342 static_assert(static_cast<int>(ViewType1::rank) == static_cast<int>(ViewType2::rank),
343 "AXPY: x and y must have the same rank.");
344
346 using x_value_type = typename std::decay<decltype(*x.data())>::type;
347 using y_value_type = typename std::decay<decltype(*y.data())>::type;
348 const IndexType span = static_cast<IndexType>(y.span());
349 const x_value_type* __restrict x_ptr(x.data());
351#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
352#pragma unroll
353#endif
354 for (IndexType i = 0; i < span; ++i)
355 y_ptr[i] += alpha * x_ptr[i];
356 }
357 }
358};
359
364template <class ViewType1,
365 class ViewType2,
366 class IndexType = int,
367 const bool is_contiguous = false,
368 const int rank = ViewType1::rank>
369struct COPY {
370 static KOKKOS_INLINE_FUNCTION void
371 run(const ViewType1& x, const ViewType2& y);
372};
373
376template <class ViewType1,
377 class ViewType2,
378 class IndexType>
381 static KOKKOS_INLINE_FUNCTION void
382 run(const ViewType1& x, const ViewType2& y) {
383 const IndexType numRows = static_cast<IndexType>(x.extent(0));
385 for (IndexType i = 0; i < numRows; ++i)
386 y(i) = x(i);
387 }
388};
389
392template <class ViewType1,
393 class ViewType2,
394 class IndexType>
397 static KOKKOS_INLINE_FUNCTION void
398 run(const ViewType1& X, const ViewType2& Y) {
399 const IndexType numRows = static_cast<IndexType>(Y.extent(0));
400 const IndexType numCols = static_cast<IndexType>(Y.extent(1));
402 for (IndexType j = 0; j < numCols; ++j)
403 for (IndexType i = 0; i < numRows; ++i)
404 Y(i, j) = X(i, j);
405 }
406};
407
408template <class ViewType1,
409 class ViewType2,
410 class IndexType,
411 const int rank>
414 static KOKKOS_INLINE_FUNCTION void
415 run(const ViewType1& x, const ViewType2& y) {
416 const IndexType span = static_cast<IndexType>(x.span());
417 using x_value_type = typename std::decay<decltype(*x.data())>::type;
418 using y_value_type = typename std::decay<decltype(*y.data())>::type;
419 const x_value_type* __restrict x_ptr(x.data());
421
422#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
423#pragma unroll
424#endif
425 for (IndexType i = 0; i < span; ++i)
426 y_ptr[i] = x_ptr[i];
427 }
428};
429
430template <class VecType1,
431 class BlkType,
432 class VecType2,
433 class CoeffType,
434 class IndexType = int,
435 bool is_contiguous = false,
436 class BlkLayoutType = typename BlkType::array_layout>
437struct GEMV {
438 static KOKKOS_INLINE_FUNCTION void
439 run(const CoeffType& alpha,
440 const BlkType& A,
441 const VecType1& x,
442 const VecType2& y) {
443 static_assert(VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
444 static_assert(BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
445 static_assert(VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
446
447 const IndexType numRows = static_cast<IndexType>(A.extent(0));
448 const IndexType numCols = static_cast<IndexType>(A.extent(1));
449
451 for (IndexType i = 0; i < numRows; ++i)
452 for (IndexType j = 0; j < numCols; ++j)
453 y(i) += alpha * A(i, j) * x(j);
454 }
455};
456
457template <class VecType1,
458 class BlkType,
459 class VecType2,
460 class CoeffType,
461 class IndexType>
462struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, true, Kokkos::LayoutLeft> {
463 static KOKKOS_INLINE_FUNCTION void
464 run(const CoeffType& alpha,
465 const BlkType& A,
466 const VecType1& x,
467 const VecType2& y) {
468 static_assert(VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
469 static_assert(BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
470 static_assert(VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
471
472 using A_value_type = typename std::decay<decltype(A(0, 0))>::type;
473 using x_value_type = typename std::decay<decltype(x(0))>::type;
474 using y_value_type = typename std::decay<decltype(y(0))>::type;
475
476 const IndexType numRows = static_cast<IndexType>(A.extent(0));
477 const IndexType numCols = static_cast<IndexType>(A.extent(1));
478
479 const A_value_type* __restrict A_ptr(A.data());
480 const IndexType as1(A.stride(1));
481 const x_value_type* __restrict x_ptr(x.data());
482 y_value_type* __restrict y_ptr(y.data());
483
484#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
485#pragma unroll
486#endif
487 for (IndexType j = 0; j < numCols; ++j) {
488 const x_value_type x_at_j = alpha * x_ptr[j];
489 const A_value_type* __restrict A_at_j = A_ptr + j * as1;
490#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
491#pragma unroll
492#endif
493 for (IndexType i = 0; i < numRows; ++i)
494 y_ptr[i] += A_at_j[i] * x_at_j;
495 }
496 }
497};
498
499template <class VecType1,
500 class BlkType,
501 class VecType2,
502 class CoeffType,
503 class IndexType>
504struct GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, true, Kokkos::LayoutRight> {
505 static KOKKOS_INLINE_FUNCTION void
506 run(const CoeffType& alpha,
507 const BlkType& A,
508 const VecType1& x,
509 const VecType2& y) {
510 static_assert(VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
511 static_assert(BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
512 static_assert(VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
513
514 using A_value_type = typename std::decay<decltype(A(0, 0))>::type;
515 using x_value_type = typename std::decay<decltype(x(0))>::type;
516 using y_value_type = typename std::decay<decltype(y(0))>::type;
517
518 const IndexType numRows = static_cast<IndexType>(A.extent(0));
519 const IndexType numCols = static_cast<IndexType>(A.extent(1));
520
521 const A_value_type* __restrict A_ptr(A.data());
522 const IndexType as0(A.stride(0));
523 const x_value_type* __restrict x_ptr(x.data());
524 y_value_type* __restrict y_ptr(y.data());
525
526#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
527#pragma unroll
528#endif
529 for (IndexType i = 0; i < numRows; ++i) {
530 y_value_type y_at_i(0);
531 const auto A_at_i = A_ptr + i * as0;
532#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
533#pragma unroll
534#endif
535 for (IndexType j = 0; j < numCols; ++j)
536 y_at_i += A_at_i[j] * x_ptr[j];
537 y_ptr[i] += alpha * y_at_i;
538 }
539 }
540};
541
542} // namespace Impl
543
546template <class ViewType,
547 class CoefficientType,
548 class IndexType = int,
549 const int rank = ViewType::rank>
550KOKKOS_INLINE_FUNCTION void
552 using LayoutType = typename ViewType::array_layout;
553 constexpr bool is_contiguous = (std::is_same<LayoutType, Kokkos::LayoutLeft>::value ||
554 std::is_same<LayoutType, Kokkos::LayoutRight>::value);
556}
557
559template <class ViewType,
560 class InputType,
561 class IndexType = int,
562 const int rank = ViewType::rank>
563KOKKOS_INLINE_FUNCTION void
564FILL(const ViewType& x, const InputType& val) {
565 using LayoutType = typename ViewType::array_layout;
566 constexpr bool is_contiguous = (std::is_same<LayoutType, Kokkos::LayoutLeft>::value ||
567 std::is_same<LayoutType, Kokkos::LayoutRight>::value);
569}
570
576template <class CoefficientType,
577 class ViewType1,
578 class ViewType2,
579 class IndexType = int,
580 const int rank = ViewType1::rank>
581KOKKOS_INLINE_FUNCTION void
583 const ViewType1& x,
584 const ViewType2& y) {
585 static_assert(static_cast<int>(ViewType1::rank) ==
586 static_cast<int>(ViewType2::rank),
587 "AXPY: x and y must have the same rank.");
588 using LayoutType1 = typename ViewType1::array_layout;
589 using LayoutType2 = typename ViewType2::array_layout;
590 constexpr bool is_layout_same = std::is_same<LayoutType1, LayoutType2>::value;
591 constexpr bool is_x_contiguous = (std::is_same<LayoutType1, Kokkos::LayoutLeft>::value ||
592 std::is_same<LayoutType1, Kokkos::LayoutRight>::value);
593 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
595}
596
605template <class ViewType1,
606 class ViewType2,
607 class IndexType = int,
608 const int rank = ViewType1::rank>
609KOKKOS_INLINE_FUNCTION void
610COPY(const ViewType1& x, const ViewType2& y) {
611 static_assert(static_cast<int>(ViewType1::rank) ==
612 static_cast<int>(ViewType2::rank),
613 "COPY: x and y must have the same rank.");
614 using LayoutType1 = typename ViewType1::array_layout;
615 using LayoutType2 = typename ViewType2::array_layout;
616 constexpr bool is_layout_same = std::is_same<LayoutType1, LayoutType2>::value;
617 constexpr bool is_x_contiguous = (std::is_same<LayoutType1, Kokkos::LayoutLeft>::value ||
618 std::is_same<LayoutType1, Kokkos::LayoutRight>::value);
619 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
621}
622
634template <class VecType1,
635 class BlkType,
636 class VecType2,
637 class CoeffType,
638 class IndexType = int>
639KOKKOS_INLINE_FUNCTION void
641 const BlkType& A,
642 const VecType1& x,
643 const VecType2& y) {
644 constexpr bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
645 std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
646 constexpr bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
647 std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
648 constexpr bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
649 std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
651 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run(alpha, A, x, y);
652}
653
661template <class ViewType1,
662 class ViewType2,
663 class ViewType3,
664 class CoefficientType,
665 class IndexType = int>
666KOKKOS_INLINE_FUNCTION void
667GEMM(const char transA[],
668 const char transB[],
669 const CoefficientType& alpha,
670 const ViewType1& A,
671 const ViewType2& B,
672 const CoefficientType& beta,
673 const ViewType3& C) {
674 // Assert that A, B, and C are in fact matrices
675 static_assert(ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
676 static_assert(ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
677 static_assert(ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
678
679 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
680 typedef KokkosKernels::ArithTraits<Scalar> STS;
681 const Scalar ZERO = STS::zero();
682 const Scalar ONE = STS::one();
683
684 // Get the dimensions
685 IndexType m, n, k;
686 if (transA[0] == 'N' || transA[0] == 'n') {
687 m = static_cast<IndexType>(A.extent(0));
688 n = static_cast<IndexType>(A.extent(1));
689 } else {
690 m = static_cast<IndexType>(A.extent(1));
691 n = static_cast<IndexType>(A.extent(0));
692 }
693 k = static_cast<IndexType>(C.extent(1));
694
695 // quick return if possible
696 if (alpha == ZERO && beta == ONE)
697 return;
698
699 // And if alpha equals zero...
700 if (alpha == ZERO) {
701 if (beta == ZERO) {
702 for (IndexType i = 0; i < m; i++) {
703 for (IndexType j = 0; j < k; j++) {
704 C(i, j) = ZERO;
705 }
706 }
707 } else {
708 for (IndexType i = 0; i < m; i++) {
709 for (IndexType j = 0; j < k; j++) {
710 C(i, j) = beta * C(i, j);
711 }
712 }
713 }
714 }
715
716 // Start the operations
717 if (transB[0] == 'n' || transB[0] == 'N') {
718 if (transA[0] == 'n' || transA[0] == 'N') {
719 // Form C = alpha*A*B + beta*C
720 for (IndexType j = 0; j < n; j++) {
721 if (beta == ZERO) {
722 for (IndexType i = 0; i < m; i++) {
723 C(i, j) = ZERO;
724 }
725 } else if (beta != ONE) {
726 for (IndexType i = 0; i < m; i++) {
727 C(i, j) = beta * C(i, j);
728 }
729 }
730 for (IndexType l = 0; l < k; l++) {
731 Scalar temp = alpha * B(l, j);
732 for (IndexType i = 0; i < m; i++) {
733 C(i, j) = C(i, j) + temp * A(i, l);
734 }
735 }
736 }
737 } else {
738 // Form C = alpha*A**T*B + beta*C
739 for (IndexType j = 0; j < n; j++) {
740 for (IndexType i = 0; i < m; i++) {
741 Scalar temp = ZERO;
742 for (IndexType l = 0; l < k; l++) {
743 temp = temp + A(l, i) * B(l, j);
744 }
745 if (beta == ZERO) {
746 C(i, j) = alpha * temp;
747 } else {
748 C(i, j) = alpha * temp + beta * C(i, j);
749 }
750 }
751 }
752 }
753 } else {
754 if (transA[0] == 'n' || transA[0] == 'N') {
755 // Form C = alpha*A*B**T + beta*C
756 for (IndexType j = 0; j < n; j++) {
757 if (beta == ZERO) {
758 for (IndexType i = 0; i < m; i++) {
759 C(i, j) = ZERO;
760 }
761 } else if (beta != ONE) {
762 for (IndexType i = 0; i < m; i++) {
763 C(i, j) = beta * C(i, j);
764 }
765 }
766 for (IndexType l = 0; l < k; l++) {
767 Scalar temp = alpha * B(j, l);
768 for (IndexType i = 0; i < m; i++) {
769 C(i, j) = C(i, j) + temp * A(i, l);
770 }
771 }
772 }
773 } else {
774 // Form C = alpha*A**T*B**T + beta*C
775 for (IndexType j = 0; j < n; j++) {
776 for (IndexType i = 0; i < m; i++) {
777 Scalar temp = ZERO;
778 for (IndexType l = 0; l < k; l++) {
779 temp = temp + A(l, i) * B(j, l);
780 }
781 if (beta == ZERO) {
782 C(i, j) = alpha * temp;
783 } else {
784 C(i, j) = alpha * temp + beta * C(i, j);
785 }
786 }
787 }
788 }
789 }
790}
791
793template <class LittleBlockType,
794 class LittleVectorType>
795KOKKOS_INLINE_FUNCTION void
797 // The type of an entry of ipiv is the index type.
798 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
799 static_assert(std::is_integral<IndexType>::value,
800 "GETF2: The type of each entry of ipiv must be an integer type.");
801 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
802 static_assert(!std::is_const<Scalar>::value,
803 "GETF2: A must not be a const View (or LittleBlock).");
804 static_assert(!std::is_const<std::remove_reference<decltype(ipiv(0))>>::value,
805 "GETF2: ipiv must not be a const View (or LittleBlock).");
806 static_assert(LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
807 typedef KokkosKernels::ArithTraits<Scalar> STS;
808 const Scalar ZERO = STS::zero();
809
810 const IndexType numRows = static_cast<IndexType>(A.extent(0));
811 const IndexType numCols = static_cast<IndexType>(A.extent(1));
812 const IndexType pivDim = static_cast<IndexType>(ipiv.extent(0));
813
814 // std::min is not a CUDA device function
815 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
816 if (pivDim < minPivDim) {
817 info = -2;
818 return;
819 }
820
821 // Initialize info
822 info = 0;
823
824 for (IndexType j = 0; j < pivDim; j++) {
825 // Find pivot and test for singularity
826 IndexType jp = j;
827 for (IndexType i = j + 1; i < numRows; i++) {
828 if (STS::abs(A(i, j)) > STS::abs(A(jp, j))) {
829 jp = i;
830 }
831 }
832 ipiv(j) = jp + 1;
833
834 if (A(jp, j) != ZERO) {
835 // Apply the interchange to columns 1:N
836 if (jp != j) {
837 for (IndexType i = 0; i < numCols; i++) {
838 Scalar temp = A(jp, i);
839 A(jp, i) = A(j, i);
840 A(j, i) = temp;
841 }
842 }
843
844 // Compute elements J+1:M of J-th column
845 for (IndexType i = j + 1; i < numRows; i++) {
846 A(i, j) = A(i, j) / A(j, j);
847 }
848 } else if (info == 0) {
849 info = j;
850 }
851
852 // Update trailing submatrix
853 for (IndexType r = j + 1; r < numRows; r++) {
854 for (IndexType c = j + 1; c < numCols; c++) {
855 A(r, c) = A(r, c) - A(r, j) * A(j, c);
856 }
857 }
858 }
859}
860
861namespace Impl {
862
866template <class LittleBlockType,
867 class LittleIntVectorType,
868 class LittleScalarVectorType,
869 const int rank = LittleScalarVectorType::rank>
870struct GETRS {
871 static KOKKOS_INLINE_FUNCTION void
872 run(const char mode[],
873 const LittleBlockType& A,
876 int& info);
877};
878
880template <class LittleBlockType,
884 static KOKKOS_INLINE_FUNCTION void
885 run(const char mode[],
886 const LittleBlockType& A,
889 int& info) {
890 // The type of an entry of ipiv is the index type.
891 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
892 // IndexType must be signed, because this code does a countdown loop
893 // to zero. Unsigned integers are always >= 0, even on underflow.
894 static_assert(std::is_integral<IndexType>::value &&
895 std::is_signed<IndexType>::value,
896 "GETRS: The type of each entry of ipiv must be a signed integer.");
897 typedef typename std::decay<decltype(A(0, 0))>::type Scalar;
898 static_assert(!std::is_const<std::remove_reference<decltype(B(0))>>::value,
899 "GETRS: B must not be a const View (or LittleBlock).");
900 static_assert(LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
901 static_assert(LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
902 static_assert(LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
903
904 typedef KokkosKernels::ArithTraits<Scalar> STS;
905 const Scalar ZERO = STS::zero();
906 const IndexType numRows = static_cast<IndexType>(A.extent(0));
907 const IndexType numCols = static_cast<IndexType>(A.extent(1));
908 const IndexType pivDim = static_cast<IndexType>(ipiv.extent(0));
909
910 info = 0;
911
912 // Ensure that the matrix is square
913 if (numRows != numCols) {
914 info = -2;
915 return;
916 }
917
918 // Ensure that the pivot array is sufficiently large
919 if (pivDim < numRows) {
920 info = -3;
921 return;
922 }
923
924 // No transpose case
925 if (mode[0] == 'n' || mode[0] == 'N') {
926 // Apply row interchanges to the RHS
927 for (IndexType i = 0; i < numRows; i++) {
928 if (ipiv(i) != i + 1) {
929 Scalar temp = B(i);
930 B(i) = B(ipiv(i) - 1);
931 B(ipiv(i) - 1) = temp;
932 }
933 }
934
935 // Solve Lx=b, overwriting b with x
936 for (IndexType r = 1; r < numRows; r++) {
937 for (IndexType c = 0; c < r; c++) {
938 B(r) = B(r) - A(r, c) * B(c);
939 }
940 }
941
942 // Solve Ux=b, overwriting b with x
943 for (IndexType r = numRows - 1; r >= 0; r--) {
944 // Check whether U is singular
945 if (A(r, r) == ZERO) {
946 info = r + 1;
947 return;
948 }
949
950 for (IndexType c = r + 1; c < numCols; c++) {
951 B(r) = B(r) - A(r, c) * B(c);
952 }
953 B(r) = B(r) / A(r, r);
954 }
955 }
956 // Transpose case
957 else if (mode[0] == 't' || mode[0] == 'T') {
958 info = -1; // NOT YET IMPLEMENTED
959 return;
960 }
961 // Conjugate transpose case
962 else if (mode[0] == 'c' || mode[0] == 'C') {
963 info = -1; // NOT YET IMPLEMENTED
964 return;
965 } else { // invalid mode
966 info = -1;
967 return;
968 }
969 }
970};
971
973template <class LittleBlockType,
977 static KOKKOS_INLINE_FUNCTION void
978 run(const char mode[],
979 const LittleBlockType& A,
982 int& info) {
983 // The type of an entry of ipiv is the index type.
984 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
985 static_assert(std::is_integral<IndexType>::value,
986 "GETRS: The type of each entry of ipiv must be an integer type.");
987 static_assert(!std::is_const<std::remove_reference<decltype(B(0))>>::value,
988 "GETRS: B must not be a const View (or LittleBlock).");
989 static_assert(LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
990 static_assert(LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
991 static_assert(LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
992
993 // The current implementation iterates over one right-hand side at
994 // a time. It might be faster to do this differently, but this
995 // should work for now.
996 const IndexType numRhs = B.extent(1);
997 info = 0;
998
999 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1000 auto B_cur = Kokkos::subview(B, Kokkos::ALL(), rhs);
1002 if (info != 0) {
1003 return;
1004 }
1005 }
1006 }
1007};
1008
1009} // namespace Impl
1010
1014template <class LittleBlockType,
1015 class LittleIntVectorType,
1018GETRS(const char mode[],
1019 const LittleBlockType& A,
1022 int& info) {
1024 LittleScalarVectorType::rank>::run(mode, A, ipiv, B, info);
1025}
1026
1041template <class LittleBlockType,
1042 class LittleIntVectorType,
1043 class LittleScalarVectorType>
1044KOKKOS_INLINE_FUNCTION void
1048 int& info) {
1049 // The type of an entry of ipiv is the index type.
1050 typedef typename std::decay<decltype(ipiv(0))>::type IndexType;
1051 // IndexType must be signed, because this code does a countdown loop
1052 // to zero. Unsigned integers are always >= 0, even on underflow.
1053 static_assert(std::is_integral<IndexType>::value &&
1054 std::is_signed<IndexType>::value,
1055 "GETRI: The type of each entry of ipiv must be a signed integer.");
1056 typedef typename std::remove_reference<decltype(A(0, 0))>::type Scalar;
1057 static_assert(!std::is_const<std::remove_reference<decltype(A(0, 0))>>::value,
1058 "GETRI: A must not be a const View (or LittleBlock).");
1059 static_assert(!std::is_const<std::remove_reference<decltype(work(0))>>::value,
1060 "GETRI: work must not be a const View (or LittleBlock).");
1061 static_assert(LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1062 typedef KokkosKernels::ArithTraits<Scalar> STS;
1063 const Scalar ZERO = STS::zero();
1064 const Scalar ONE = STS::one();
1065
1066 const IndexType numRows = static_cast<IndexType>(A.extent(0));
1067 const IndexType numCols = static_cast<IndexType>(A.extent(1));
1068 const IndexType pivDim = static_cast<IndexType>(ipiv.extent(0));
1069 const IndexType workDim = static_cast<IndexType>(work.extent(0));
1070
1071 info = 0;
1072
1073 // Ensure that the matrix is square
1074 if (numRows != numCols) {
1075 info = -1;
1076 return;
1077 }
1078
1079 // Ensure that the pivot array is sufficiently large
1080 if (pivDim < numRows) {
1081 info = -2;
1082 return;
1083 }
1084
1085 // Ensure that the work array is sufficiently large
1086 if (workDim < numRows) {
1087 info = -3;
1088 return;
1089 }
1090
1091 // Form Uinv in place
1092 for (IndexType j = 0; j < numRows; j++) {
1093 if (A(j, j) == ZERO) {
1094 info = j + 1;
1095 return;
1096 }
1097
1098 A(j, j) = ONE / A(j, j);
1099
1100 // Compute elements 1:j-1 of j-th column
1101 for (IndexType r = 0; r < j; r++) {
1102 A(r, j) = A(r, r) * A(r, j);
1103 for (IndexType c = r + 1; c < j; c++) {
1104 A(r, j) = A(r, j) + A(r, c) * A(c, j);
1105 }
1106 }
1107 for (IndexType r = 0; r < j; r++) {
1108 A(r, j) = -A(j, j) * A(r, j);
1109 }
1110 }
1111
1112 // Compute Ainv by solving A\L = Uinv
1113 for (IndexType j = numCols - 2; j >= 0; j--) {
1114 // Copy lower triangular data to work array and replace with 0
1115 for (IndexType r = j + 1; r < numRows; r++) {
1116 work(r) = A(r, j);
1117 A(r, j) = 0;
1118 }
1119
1120 for (IndexType r = 0; r < numRows; r++) {
1121 for (IndexType i = j + 1; i < numRows; i++) {
1122 A(r, j) = A(r, j) - work(i) * A(r, i);
1123 }
1124 }
1125 }
1126
1127 // Apply column interchanges
1128 for (IndexType j = numRows - 1; j >= 0; j--) {
1129 IndexType jp = ipiv(j) - 1;
1130 if (j != jp) {
1131 for (IndexType r = 0; r < numRows; r++) {
1132 Scalar temp = A(r, j);
1133 A(r, j) = A(r, jp);
1134 A(r, jp) = temp;
1135 }
1136 }
1137 }
1138}
1139
1140// mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1141// an implementation for trans != 'N' (the transpose and conjugate
1142// transpose cases).
1143#if 0
1144template<class LittleBlockType,
1145 class LittleVectorTypeX,
1146 class LittleVectorTypeY,
1147 class CoefficientType,
1148 class IndexType = int>
1149KOKKOS_INLINE_FUNCTION void
1150GEMV (const char trans,
1151 const CoefficientType& alpha,
1152 const LittleBlockType& A,
1153 const LittleVectorTypeX& x,
1154 const CoefficientType& beta,
1155 const LittleVectorTypeY& y)
1156{
1157 // y(0) returns a reference to the 0-th entry of y. Remove that
1158 // reference to get the type of each entry of y. It's OK if y has
1159 // zero entries -- this doesn't actually do y(i), it just returns
1160 // the type of that expression.
1161 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1162 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1163 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1164
1165 if (beta == 0.0) {
1166 if (alpha == 0.0) {
1167 for (IndexType i = 0; i < numRows; ++i) {
1168 y(i) = 0.0;
1169 }
1170 }
1171 else {
1172 for (IndexType i = 0; i < numRows; ++i) {
1173 y_value_type y_i = 0.0;
1174 for (IndexType j = 0; j < numCols; ++j) {
1175 y_i += A(i,j) * x(j);
1176 }
1177 y(i) = y_i;
1178 }
1179 }
1180 }
1181 else { // beta != 0
1182 if (alpha == 0.0) {
1183 if (beta == 0.0) {
1184 for (IndexType i = 0; i < numRows; ++i) {
1185 y(i) = 0.0;
1186 }
1187 }
1188 else {
1189 for (IndexType i = 0; i < numRows; ++i) {
1190 y(i) *= beta;
1191 }
1192 }
1193 }
1194 else {
1195 for (IndexType i = 0; i < numRows; ++i) {
1196 y_value_type y_i = beta * y(i);
1197 for (IndexType j = 0; j < numCols; ++j) {
1198 y_i += alpha * A(i,j) * x(j);
1199 }
1200 y(i) = y_i;
1201 }
1202 }
1203 }
1204}
1205
1206#endif // 0
1207
1208} // namespace Tpetra
1209
1210#endif // TPETRA_BLOCKVIEW_HPP
Struct that holds views of the contents of a CrsMatrix.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
@ ZERO
Replace old values with zero.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::COPY function.
Implementation of Tpetra::FILL function.
Computes the solution to Ax=b.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::SCAL function.