Compadre 1.6.4
Loading...
Searching...
No Matches
Compadre_LinearAlgebra.cpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Compadre: COMpatible PArticle Discretization and REmap Toolkit
4//
5// Copyright 2018 NTESS and the Compadre contributors.
6// SPDX-License-Identifier: BSD-2-Clause
7// *****************************************************************************
8// @HEADER
10
11#include "KokkosBatched_UTV_Decl.hpp"
12#include "KokkosBatched_SolveUTV_Decl_Compadre.hpp"
13
14using namespace KokkosBatched;
15
16namespace Compadre{
17namespace GMLS_LinearAlgebra {
18
19 template<typename DeviceType,
20 typename AlgoTagType,
21 typename MatrixViewType_A,
22 typename MatrixViewType_B,
23 typename MatrixViewType_X>
25 MatrixViewType_A _a;
26 MatrixViewType_B _b;
27
30 int _M, _N, _NRHS;
32
33 KOKKOS_INLINE_FUNCTION
35 const int M,
36 const int N,
37 const int NRHS,
38 const MatrixViewType_A &a,
39 const MatrixViewType_B &b,
40 const bool implicit_RHS)
41 : _a(a), _b(b), _M(M), _N(N), _NRHS(NRHS), _implicit_RHS(implicit_RHS)
43
44 template<typename MemberType>
45 KOKKOS_INLINE_FUNCTION
46 void operator()(const MemberType &member) const {
47
48 const int k = member.league_rank();
49
50 // workspace vectors
51 scratch_vector_type ww_fast(member.team_scratch(_pm_getTeamScratchLevel_0), 3*_M);
52 scratch_vector_type ww_slow(member.team_scratch(_pm_getTeamScratchLevel_1), _N*_NRHS);
53
54 scratch_matrix_right_type aa(_a.data() + TO_GLOBAL(k)*TO_GLOBAL(_a.extent(1))*TO_GLOBAL(_a.extent(2)),
55 _a.extent(1), _a.extent(2));
56 scratch_matrix_right_type bb(_b.data() + TO_GLOBAL(k)*TO_GLOBAL(_b.extent(1))*TO_GLOBAL(_b.extent(2)),
57 _b.extent(1), _b.extent(2));
58 scratch_matrix_right_type xx(_b.data() + TO_GLOBAL(k)*TO_GLOBAL(_b.extent(1))*TO_GLOBAL(_b.extent(2)),
59 _b.extent(1), _b.extent(2));
60
61 // if sizes don't match extents, then copy to a view with extents matching sizes
62 if ((size_t)_M!=_a.extent(1) || (size_t)_N!=_a.extent(2)) {
63 scratch_matrix_right_type tmp(ww_slow.data(), _M, _N);
64 auto aaa = scratch_matrix_right_type(_a.data() + TO_GLOBAL(k)*TO_GLOBAL(_a.extent(1))*TO_GLOBAL(_a.extent(2)), _M, _N);
65 // copy A to W, then back to A
66 Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_M),[&](const int &i) {
67 Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_N),[&](const int &j) {
68 tmp(i,j) = aa(i,j);
69 });
70 });
71 member.team_barrier();
72 Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_M),[&](const int &i) {
73 Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_N),[&](const int &j) {
74 aaa(i,j) = tmp(i,j);
75 });
76 });
77 member.team_barrier();
78 aa = aaa;
79 }
80
81 if (std::is_same<typename MatrixViewType_B::array_layout, layout_left>::value) {
82 scratch_matrix_right_type tmp(ww_slow.data(), _N, _NRHS);
83 // coming from LU
84 // then copy B to W, then back to B
85 auto bb_left =
86 scratch_matrix_left_type(_b.data() + TO_GLOBAL(k)*TO_GLOBAL(_b.extent(1))*TO_GLOBAL(_b.extent(2)),
87 _b.extent(1), _b.extent(2));
88 Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_N),[&](const int &i) {
89 Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_NRHS),[&](const int &j) {
90 tmp(i,j) = bb_left(i,j);
91 });
92 });
93 member.team_barrier();
94 Kokkos::parallel_for(Kokkos::TeamThreadRange(member,0,_N),[&](const int &i) {
95 Kokkos::parallel_for(Kokkos::ThreadVectorRange(member,0,_NRHS),[&](const int &j) {
96 bb(i,j) = tmp(i,j);
97 });
98 });
99 }
100
101 scratch_matrix_right_type uu(member.team_scratch(_pm_getTeamScratchLevel_1), _M, _N /* only N columns of U are filled, maximum */);
102 scratch_matrix_right_type vv(member.team_scratch(_pm_getTeamScratchLevel_1), _N, _N);
103 scratch_local_index_type pp(member.team_scratch(_pm_getTeamScratchLevel_0), _N);
104
105 bool do_print = false;
106 if (do_print) {
107 Kokkos::single(Kokkos::PerTeam(member), [&] () {
108 using Kokkos::printf;
109 //print a
110 printf("a=zeros(%lu,%lu);\n", aa.extent(0), aa.extent(1));
111 for (size_t i=0; i<aa.extent(0); ++i) {
112 for (size_t j=0; j<aa.extent(1); ++j) {
113 printf("a(%lu,%lu)= %f;\n", i+1,j+1, aa(i,j));
114 }
115 }
116 //print b
117 printf("b=zeros(%lu,%lu);\n", bb.extent(0), bb.extent(1));
118 for (size_t i=0; i<bb.extent(0); ++i) {
119 for (size_t j=0; j<bb.extent(1); ++j) {
120 printf("b(%lu,%lu)= %f;\n", i+1,j+1, bb(i,j));
121 }
122 }
123 });
124 }
125 do_print = false;
126
127 /// Solving Ax = b using UTV transformation
128 /// A P^T P x = b
129 /// UTV P x = b;
130
131 /// UTV = A P^T
132 int matrix_rank(0);
133 member.team_barrier();
134 TeamVectorUTV<MemberType,AlgoTagType>
135 ::invoke(member, aa, pp, uu, vv, ww_fast, matrix_rank);
136 member.team_barrier();
137
138 if (do_print) {
139 Kokkos::single(Kokkos::PerTeam(member), [&] () {
140 using Kokkos::printf;
141 printf("matrix_rank: %d\n", matrix_rank);
142 //print u
143 printf("u=zeros(%lu,%lu);\n", uu.extent(0), uu.extent(1));
144 for (size_t i=0; i<uu.extent(0); ++i) {
145 for (size_t j=0; j<uu.extent(1); ++j) {
146 printf("u(%lu,%lu)= %f;\n", i+1,j+1, uu(i,j));
147 }
148 }
149 });
150 }
151 TeamVectorSolveUTVCompadre<MemberType,AlgoTagType>
152 ::invoke(member, matrix_rank, _M, _N, _NRHS, uu, aa, vv, pp, bb, xx, ww_slow, ww_fast, _implicit_RHS);
153 member.team_barrier();
154
155 }
156
157 inline
159 typedef typename MatrixViewType_A::non_const_value_type value_type;
160 std::string name_region("KokkosBatched::Test::TeamVectorSolveUTVCompadre");
161 std::string name_value_type = ( std::is_same<value_type,float>::value ? "::Float" :
162 std::is_same<value_type,double>::value ? "::Double" :
163 std::is_same<value_type,Kokkos::complex<float> >::value ? "::ComplexFloat" :
164 std::is_same<value_type,Kokkos::complex<double> >::value ? "::ComplexDouble" : "::UnknownValueType" );
165 std::string name = name_region + name_value_type;
166 Kokkos::Profiling::pushRegion( name.c_str() );
167
170
171 int scratch_size = scratch_matrix_right_type::shmem_size(_N, _N); // V
172 scratch_size += scratch_matrix_right_type::shmem_size(_M, _N /* only N columns of U are filled, maximum */); // U
173 scratch_size += scratch_vector_type::shmem_size(_N*_NRHS); // W (for SolveUTV)
174
175 int l0_scratch_size = scratch_vector_type::shmem_size(_N); // P (temporary)
176 l0_scratch_size += scratch_vector_type::shmem_size(3*_M); // W (for UTV)
177
179 pm.setTeamScratchSize(0, l0_scratch_size);
180 pm.setTeamScratchSize(1, scratch_size);
181
182 pm.CallFunctorWithTeamThreadsAndVectors(*this, _a.extent(0));
183 Kokkos::fence();
184
185 Kokkos::Profiling::popRegion();
186 }
187 };
188
189
190
191template <typename A_layout, typename B_layout, typename X_layout>
192void batchQRPivotingSolve(ParallelManager pm, double *A, int lda, int nda, double *B, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const bool implicit_RHS) {
193
194 typedef Algo::UTV::Unblocked algo_tag_type;
195 typedef Kokkos::View<double***, A_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
196 MatrixViewType_A;
197 typedef Kokkos::View<double***, B_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
198 MatrixViewType_B;
199 typedef Kokkos::View<double***, X_layout, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
200 MatrixViewType_X;
201
202 MatrixViewType_A mat_A(A, num_matrices, lda, nda);
203 MatrixViewType_B mat_B(B, num_matrices, ldb, ndb);
204
206 <device_execution_space, algo_tag_type, MatrixViewType_A, MatrixViewType_B, MatrixViewType_X>(M,N,NRHS,mat_A,mat_B,implicit_RHS).run(pm);
207
208}
209
210template void batchQRPivotingSolve<layout_right, layout_right, layout_right>(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
211template void batchQRPivotingSolve<layout_right, layout_right, layout_left >(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
212template void batchQRPivotingSolve<layout_right, layout_left , layout_right>(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
213template void batchQRPivotingSolve<layout_right, layout_left , layout_left >(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
214template void batchQRPivotingSolve<layout_left , layout_right, layout_right>(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
215template void batchQRPivotingSolve<layout_left , layout_right, layout_left >(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
216template void batchQRPivotingSolve<layout_left , layout_left , layout_right>(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
217template void batchQRPivotingSolve<layout_left , layout_left , layout_left >(ParallelManager,double*,int,int,double*,int,int,int,int,int,const int,const bool);
218
219} // GMLS_LinearAlgebra
220} // Compadre
#define TO_GLOBAL(variable)
void setTeamScratchSize(const int level, const int value)
void CallFunctorWithTeamThreadsAndVectors(C functor, const global_index_type batch_size, const int threads_per_team=-1, const int vector_lanes_per_thread=-1) const
Calls a parallel_for parallel_for will break out over loops over teams with each vector lane executin...
KOKKOS_INLINE_FUNCTION int getTeamScratchLevel(const int level) const
template void batchQRPivotingSolve< layout_left, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_left, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
void batchQRPivotingSolve(ParallelManager pm, double *A, int lda, int nda, double *B, int ldb, int ndb, int M, int N, int NRHS, const int num_matrices, const bool implicit_RHS)
Solves a batch of problems with QR+Pivoting.
template void batchQRPivotingSolve< layout_left, layout_right, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_left, layout_left, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_right, layout_left, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_right, layout_right, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_right, layout_left, layout_left >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
template void batchQRPivotingSolve< layout_right, layout_right, layout_right >(ParallelManager, double *, int, int, double *, int, int, int, int, int, const int, const bool)
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
Kokkos::View< int *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_local_index_type
Kokkos::View< double **, layout_left, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_left_type
Kokkos::DefaultExecutionSpace device_execution_space
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
KOKKOS_INLINE_FUNCTION void operator()(const MemberType &member) const
KOKKOS_INLINE_FUNCTION Functor_TestBatchedTeamVectorSolveUTV(const int M, const int N, const int NRHS, const MatrixViewType_A &a, const MatrixViewType_B &b, const bool implicit_RHS)