Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_BlockTriDiContainer_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
11#define IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
12
13#include <Teuchos_Details_MpiTypeTraits.hpp>
14
15#include <Tpetra_Distributor.hpp>
16#include <Tpetra_BlockMultiVector.hpp>
17#include <Tpetra_BlockCrsMatrix_Helpers.hpp>
18
19#include <KokkosKernels_ArithTraits.hpp>
20#include <KokkosBatched_Util.hpp>
21#include <KokkosBatched_Vector.hpp>
22#include <KokkosBatched_AddRadial_Decl.hpp>
23#include <KokkosBatched_AddRadial_Impl.hpp>
24#include <KokkosBatched_Gemm_Decl.hpp>
25#include <KokkosBatched_Gemm_Serial_Impl.hpp>
26#include <KokkosBatched_Gemv_Decl.hpp>
27#include <KokkosBatched_Trsm_Decl.hpp>
28#include <KokkosBatched_Trsm_Serial_Impl.hpp>
29#include <KokkosBatched_Trsv_Decl.hpp>
30#include <KokkosBatched_Trsv_Serial_Impl.hpp>
31#include <KokkosBatched_LU_Decl.hpp>
32#include <KokkosBatched_LU_Serial_Impl.hpp>
33
35#include "Ifpack2_BlockTriDiContainer_impl.hpp"
36
37#include <memory>
38
39namespace Ifpack2 {
40
44
45template <typename MatrixType>
46void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::initInternal(const Teuchos::RCP<const row_matrix_type>& matrix,
47 const Teuchos::RCP<const import_type>& importer,
48 const bool overlapCommAndComp,
49 const bool useSeqMethod,
50 const int block_size,
51 const bool explicitConversion) {
52 IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDiContainer::initInternal", initInternal, typename BlockHelperDetails::ImplType<MatrixType>::execution_space);
53
54 // create pointer of impl
55 {
56 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createImpl", createImpl);
57 impl_ = Teuchos::rcp(new BlockTriDiContainerDetails::ImplObject<MatrixType>());
58 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
59 }
60
61 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
62 // using block_crs_matrix_type = typename impl_type::tpetra_block_crs_matrix_type;
63
64 {
65 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA", setA);
66 if (explicitConversion) {
67 impl_->A = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
68 if (impl_->A.is_null()) {
69 TEUCHOS_TEST_FOR_EXCEPT_MSG(block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
70 {
71 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix);
72 impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size, true);
73 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
74 }
75 }
76 } else {
77 impl_->A = matrix;
78 }
79 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
80 }
81
82 impl_->tpetra_importer = Teuchos::null;
83 impl_->async_importer = Teuchos::null;
84
85 if (useSeqMethod) {
86 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod);
87 if (importer.is_null()) // there is no given importer, then create one
88 impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter<MatrixType>(impl_->A);
89 else
90 impl_->tpetra_importer = importer; // if there is a given importer, use it
91 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
92 } else {
93 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter);
94 // Leave tpetra_importer null even if user provided an importer.
95 // It is not used in the performant codepath (!useSeqMethod)
96 impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter<MatrixType>(impl_->A);
97 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
98 }
99
100 // as a result, there are
101 // 1) tpetra_importer is null , async_importer is null (no need for importer)
102 // 2) tpetra_importer is NOT null , async_importer is null (sequential method is used)
103 // 3) tpetra_importer is null , async_importer is NOT null (async method is used)
104
105 // temporary disabling
106 impl_->overlap_communication_and_computation = overlapCommAndComp;
107
108 {
109 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createZ", createZ);
110 impl_->Z = typename impl_type::tpetra_multivector_type();
111 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
112 }
113 {
114 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createW", createW);
115 impl_->W = typename impl_type::impl_scalar_type_1d_view();
116 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
117 }
118
119 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
120}
121
122template <typename MatrixType>
123void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::clearInternal() {
124 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearInternal", clearInternal);
125 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
126 using part_interface_type = BlockHelperDetails::PartInterface<MatrixType>;
127 using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags<MatrixType>;
128 using amd_type = BlockHelperDetails::AmD<MatrixType>;
129 using norm_manager_type = BlockHelperDetails::NormManager<MatrixType>;
130
131 impl_->A = Teuchos::null;
132 impl_->tpetra_importer = Teuchos::null;
133 impl_->async_importer = Teuchos::null;
134
135 impl_->Z = typename impl_type::tpetra_multivector_type();
136 impl_->W = typename impl_type::impl_scalar_type_1d_view();
137
138 impl_->part_interface = part_interface_type();
139 impl_->block_tridiags = block_tridiags_type();
140 impl_->a_minus_d = amd_type();
141 impl_->work = typename impl_type::vector_type_1d_view();
142 impl_->norm_manager = norm_manager_type();
143
144 impl_ = Teuchos::null;
145 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
146}
147
148template <typename MatrixType>
150 const Teuchos::Array<Teuchos::Array<local_ordinal_type> >& partitions,
151 const Teuchos::RCP<const import_type>& importer,
152 bool pointIndexed)
153 : Container<MatrixType>(matrix, partitions, pointIndexed)
154 , partitions_(partitions) {
155 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
156 const bool useSeqMethod = false;
157 const bool overlapCommAndComp = false;
158 initInternal(matrix, importer, overlapCommAndComp, useSeqMethod);
159 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
160 n_subparts_per_part_ = -1;
161 block_size_ = -1;
162 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
163}
164
165template <typename MatrixType>
167 const Teuchos::Array<Teuchos::Array<local_ordinal_type> >& partitions,
168 const int n_subparts_per_part,
169 const bool overlapCommAndComp,
170 const bool useSeqMethod,
171 const int block_size,
172 const bool explicitConversion)
173 : Container<MatrixType>(matrix, partitions, false)
174 , partitions_(partitions) {
175 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
176 initInternal(matrix, Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion);
177 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
178 n_subparts_per_part_ = n_subparts_per_part;
179 block_size_ = block_size;
180 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
181}
182
183template <typename MatrixType>
185 const Teuchos::RCP<const row_matrix_type>& matrix,
186 const Teuchos::Array<Teuchos::Array<local_ordinal_type> >& partitions,
187 bool useSeqMethod) {
189 auto matrixBCRS = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
190 bool hasBlockCrs = !matrixBCRS.is_null();
191 bool onePartitionPerRow = hasBlockCrs && size_t(partitions.size()) == matrixBCRS->getLocalNumRows();
192 constexpr bool isGPU = BlockHelperDetails::is_device<typename Helpers::execution_space>::value;
193 // The fused block Jacobi solve is actually slower on V100
194#ifdef KOKKOS_ARCH_VOLTA
195 constexpr bool isVolta = true;
196#else
197 constexpr bool isVolta = false;
198#endif
199 // Jacobi case can be represented in two ways:
200 // - partitions is empty
201 // - partitions has length equal to local number of rows (meaning all line lengths must be 1)
202 return isGPU && !isVolta && !useSeqMethod && hasBlockCrs && (!partitions.size() || onePartitionPerRow);
203}
204
205template <typename MatrixType>
208
209template <typename MatrixType>
211 if (List.isType<int>("partitioner: subparts per part"))
212 n_subparts_per_part_ = List.get<int>("partitioner: subparts per part");
213 if (List.isType<int>("partitioner: block size"))
214 block_size_ = List.get<int>("partitioner: block size");
215}
216
217template <typename MatrixType>
219 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize", initialize);
220 this->IsInitialized_ = true;
221 {
222 auto bA = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(impl_->A);
223 if (bA.is_null()) {
224 TEUCHOS_TEST_FOR_EXCEPT_MSG(block_size_ == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
225 {
226 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph);
227 auto A = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(impl_->A);
228 impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_, true);
229 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
230 }
231 } else {
232 impl_->blockGraph = Teuchos::rcpFromRef(bA->getCrsGraph());
233 }
234 }
235
236 {
237 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager);
238 impl_->part_interface = BlockTriDiContainerDetails::createPartInterface<MatrixType>(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_);
239 impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags<MatrixType>(impl_->part_interface);
240 impl_->norm_manager = BlockHelperDetails::NormManager<MatrixType>(impl_->A->getComm());
241 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
242 }
243
244 // We assume that if you called this method, you intend to recompute
245 // everything.
246 this->IsComputed_ = false;
247 TEUCHOS_ASSERT(!impl_->A.is_null()); // when initInternal is called, A_ must be set
248 {
249 bool useSeqMethod = !impl_->tpetra_importer.is_null();
250 BlockTriDiContainerDetails::performSymbolicPhase<MatrixType>(impl_->A,
251 impl_->blockGraph,
252 impl_->part_interface,
253 impl_->block_tridiags,
254 impl_->a_minus_d,
255 impl_->overlap_communication_and_computation,
256 impl_->async_importer, useSeqMethod, impl_->use_fused_jacobi);
257 }
258 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
259}
260
261template <typename MatrixType>
263 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
264 this->IsComputed_ = false;
265 if (!this->isInitialized())
266 this->initialize();
267 {
268 BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
269 impl_->blockGraph,
270 impl_->part_interface, impl_->block_tridiags,
271 KokkosKernels::ArithTraits<magnitude_type>::zero(),
272 impl_->use_fused_jacobi);
273 }
274 this->IsComputed_ = true;
275 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
276}
277
278template <typename MatrixType>
280 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearBlocks", clearBlocks);
281 clearInternal();
282 this->IsInitialized_ = false;
283 this->IsComputed_ = false;
285 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
286}
287
288template <typename MatrixType>
290 bool zeroStartingSolution, int numSweeps) const {
291 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
292 const magnitude_type tol = KokkosKernels::ArithTraits<magnitude_type>::zero();
293 const int check_tol_every = 1;
294
295 if (!impl_->use_fused_jacobi) {
296 BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
297 impl_->blockGraph,
298 impl_->tpetra_importer,
299 impl_->async_importer,
300 impl_->overlap_communication_and_computation,
301 X, Y, impl_->Z, impl_->W,
302 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
303 impl_->work,
304 impl_->norm_manager,
305 dampingFactor,
306 zeroStartingSolution,
307 numSweeps,
308 tol,
309 check_tol_every);
310 } else {
311 BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
312 impl_->async_importer,
313 impl_->overlap_communication_and_computation,
314 X, Y, impl_->W,
315 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
316 impl_->work_flat,
317 impl_->norm_manager,
318 dampingFactor,
319 zeroStartingSolution,
320 numSweeps,
321 tol,
322 check_tol_every);
323 }
324 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
325}
326
327template <typename MatrixType>
332
333template <typename MatrixType>
335 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
336 this->IsComputed_ = false;
337 if (!this->isInitialized())
338 this->initialize();
339 {
340 BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
341 impl_->blockGraph,
342 impl_->part_interface, impl_->block_tridiags,
343 in.addRadiallyToDiagonal,
344 impl_->use_fused_jacobi);
345 }
346 this->IsComputed_ = true;
347 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
348}
349
350template <typename MatrixType>
353 ApplyParameters in;
354 in.dampingFactor = Teuchos::ScalarTraits<scalar_type>::one();
355 return in;
356}
357
358template <typename MatrixType>
360 const ApplyParameters& in) const {
361 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
362 int r_val = 0;
363 {
364 if (!impl_->use_fused_jacobi) {
365 r_val = BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
366 impl_->blockGraph,
367 impl_->tpetra_importer,
368 impl_->async_importer,
369 impl_->overlap_communication_and_computation,
370 X, Y, impl_->Z, impl_->W,
371 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
372 impl_->work,
373 impl_->norm_manager,
374 in.dampingFactor,
375 in.zeroStartingSolution,
376 in.maxNumSweeps,
377 in.tolerance,
378 in.checkToleranceEvery);
379 } else {
380 r_val = BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
381 impl_->async_importer,
382 impl_->overlap_communication_and_computation,
383 X, Y, impl_->W,
384 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
385 impl_->work_flat,
386 impl_->norm_manager,
387 in.dampingFactor,
388 in.zeroStartingSolution,
389 in.maxNumSweeps,
390 in.tolerance,
391 in.checkToleranceEvery);
392 }
393 }
394 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
395 return r_val;
396}
397
398template <typename MatrixType>
401 return impl_->norm_manager.getNorms0();
402}
403
404template <typename MatrixType>
407 return impl_->norm_manager.getNormsFinal();
408}
409
410template <typename MatrixType>
411void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::apply(ConstHostView /* X */, HostView /* Y */, int /* blockIndex */, Teuchos::ETransp /* mode */,
412 scalar_type /* alpha */, scalar_type /* beta */) const {
413 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::apply is not implemented. You may have reached this message "
414 << "because you want to use this container's performance-portable Jacobi iteration. In "
415 << "that case, set \"relaxation: type\" to \"MT Split Jacobi\" rather than \"Jacobi\".");
416}
417
418template <typename MatrixType>
419void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::weightedApply(ConstHostView /* X */, HostView /* Y */, ConstHostView /* D */, int /* blockIndex */,
420 Teuchos::ETransp /* mode */, scalar_type /* alpha */, scalar_type /* beta */) const {
421 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::weightedApply is not implemented.");
422}
423
424template <typename MatrixType>
425std::ostream&
427 Teuchos::FancyOStream fos(Teuchos::rcp(&os, false));
428 fos.setOutputToRootOnly(0);
429 describe(fos);
430 return os;
431}
432
433template <typename MatrixType>
434std::string
436 std::ostringstream oss;
437 oss << Teuchos::Describable::description();
438 if (this->isInitialized()) {
439 if (this->isComputed()) {
440 oss << "{status = initialized, computed";
441 } else {
442 oss << "{status = initialized, not computed";
443 }
444 } else {
445 oss << "{status = not initialized, not computed";
446 }
447
448 oss << "}";
449 return oss.str();
450}
451
452template <typename MatrixType>
454 describe(Teuchos::FancyOStream& os,
455 const Teuchos::EVerbosityLevel verbLevel) const {
456 using std::endl;
457 if (verbLevel == Teuchos::VERB_NONE) return;
458 os << "================================================================================" << endl
459 << "Ifpack2::BlockTriDiContainer" << endl
460 << "Number of blocks = " << this->numBlocks_ << endl
461 << "isInitialized() = " << this->IsInitialized_ << endl
462 << "isComputed() = " << this->IsComputed_ << endl
463 << "================================================================================" << endl
464 << endl;
465}
466
467template <typename MatrixType>
468std::string
469BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::getName() { return "Ifpack2::BlockTriDiContainer::ImplSimdTag"; }
470
471#define IFPACK2_BLOCKTRIDICONTAINER_INSTANT(S, LO, GO, N) \
472 template class Ifpack2::BlockTriDiContainer<Tpetra::RowMatrix<S, LO, GO, N> >;
473} // namespace Ifpack2
474#endif
Ifpack2::BlockTriDiContainer class declaration.
Store and solve local block tridiagonal linear problems.
Definition Ifpack2_BlockTriDiContainer_decl.hpp:107
Interface for creating and solving a set of local linear problems.
Definition Ifpack2_Container_decl.hpp:79
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40
Definition Ifpack2_BlockHelper.hpp:270
Definition Ifpack2_BlockHelper.hpp:377