Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_BlockTriDiContainer_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
11#define IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
12
13#include <Teuchos_Details_MpiTypeTraits.hpp>
14
15#include <Tpetra_Distributor.hpp>
16#include <Tpetra_BlockMultiVector.hpp>
17#include <Tpetra_BlockCrsMatrix_Helpers.hpp>
18
19#if KOKKOS_VERSION >= 40799
20#include <KokkosKernels_ArithTraits.hpp>
21#else
22#include <Kokkos_ArithTraits.hpp>
23#endif
24#include <KokkosBatched_Util.hpp>
25#include <KokkosBatched_Vector.hpp>
26#include <KokkosBatched_AddRadial_Decl.hpp>
27#include <KokkosBatched_AddRadial_Impl.hpp>
28#include <KokkosBatched_Gemm_Decl.hpp>
29#include <KokkosBatched_Gemm_Serial_Impl.hpp>
30#include <KokkosBatched_Gemv_Decl.hpp>
31#include <KokkosBatched_Trsm_Decl.hpp>
32#include <KokkosBatched_Trsm_Serial_Impl.hpp>
33#include <KokkosBatched_Trsv_Decl.hpp>
34#include <KokkosBatched_Trsv_Serial_Impl.hpp>
35#include <KokkosBatched_LU_Decl.hpp>
36#include <KokkosBatched_LU_Serial_Impl.hpp>
37
39#include "Ifpack2_BlockTriDiContainer_impl.hpp"
40
41#include <memory>
42
43namespace Ifpack2 {
44
48
49template <typename MatrixType>
50void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::initInternal(const Teuchos::RCP<const row_matrix_type>& matrix,
51 const Teuchos::RCP<const import_type>& importer,
52 const bool overlapCommAndComp,
53 const bool useSeqMethod,
54 const int block_size,
55 const bool explicitConversion) {
56 IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDiContainer::initInternal", initInternal, typename BlockHelperDetails::ImplType<MatrixType>::execution_space);
57
58 // create pointer of impl
59 {
60 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createImpl", createImpl);
61 impl_ = Teuchos::rcp(new BlockTriDiContainerDetails::ImplObject<MatrixType>());
62 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
63 }
64
65 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
66 // using block_crs_matrix_type = typename impl_type::tpetra_block_crs_matrix_type;
67
68 {
69 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA", setA);
70 if (explicitConversion) {
71 impl_->A = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
72 if (impl_->A.is_null()) {
73 TEUCHOS_TEST_FOR_EXCEPT_MSG(block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
74 {
75 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix);
76 impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size, true);
77 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
78 }
79 }
80 } else {
81 impl_->A = matrix;
82 }
83 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
84 }
85
86 impl_->tpetra_importer = Teuchos::null;
87 impl_->async_importer = Teuchos::null;
88
89 if (useSeqMethod) {
90 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod);
91 if (importer.is_null()) // there is no given importer, then create one
92 impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter<MatrixType>(impl_->A);
93 else
94 impl_->tpetra_importer = importer; // if there is a given importer, use it
95 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
96 } else {
97 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter);
98 // Leave tpetra_importer null even if user provided an importer.
99 // It is not used in the performant codepath (!useSeqMethod)
100 impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter<MatrixType>(impl_->A);
101 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
102 }
103
104 // as a result, there are
105 // 1) tpetra_importer is null , async_importer is null (no need for importer)
106 // 2) tpetra_importer is NOT null , async_importer is null (sequential method is used)
107 // 3) tpetra_importer is null , async_importer is NOT null (async method is used)
108
109 // temporary disabling
110 impl_->overlap_communication_and_computation = overlapCommAndComp;
111
112 {
113 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createZ", createZ);
114 impl_->Z = typename impl_type::tpetra_multivector_type();
115 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
116 }
117 {
118 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createW", createW);
119 impl_->W = typename impl_type::impl_scalar_type_1d_view();
120 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
121 }
122
123 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
124}
125
126template <typename MatrixType>
127void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::clearInternal() {
128 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearInternal", clearInternal);
129 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
130 using part_interface_type = BlockHelperDetails::PartInterface<MatrixType>;
131 using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags<MatrixType>;
132 using amd_type = BlockHelperDetails::AmD<MatrixType>;
133 using norm_manager_type = BlockHelperDetails::NormManager<MatrixType>;
134
135 impl_->A = Teuchos::null;
136 impl_->tpetra_importer = Teuchos::null;
137 impl_->async_importer = Teuchos::null;
138
139 impl_->Z = typename impl_type::tpetra_multivector_type();
140 impl_->W = typename impl_type::impl_scalar_type_1d_view();
141
142 impl_->part_interface = part_interface_type();
143 impl_->block_tridiags = block_tridiags_type();
144 impl_->a_minus_d = amd_type();
145 impl_->work = typename impl_type::vector_type_1d_view();
146 impl_->norm_manager = norm_manager_type();
147
148 impl_ = Teuchos::null;
149 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
150}
151
152template <typename MatrixType>
154 const Teuchos::Array<Teuchos::Array<local_ordinal_type> >& partitions,
155 const Teuchos::RCP<const import_type>& importer,
156 bool pointIndexed)
157 : Container<MatrixType>(matrix, partitions, pointIndexed)
158 , partitions_(partitions) {
159 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
160 const bool useSeqMethod = false;
161 const bool overlapCommAndComp = false;
162 initInternal(matrix, importer, overlapCommAndComp, useSeqMethod);
163 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
164 n_subparts_per_part_ = -1;
165 block_size_ = -1;
166 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
167}
168
169template <typename MatrixType>
171 const Teuchos::Array<Teuchos::Array<local_ordinal_type> >& partitions,
172 const int n_subparts_per_part,
173 const bool overlapCommAndComp,
174 const bool useSeqMethod,
175 const int block_size,
176 const bool explicitConversion)
177 : Container<MatrixType>(matrix, partitions, false)
178 , partitions_(partitions) {
179 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
180 initInternal(matrix, Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion);
181 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
182 n_subparts_per_part_ = n_subparts_per_part;
183 block_size_ = block_size;
184 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
185}
186
187template <typename MatrixType>
189 const Teuchos::RCP<const row_matrix_type>& matrix,
190 const Teuchos::Array<Teuchos::Array<local_ordinal_type> >& partitions,
191 bool useSeqMethod) {
193 auto matrixBCRS = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
194 bool hasBlockCrs = !matrixBCRS.is_null();
195 bool onePartitionPerRow = hasBlockCrs && size_t(partitions.size()) == matrixBCRS->getLocalNumRows();
196 constexpr bool isGPU = BlockHelperDetails::is_device<typename Helpers::execution_space>::value;
197 // The fused block Jacobi solve is actually slower on V100
198#ifdef KOKKOS_ARCH_VOLTA
199 constexpr bool isVolta = true;
200#else
201 constexpr bool isVolta = false;
202#endif
203 // Jacobi case can be represented in two ways:
204 // - partitions is empty
205 // - partitions has length equal to local number of rows (meaning all line lengths must be 1)
206 return isGPU && !isVolta && !useSeqMethod && hasBlockCrs && (!partitions.size() || onePartitionPerRow);
207}
208
209template <typename MatrixType>
212
213template <typename MatrixType>
215 if (List.isType<int>("partitioner: subparts per part"))
216 n_subparts_per_part_ = List.get<int>("partitioner: subparts per part");
217 if (List.isType<int>("partitioner: block size"))
218 block_size_ = List.get<int>("partitioner: block size");
219}
220
221template <typename MatrixType>
223 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize", initialize);
224 this->IsInitialized_ = true;
225 {
226 auto bA = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(impl_->A);
227 if (bA.is_null()) {
228 TEUCHOS_TEST_FOR_EXCEPT_MSG(block_size_ == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
229 {
230 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph);
231 auto A = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(impl_->A);
232 impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_, true);
233 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
234 }
235 } else {
236 impl_->blockGraph = Teuchos::rcpFromRef(bA->getCrsGraph());
237 }
238 }
239
240 {
241 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager);
242 impl_->part_interface = BlockTriDiContainerDetails::createPartInterface<MatrixType>(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_);
243 impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags<MatrixType>(impl_->part_interface);
244 impl_->norm_manager = BlockHelperDetails::NormManager<MatrixType>(impl_->A->getComm());
245 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
246 }
247
248 // We assume that if you called this method, you intend to recompute
249 // everything.
250 this->IsComputed_ = false;
251 TEUCHOS_ASSERT(!impl_->A.is_null()); // when initInternal is called, A_ must be set
252 {
253 bool useSeqMethod = !impl_->tpetra_importer.is_null();
254 BlockTriDiContainerDetails::performSymbolicPhase<MatrixType>(impl_->A,
255 impl_->blockGraph,
256 impl_->part_interface,
257 impl_->block_tridiags,
258 impl_->a_minus_d,
259 impl_->overlap_communication_and_computation,
260 impl_->async_importer, useSeqMethod, impl_->use_fused_jacobi);
261 }
262 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
263}
264
265template <typename MatrixType>
267 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
268 this->IsComputed_ = false;
269 if (!this->isInitialized())
270 this->initialize();
271 {
272 BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
273 impl_->blockGraph,
274 impl_->part_interface, impl_->block_tridiags,
275#if KOKKOS_VERSION >= 40799
276 KokkosKernels::ArithTraits<magnitude_type>::zero(),
277#else
278 Kokkos::ArithTraits<magnitude_type>::zero(),
279#endif
280 impl_->use_fused_jacobi);
281 }
282 this->IsComputed_ = true;
283 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
284}
285
286template <typename MatrixType>
288 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearBlocks", clearBlocks);
289 clearInternal();
290 this->IsInitialized_ = false;
291 this->IsComputed_ = false;
293 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
294}
295
296template <typename MatrixType>
298 bool zeroStartingSolution, int numSweeps) const {
299 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
300#if KOKKOS_VERSION >= 40799
301 const magnitude_type tol = KokkosKernels::ArithTraits<magnitude_type>::zero();
302#else
303 const magnitude_type tol = Kokkos::ArithTraits<magnitude_type>::zero();
304#endif
305 const int check_tol_every = 1;
306
307 if (!impl_->use_fused_jacobi) {
308 BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
309 impl_->blockGraph,
310 impl_->tpetra_importer,
311 impl_->async_importer,
312 impl_->overlap_communication_and_computation,
313 X, Y, impl_->Z, impl_->W,
314 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
315 impl_->work,
316 impl_->norm_manager,
317 dampingFactor,
318 zeroStartingSolution,
319 numSweeps,
320 tol,
321 check_tol_every);
322 } else {
323 BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
324 impl_->async_importer,
325 impl_->overlap_communication_and_computation,
326 X, Y, impl_->W,
327 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
328 impl_->work_flat,
329 impl_->norm_manager,
330 dampingFactor,
331 zeroStartingSolution,
332 numSweeps,
333 tol,
334 check_tol_every);
335 }
336 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
337}
338
339template <typename MatrixType>
344
345template <typename MatrixType>
347 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
348 this->IsComputed_ = false;
349 if (!this->isInitialized())
350 this->initialize();
351 {
352 BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
353 impl_->blockGraph,
354 impl_->part_interface, impl_->block_tridiags,
355 in.addRadiallyToDiagonal,
356 impl_->use_fused_jacobi);
357 }
358 this->IsComputed_ = true;
359 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
360}
361
362template <typename MatrixType>
365 ApplyParameters in;
366 in.dampingFactor = Teuchos::ScalarTraits<scalar_type>::one();
367 return in;
368}
369
370template <typename MatrixType>
372 const ApplyParameters& in) const {
373 IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
374 int r_val = 0;
375 {
376 if (!impl_->use_fused_jacobi) {
377 r_val = BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
378 impl_->blockGraph,
379 impl_->tpetra_importer,
380 impl_->async_importer,
381 impl_->overlap_communication_and_computation,
382 X, Y, impl_->Z, impl_->W,
383 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
384 impl_->work,
385 impl_->norm_manager,
386 in.dampingFactor,
387 in.zeroStartingSolution,
388 in.maxNumSweeps,
389 in.tolerance,
390 in.checkToleranceEvery);
391 } else {
392 r_val = BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
393 impl_->async_importer,
394 impl_->overlap_communication_and_computation,
395 X, Y, impl_->W,
396 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
397 impl_->work_flat,
398 impl_->norm_manager,
399 in.dampingFactor,
400 in.zeroStartingSolution,
401 in.maxNumSweeps,
402 in.tolerance,
403 in.checkToleranceEvery);
404 }
405 }
406 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
407 return r_val;
408}
409
410template <typename MatrixType>
413 return impl_->norm_manager.getNorms0();
414}
415
416template <typename MatrixType>
419 return impl_->norm_manager.getNormsFinal();
420}
421
422template <typename MatrixType>
423void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::apply(ConstHostView /* X */, HostView /* Y */, int /* blockIndex */, Teuchos::ETransp /* mode */,
424 scalar_type /* alpha */, scalar_type /* beta */) const {
425 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::apply is not implemented. You may have reached this message "
426 << "because you want to use this container's performance-portable Jacobi iteration. In "
427 << "that case, set \"relaxation: type\" to \"MT Split Jacobi\" rather than \"Jacobi\".");
428}
429
430template <typename MatrixType>
431void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::weightedApply(ConstHostView /* X */, HostView /* Y */, ConstHostView /* D */, int /* blockIndex */,
432 Teuchos::ETransp /* mode */, scalar_type /* alpha */, scalar_type /* beta */) const {
433 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::weightedApply is not implemented.");
434}
435
436template <typename MatrixType>
437std::ostream&
439 Teuchos::FancyOStream fos(Teuchos::rcp(&os, false));
440 fos.setOutputToRootOnly(0);
441 describe(fos);
442 return os;
443}
444
445template <typename MatrixType>
446std::string
448 std::ostringstream oss;
449 oss << Teuchos::Describable::description();
450 if (this->isInitialized()) {
451 if (this->isComputed()) {
452 oss << "{status = initialized, computed";
453 } else {
454 oss << "{status = initialized, not computed";
455 }
456 } else {
457 oss << "{status = not initialized, not computed";
458 }
459
460 oss << "}";
461 return oss.str();
462}
463
464template <typename MatrixType>
466 describe(Teuchos::FancyOStream& os,
467 const Teuchos::EVerbosityLevel verbLevel) const {
468 using std::endl;
469 if (verbLevel == Teuchos::VERB_NONE) return;
470 os << "================================================================================" << endl
471 << "Ifpack2::BlockTriDiContainer" << endl
472 << "Number of blocks = " << this->numBlocks_ << endl
473 << "isInitialized() = " << this->IsInitialized_ << endl
474 << "isComputed() = " << this->IsComputed_ << endl
475 << "================================================================================" << endl
476 << endl;
477}
478
479template <typename MatrixType>
480std::string
481BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::getName() { return "Ifpack2::BlockTriDiContainer::ImplSimdTag"; }
482
483#define IFPACK2_BLOCKTRIDICONTAINER_INSTANT(S, LO, GO, N) \
484 template class Ifpack2::BlockTriDiContainer<Tpetra::RowMatrix<S, LO, GO, N> >;
485} // namespace Ifpack2
486#endif
Ifpack2::BlockTriDiContainer class declaration.
Store and solve local block tridiagonal linear problems.
Definition Ifpack2_BlockTriDiContainer_decl.hpp:107
Interface for creating and solving a set of local linear problems.
Definition Ifpack2_Container_decl.hpp:79
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40
Definition Ifpack2_BlockHelper.hpp:270
Definition Ifpack2_BlockHelper.hpp:389