Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_BlockHelper.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_BLOCKHELPER_IMPL_HPP
11#define IFPACK2_BLOCKHELPER_IMPL_HPP
12
13#include "Ifpack2_BlockHelper_Timers.hpp"
14
15namespace Ifpack2 {
16
17namespace BlockHelperDetails {
18
19namespace KB = KokkosBatched;
20
// Tag used to allocate Kokkos::Views without initializing their entries
// (saves a fill kernel when the data is overwritten immediately).
using do_not_initialize_tag = Kokkos::ViewAllocateWithoutInitializing;

// Combine a view's existing memory-trait bits (unmanaged / random-access)
// with one additional flag into a single Kokkos::MemoryTraits type.
template <typename MemoryTraitsType, Kokkos::MemoryTraitsFlags flag>
using MemoryTraits = Kokkos::MemoryTraits<MemoryTraitsType::is_unmanaged |
                                          MemoryTraitsType::is_random_access |
                                          flag>;

// Non-owning (unmanaged) view with the same data/layout/device as ViewType.
template <typename ViewType>
using Unmanaged = Kokkos::View<typename ViewType::data_type,
                               typename ViewType::array_layout,
                               typename ViewType::device_type,
                               MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
// Variant of ViewType whose element accesses are performed atomically.
template <typename ViewType>
using Atomic = Kokkos::View<typename ViewType::data_type,
                            typename ViewType::array_layout,
                            typename ViewType::device_type,
                            MemoryTraits<typename ViewType::memory_traits, Kokkos::Atomic> >;
// Read-only variant of ViewType (const data type, traits unchanged).
template <typename ViewType>
using Const = Kokkos::View<typename ViewType::const_data_type,
                           typename ViewType::array_layout,
                           typename ViewType::device_type,
                           typename ViewType::memory_traits>;
// Read-only and non-owning.
template <typename ViewType>
using ConstUnmanaged = Const<Unmanaged<ViewType> >;

// Atomic-access and non-owning.
template <typename ViewType>
using AtomicUnmanaged = Atomic<Unmanaged<ViewType> >;
51
// NOTE(review): a second, byte-identical definition of the Unmanaged alias
// template previously appeared here. Unlike a typedef, an alias template may
// not be redeclared ([dcl.typedef]/[temp.alias]), so the duplicate was
// removed; the definition above is the only one needed and all uses
// (ConstUnmanaged, AtomicUnmanaged, callers) resolve to it unchanged.
57
// Unmanaged view of ViewType's data placed in the execution space's
// scratch (team shared) memory space.
template <typename ViewType>
using Scratch = Kokkos::View<typename ViewType::data_type,
                             typename ViewType::array_layout,
                             typename ViewType::execution_space::scratch_memory_space,
                             MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
63
// Map a (row, col) index inside one dense "little block" to the flat offset
// used by Tpetra's block storage, specialized per layout.
// NOTE(review): the primary template declaration (struct TpetraLittleBlock;)
// appears to be missing from this extraction — confirm against the repo.
template <typename LayoutType>
template <>
struct TpetraLittleBlock<Kokkos::LayoutLeft> {
  // Column-major: consecutive row indices are contiguous in memory.
  template <typename T>
  KOKKOS_INLINE_FUNCTION static T getFlatIndex(const T i, const T j, const T blksize) { return i + j * blksize; }
};
template <>
struct TpetraLittleBlock<Kokkos::LayoutRight> {
  // Row-major: consecutive column indices are contiguous in memory.
  template <typename T>
  KOKKOS_INLINE_FUNCTION static T getFlatIndex(const T i, const T j, const T blksize) { return i * blksize + j; }
};
79
// Scalar type used to store the block-tridiagonal factors. By default this
// is the matrix scalar type itself; when the reduced-precision option is
// enabled, double-precision factors are stored in single precision.
template <typename T>
struct BlockTridiagScalarType {
  using type = T;
};
#if defined(IFPACK2_BLOCKHELPER_USE_SMALL_SCALAR_FOR_BLOCKTRIDIAG)
// Store double-precision factors as float to halve the memory footprint.
template <>
struct BlockTridiagScalarType<double> {
  using type = float;
};
// template<> struct SmallScalarType<Kokkos::complex<double> > { typedef Kokkos::complex<float> type; };
#endif
90
// Compile-time check: is T the Kokkos CUDA execution space?
template <typename T>
struct is_cuda {
  enum : bool { value = false };
};
#if defined(KOKKOS_ENABLE_CUDA)
template <>
struct is_cuda<Kokkos::Cuda> {
  enum : bool { value = true };
};
#endif

// Compile-time check: is T the Kokkos HIP execution space?
template <typename T>
struct is_hip {
  enum : bool { value = false };
};
#if defined(KOKKOS_ENABLE_HIP)
template <>
struct is_hip<Kokkos::HIP> {
  enum : bool { value = true };
};
#endif

// Compile-time check: is T the Kokkos SYCL execution space?
template <typename T>
struct is_sycl {
  enum : bool { value = false };
};
#if defined(KOKKOS_ENABLE_SYCL)
template <>
struct is_sycl<Kokkos::Experimental::SYCL> {
  enum : bool { value = true };
};
#endif

// True when T is any of the supported GPU execution spaces above.
template <typename T>
struct is_device {
  enum : bool { value = is_cuda<T>::value || is_hip<T>::value || is_sycl<T>::value };
};
137
// Factory that default-constructs an execution space instance; on CUDA
// builds an overload taking a stream is provided (the generic version
// ignores the stream — only the Kokkos::Cuda specialization binds it).
// NOTE(review): the primary template header (struct ExecutionSpaceFactory {)
// appears to be missing from this extraction — confirm against the repo.
template <typename T>
  static void createInstance(T &exec_instance) {
    exec_instance = T();
  }
#if defined(KOKKOS_ENABLE_CUDA)
  // Generic spaces cannot be bound to a CUDA stream; s is ignored here.
  static void createInstance(const cudaStream_t &s, T &exec_instance) {
    exec_instance = T();
  }
#endif
};
152
#if defined(KOKKOS_ENABLE_CUDA)
// CUDA specialization: also supports constructing an instance bound to a
// user-provided CUDA stream.
template <>
struct ExecutionSpaceFactory<Kokkos::Cuda> {
  static void createInstance(Kokkos::Cuda &exec_instance) {
    exec_instance = Kokkos::Cuda();
  }
  // Bind the execution space instance to the given CUDA stream.
  static void createInstance(const cudaStream_t &s, Kokkos::Cuda &exec_instance) {
    exec_instance = Kokkos::Cuda(s);
  }
};
#endif

#if defined(KOKKOS_ENABLE_HIP)
// HIP specialization.
// NOTE(review): unlike CUDA, no hipStream_t overload is provided — confirm
// whether stream-bound HIP instances are needed by callers.
template <>
struct ExecutionSpaceFactory<Kokkos::HIP> {
  static void createInstance(Kokkos::HIP &exec_instance) {
    exec_instance = Kokkos::HIP();
  }
};
#endif

#if defined(KOKKOS_ENABLE_SYCL)
// SYCL specialization.
template <>
struct ExecutionSpaceFactory<Kokkos::Experimental::SYCL> {
  static void createInstance(Kokkos::Experimental::SYCL &exec_instance) {
    exec_instance = Kokkos::Experimental::SYCL();
  }
};
#endif
182
#if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_ENABLE_PROFILE)
// Bracket a code region with cudaProfilerStart/Stop so it is captured by
// the CUDA profiler when profiling is explicitly enabled at configure time.
#define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN \
  KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStart());

#define IFPACK2_BLOCKHELPER_PROFILER_REGION_END \
  { KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStop()); }
#else
// Profiling disabled: the region markers expand to nothing.
#define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN
#define IFPACK2_BLOCKHELPER_PROFILER_REGION_END
#endif
194
// Build a "Rank r of n: " prefix for rank-identifying log/error messages.
// CommPtrType must be pointer-like and expose getRank() and getSize()
// (e.g. a Teuchos::RCP<const Teuchos::Comm<int> >).
template <typename CommPtrType>
std::string get_msg_prefix(const CommPtrType &comm) {
  std::stringstream prefix;
  prefix << "Rank " << comm->getRank() << " of " << comm->getSize() << ": ";
  return prefix.str();
}
206
// Fixed-size array of T, usable as the value type of a Kokkos reduction
// (see SumReducer and the operator+= overload below).
// NOTE(review): the struct header (struct ArrayValueType {) and the two
// constructor signatures are missing from this extraction — the first body
// below zero-initializes v[], the second copy-constructs it. Confirm
// against the repo.
template <typename T, int N>
  T v[N];
  KOKKOS_INLINE_FUNCTION
    for (int i = 0; i < N; ++i)
      this->v[i] = 0;
  }
  KOKKOS_INLINE_FUNCTION
    for (int i = 0; i < N; ++i)
      this->v[i] = b.v[i];
  }
};
224template <typename T, int N>
225static KOKKOS_INLINE_FUNCTION void
226operator+=(ArrayValueType<T, N> &a,
227 const ArrayValueType<T, N> &b) {
228 for (int i = 0; i < N; ++i)
229 a.v[i] += b.v[i];
230}
231
// Kokkos custom reducer performing an element-wise sum over
// ArrayValueType<T, N> values; implements the Kokkos reducer concept
// (reducer/value_type typedefs plus join / init / reference / view).
// NOTE(review): the struct header, the value_type typedef, and the
// constructor signature are missing from this extraction — confirm against
// the repo.
template <typename T, int N, typename ExecSpace>
  typedef SumReducer reducer;
  typedef Kokkos::View<value_type, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
  value_type *value;

  KOKKOS_INLINE_FUNCTION
      : value(&val) {}

  // Combine two partial results: dst += src, element-wise.
  KOKKOS_INLINE_FUNCTION
  void join(value_type &dst, value_type const &src) const {
    for (int i = 0; i < N; ++i)
      dst.v[i] += src.v[i];
  }
  // Identity element of the sum reduction: all zeros.
  KOKKOS_INLINE_FUNCTION
  void init(value_type &val) const {
    for (int i = 0; i < N; ++i)
      val.v[i] = 0;
  }
  KOKKOS_INLINE_FUNCTION
  value_type &reference() {
    return *value;
  }
  KOKKOS_INLINE_FUNCTION
  result_view_type view() const {
    return result_view_type(value);
  }
};
265
// Central type dictionary for the block preconditioners: gathers the
// scalar/ordinal/device typedefs derived from MatrixType, the matching
// Tpetra distributed-object types, the KokkosBatched SIMD vector types, and
// the Kokkos::View typedefs used throughout this package.
template <typename MatrixType>
struct ImplType {
  // Ordinal/scalar/node typedefs taken directly from the Tpetra matrix type.
  typedef size_t size_type;
  typedef MatrixType matrix_type;
  typedef typename MatrixType::scalar_type scalar_type;
  typedef typename MatrixType::local_ordinal_type local_ordinal_type;
  typedef typename MatrixType::global_ordinal_type global_ordinal_type;
  typedef typename MatrixType::node_type node_type;

  // Device-side scalar (e.g. Kokkos::complex for std::complex) and its
  // magnitude type, via KokkosKernels' arithmetic traits.
  typedef typename KokkosKernels::ArithTraits<scalar_type>::val_type impl_scalar_type;
  typedef typename KokkosKernels::ArithTraits<impl_scalar_type>::mag_type magnitude_type;

  // Scalar used to store the block-tridiagonal factors; may be a
  // reduced-precision type (see BlockTridiagScalarType above).
  typedef typename BlockTridiagScalarType<impl_scalar_type>::type btdm_scalar_type;
  typedef typename KokkosKernels::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;

  // Host execution space for setup-time work.
  typedef Kokkos::DefaultHostExecutionSpace host_execution_space;

  // Spaces associated with the Tpetra node.
  typedef typename node_type::device_type node_device_type;
  typedef typename node_device_type::execution_space node_execution_space;
  typedef typename node_device_type::memory_space node_memory_space;

#if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_USE_CUDA_SPACE)
  // Prefer plain CudaSpace for internal data even when the node uses UVM.
  typedef node_execution_space execution_space;
  typedef typename std::conditional<std::is_same<node_memory_space, Kokkos::CudaUVMSpace>::value,
                                    Kokkos::CudaSpace,
                                    node_memory_space>::type memory_space;
  typedef Kokkos::Device<execution_space, memory_space> device_type;
#else
  typedef node_device_type device_type;
  typedef node_execution_space execution_space;
  typedef node_memory_space memory_space;
#endif

  // Tpetra distributed-object types matching MatrixType's template args.
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_multivector_type;
  typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> tpetra_map_type;
  typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> tpetra_import_type;
  typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_row_matrix_type;
  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_crs_matrix_type;
  typedef Tpetra::CrsGraph<local_ordinal_type, global_ordinal_type, node_type> tpetra_crs_graph_type;
  typedef Tpetra::BlockCrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_block_crs_matrix_type;
  typedef typename tpetra_block_crs_matrix_type::little_block_type tpetra_block_access_view_type;
  typedef Tpetra::BlockMultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_block_multivector_type;
  typedef typename tpetra_block_crs_matrix_type::crs_graph_type::local_graph_device_type local_crs_graph_type;

  // KokkosBatched SIMD vector machinery used for the packed factorization.
  template <typename T, int l>
  using Vector = KB::Vector<T, l>;
  template <typename T>
  using SIMD = KB::SIMD<T>;
  template <typename T, typename M>
  using DefaultVectorLength = KB::DefaultVectorLength<T, M>;
  template <typename T, typename M>
  using DefaultInternalVectorLength = KB::DefaultInternalVectorLength<T, M>;

  // Vector (pack) lengths chosen by KokkosBatched for this scalar/memory
  // space combination.
  static constexpr int vector_length = DefaultVectorLength<btdm_scalar_type, memory_space>::value;
  static constexpr int internal_vector_length = DefaultInternalVectorLength<btdm_scalar_type, memory_space>::value;
  static constexpr int half_vector_length = (vector_length > 1) ? (vector_length / 2) : 1;
  typedef Vector<SIMD<btdm_scalar_type>, vector_length> vector_type;
  typedef Vector<SIMD<btdm_scalar_type>, internal_vector_length> internal_vector_type;

  // Index / ordinal views on the preconditioner's device.
  typedef Kokkos::View<size_type *, device_type> size_type_1d_view;
  typedef Kokkos::View<size_type **, device_type> size_type_2d_view;
  typedef Kokkos::View<int64_t ***, Kokkos::LayoutRight, device_type> i64_3d_view;
  typedef Kokkos::View<local_ordinal_type *, device_type> local_ordinal_type_1d_view;
  typedef Kokkos::View<local_ordinal_type **, device_type> local_ordinal_type_2d_view;
  // tpetra block crs values
  typedef Kokkos::View<impl_scalar_type *, device_type> impl_scalar_type_1d_view;
  typedef Kokkos::View<impl_scalar_type *, node_device_type> impl_scalar_type_1d_view_tpetra;

  // tpetra multivector values (layout left): may need to change the typename more explicitly
  typedef Kokkos::View<impl_scalar_type **, Kokkos::LayoutLeft, device_type> impl_scalar_type_2d_view;
  typedef Kokkos::View<impl_scalar_type **, Kokkos::LayoutLeft, node_device_type> impl_scalar_type_2d_view_tpetra;
  typedef Kokkos::View<const impl_scalar_type **, Kokkos::LayoutLeft, node_device_type> const_impl_scalar_type_2d_view_tpetra;

  // packed data always use layout right
  typedef Kokkos::View<vector_type *, device_type> vector_type_1d_view;
  typedef Kokkos::View<vector_type ***, Kokkos::LayoutRight, device_type> vector_type_3d_view;
  typedef Kokkos::View<vector_type ****, Kokkos::LayoutRight, device_type> vector_type_4d_view;
  typedef Kokkos::View<internal_vector_type ***, Kokkos::LayoutRight, device_type> internal_vector_type_3d_view;
  typedef Kokkos::View<internal_vector_type ****, Kokkos::LayoutRight, device_type> internal_vector_type_4d_view;
  typedef Kokkos::View<internal_vector_type *****, Kokkos::LayoutRight, device_type> internal_vector_type_5d_view;
  typedef Kokkos::View<btdm_scalar_type **, Kokkos::LayoutRight, device_type> btdm_scalar_type_2d_view;
  typedef Kokkos::View<btdm_scalar_type ***, Kokkos::LayoutRight, device_type> btdm_scalar_type_3d_view;
  typedef Kokkos::View<btdm_scalar_type ****, Kokkos::LayoutRight, device_type> btdm_scalar_type_4d_view;
  typedef Kokkos::View<btdm_scalar_type *****, Kokkos::LayoutRight, device_type> btdm_scalar_type_5d_view;

  // maximum supported block size for BTD/BJacobi solvers (some codepaths may allow larger)
  enum : int { max_blocksize = 32 };
};
376
// A-minus-D: the off-block-diagonal remainder R of the splitting A = D + R,
// stored as index lists into A's graph plus precomputed offsets so the
// sweeps can apply R without re-walking the graph.
// NOTE(review): this struct references an impl_type alias (presumably
// BlockHelperDetails::ImplType<MatrixType>) whose declaration is missing
// from this extraction — confirm against the repo.
template <typename MatrixType>
struct AmD {
  using local_ordinal_type_1d_view = typename impl_type::local_ordinal_type_1d_view;
  using size_type_1d_view = typename impl_type::size_type_1d_view;
  using i64_3d_view = typename impl_type::i64_3d_view;
  using impl_scalar_type_1d_view_tpetra = Unmanaged<typename impl_type::impl_scalar_type_1d_view_tpetra>;
  // rowptr points to the start of each row of A_colindsub.
  size_type_1d_view rowptr, rowptr_remote;
  // Indices into A's rows giving the blocks to extract. rowptr(i) points to
  // the i'th row. Thus, g.entries(A_colindsub(rowptr(row) : rowptr(row+1))),
  // where g is A's graph, are the columns AmD uses. If seq_method_, then
  // A_colindsub contains all the LIDs and A_colindsub_remote is empty. If !
  // seq_method_, then A_colindsub contains owned LIDs and A_colindsub_remote
  // contains the remote ones.
  local_ordinal_type_1d_view A_colindsub, A_colindsub_remote;
  // Precomputed direct offsets to A,x blocks, for owned entries (OverlapTag case) or all entries (AsyncTag case)
  i64_3d_view A_x_offsets;
  // Precomputed direct offsets to A,x blocks, for non-owned entries (OverlapTag case). For AsyncTag case this is left empty.
  i64_3d_view A_x_offsets_remote;

  // Currently always true.
  bool is_tpetra_block_crs;

  // If is_tpetra_block_crs, then this is a pointer to A_'s value data.
  impl_scalar_type_1d_view_tpetra tpetra_values;

  AmD() = default;
  AmD(const AmD &b) = default;
};
410
// Describes the partitioning of the local matrix rows into "parts"
// (tridiags) and SIMD packs; see the member comments below for the three
// index spaces involved (LID, tridiag row index, packed index).
template <typename MatrixType>
struct PartInterface {
  using local_ordinal_type = typename BlockHelperDetails::ImplType<MatrixType>::local_ordinal_type;
  using local_ordinal_type_1d_view = typename BlockHelperDetails::ImplType<MatrixType>::local_ordinal_type_1d_view;
  using local_ordinal_type_2d_view = typename BlockHelperDetails::ImplType<MatrixType>::local_ordinal_type_2d_view;

  PartInterface() = default;
  PartInterface(const PartInterface &b) = default;

  // Some terms:
  // The matrix A is split as A = D + R, where D is the matrix of tridiag
  // blocks and R is the remainder.
  // A part is roughly a synonym for a tridiag. The distinction is that a part
  // is the set of rows belonging to one tridiag and, equivalently, the off-diag
  // rows in R associated with that tridiag. In contrast, the term tridiag is
  // used to refer specifically to tridiag data, such as the pointer into the
  // tridiag data array.
  // Local (lcl) row are the LIDs. lclrow lists the LIDs belonging to each
  // tridiag, and partptr points to the beginning of each tridiag. This is the
  // LID space.
  // Row index (idx) is the ordinal in the tridiag ordering. lclrow is indexed
  // by this ordinal. This is the 'index' space.
  // A flat index is the mathematical index into an array. A pack index
  // accounts for SIMD packing.

  // Local row LIDs. Permutation from caller's index space to tridiag index
  // space.
  local_ordinal_type_1d_view lclrow;
  // partptr_ is the pointer array into lclrow_.
  local_ordinal_type_1d_view partptr;  // np+1
  local_ordinal_type_2d_view partptr_sub;
  local_ordinal_type_1d_view partptr_schur;
  // packptr_(i), for i the pack index, indexes partptr_. partptr_(packptr_(i))
  // is the start of the i'th pack.
  local_ordinal_type_1d_view packptr;  // npack+1
  local_ordinal_type_1d_view packptr_sub;
  local_ordinal_type_1d_view packindices_sub;
  local_ordinal_type_2d_view packindices_schur;
  // part2rowidx0_(i) is the flat row index of the start of the i'th part. It's
  // an alias of partptr_ in the case of no overlap.
  local_ordinal_type_1d_view part2rowidx0;  // np+1
  local_ordinal_type_1d_view part2rowidx0_sub;
  // part2packrowidx0_(i) is the packed row index. If vector_length is 1, then
  // it's the same as part2rowidx0_; if it's > 1, then the value is combined
  // with i % vector_length to get the location in the packed data.
  local_ordinal_type_1d_view part2packrowidx0;  // np+1
  local_ordinal_type_2d_view part2packrowidx0_sub;
  local_ordinal_type part2packrowidx0_back;  // So we don't need to grab the array from the GPU.
  // rowidx2part_ maps the row index to the part index.
  local_ordinal_type_1d_view rowidx2part;  // nr
  local_ordinal_type_1d_view rowidx2part_sub;
  // True if lcl{row|col} is at most a constant away from row{idx|col}. In
  // practice, this knowledge is not particularly useful, as packing for batched
  // processing is done at the same time as the permutation from LID to index
  // space. But it's easy to detect, so it's recorded in case an optimization
  // can be made based on it.
  bool row_contiguous;

  // NOTE(review): the four members below look like partition sizing
  // summaries (max part size, max sub-part size, sub-parts per part, part
  // count) — confirm against the setup code that fills them.
  local_ordinal_type max_partsz;
  local_ordinal_type max_subpartsz;
  local_ordinal_type n_subparts_per_part;
  local_ordinal_type nparts;
};
474
// Tracks residual norms across fixed-point sweeps and performs the
// (optionally non-blocking) MPI reduction backing the norm-based stopping
// criterion.
// NOTE(review): the class header (class NormManager { ...) and an impl_type
// alias are missing from this extraction — confirm against the repo.
template <typename MatrixType>
 public:
  using host_execution_space = typename impl_type::host_execution_space;
  using magnitude_type = typename impl_type::magnitude_type;

 private:
  // True when the communicator has more than one rank (reduction needed).
  bool collective_;
  // Check the norm every sweep_step_ sweeps; the interval doubles after
  // each check, capped at sweep_step_upper_bound_ (see checkDone).
  int sweep_step_, sweep_step_upper_bound_;
#ifdef HAVE_IFPACK2_MPI
  MPI_Request mpi_request_;
  MPI_Comm comm_;
#endif
  // work_[0]: current (globally reduced) squared norm / receive buffer.
  // work_[1]: send buffer copied from work_[0] before the reduction.
  // work_[2]: squared norm of the first checked sweep, or -1 when no norm
  //           has been recorded yet.
  magnitude_type work_[3];

 public:
  NormManager() = default;
  NormManager(const NormManager &b) = default;
  // Capture the raw MPI communicator (when running on >1 rank) and reset
  // the norm slots.
  NormManager(const Teuchos::RCP<const Teuchos::Comm<int> > &comm) {
    sweep_step_ = 1;
    sweep_step_upper_bound_ = 1;
    collective_ = comm->getSize() > 1;
    if (collective_) {
#ifdef HAVE_IFPACK2_MPI
      const auto mpi_comm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
      TEUCHOS_ASSERT(!mpi_comm.is_null());
      comm_ = *mpi_comm->getRawMpiComm();
#endif
    }
    const magnitude_type zero(0), minus_one(-1);
    work_[0] = zero;
    work_[1] = zero;
    work_[2] = minus_one;
  }

  // Check the norm every sweep_step sweeps.
  void setCheckFrequency(const int sweep_step) {
    TEUCHOS_TEST_FOR_EXCEPT_MSG(sweep_step < 1, "sweep step must be >= 1");
    sweep_step_upper_bound_ = sweep_step;
    sweep_step_ = 1;
  }

  // Get the buffer into which to store rank-local squared norms.
  magnitude_type *getBuffer() { return &work_[0]; }

  // Call MPI_Iallreduce to find the global squared norms.
  // (Without MPI-3 support this falls back to a blocking MPI_Allreduce.)
  void ireduce(const int sweep, const bool force = false) {
    if (!force && sweep % sweep_step_) return;

    IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce", Ireduce);

    // Copy to the send slot so work_[0] can serve as the receive buffer.
    work_[1] = work_[0];
#ifdef HAVE_IFPACK2_MPI
    auto send_data = &work_[1];
    auto recv_data = &work_[0];
    if (collective_) {
#if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
      MPI_Iallreduce(send_data, recv_data, 1,
                     Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
                     MPI_SUM, comm_, &mpi_request_);
#else
      MPI_Allreduce(send_data, recv_data, 1,
                    Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
                    MPI_SUM, comm_);
#endif
    }
#endif
  }

  // Check if the norm-based termination criterion is met. tol2 is the
  // tolerance squared. Sweep is the sweep index. If not every iteration is
  // being checked, this function immediately returns false. If a check must
  // be done at this iteration, it waits for the reduction triggered by
  // ireduce to complete, then checks the global norm against the tolerance.
  bool checkDone(const int sweep, const magnitude_type tol2, const bool force = false) {
    // early return
    if (sweep <= 0) return false;

    IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone", CheckDone);

    TEUCHOS_ASSERT(sweep >= 1);
    if (!force && (sweep - 1) % sweep_step_) return false;
    if (collective_) {
#ifdef HAVE_IFPACK2_MPI
#if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
      // Complete the non-blocking reduction started by ireduce.
      MPI_Wait(&mpi_request_, MPI_STATUS_IGNORE);
#else
      // Do nothing.
#endif
#endif
    }
    bool r_val = false;
    if (sweep == 1) {
      // Record the first checked norm as the reference for relative tests.
      work_[2] = work_[0];
    } else {
      // Converged when current norm^2 < tol^2 * first norm^2.
      r_val = (work_[0] < tol2 * work_[2]);
    }

    // adjust sweep step
    // (double the interval between checks, capped at the configured bound)
    const auto adjusted_sweep_step = 2 * sweep_step_;
    if (adjusted_sweep_step < sweep_step_upper_bound_) {
      sweep_step_ = adjusted_sweep_step;
    } else {
      sweep_step_ = sweep_step_upper_bound_;
    }
    return r_val;
  }

  // After termination has occurred, finalize the norms for use in
  // get_norms{0,final}.
  void finalize() {
    work_[0] = std::sqrt(work_[0]);  // after converged
    if (work_[2] >= 0)
      work_[2] = std::sqrt(work_[2]);  // first norm
    // if work_[2] is minus one, then norm is not requested.
  }
595
596 // Report norms to the caller.
597 const magnitude_type getNorms0() const { return work_[2]; }
598 const magnitude_type getNormsFinal() const { return work_[0]; }
599};
600
// Sum all entries of the device view zz and store the absolute value of
// that sum into vals[0] on the host.
template <typename MatrixType>
void reduceVector(const ConstUnmanaged<typename BlockHelperDetails::ImplType<MatrixType>::impl_scalar_type_1d_view> zz,
                  /* */ typename BlockHelperDetails::ImplType<MatrixType>::magnitude_type *vals) {
  IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN;
  IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ReduceVector", ReduceVector);

  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
  using local_ordinal_type = typename impl_type::local_ordinal_type;
  using impl_scalar_type = typename impl_type::impl_scalar_type;
#if 0
  const auto norm2 = KokkosBlas::nrm1(zz);
#else
  // Hand-rolled reduction; the KokkosBlas path above is disabled.
  impl_scalar_type norm2(0);
  Kokkos::parallel_reduce(
      "ReduceMultiVector::Device",
      Kokkos::RangePolicy<typename impl_type::execution_space>(0, zz.extent(0)),
      KOKKOS_LAMBDA(const local_ordinal_type &i, impl_scalar_type &update) {
        update += zz(i);
      },
      norm2);
#endif
  // NOTE(review): this stores |sum of entries|, not a 2-norm, despite the
  // variable name norm2 — confirm that is the intended quantity.
  vals[0] = KokkosKernels::ArithTraits<impl_scalar_type>::abs(norm2);

  IFPACK2_BLOCKHELPER_PROFILER_REGION_END;
  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename ImplType<MatrixType>::execution_space)
}
627
628} // namespace BlockHelperDetails
629
630} // namespace Ifpack2
631
632#endif
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:40
Definition Ifpack2_BlockHelper.hpp:381
Definition Ifpack2_BlockHelper.hpp:211
Definition Ifpack2_BlockHelper.hpp:84
Definition Ifpack2_BlockHelper.hpp:142
Definition Ifpack2_BlockHelper.hpp:270
KB::Vector< T, l > Vector
Definition Ifpack2_BlockHelper.hpp:330
size_t size_type
Definition Ifpack2_BlockHelper.hpp:274
node_type::device_type node_device_type
Definition Ifpack2_BlockHelper.hpp:298
Kokkos::DefaultHostExecutionSpace host_execution_space
Definition Ifpack2_BlockHelper.hpp:293
KokkosKernels::ArithTraits< scalar_type >::val_type impl_scalar_type
Definition Ifpack2_BlockHelper.hpp:284
Kokkos::View< size_type *, device_type > size_type_1d_view
Definition Ifpack2_BlockHelper.hpp:347
Definition Ifpack2_BlockHelper.hpp:479
Definition Ifpack2_BlockHelper.hpp:236
Definition Ifpack2_BlockHelper.hpp:68
Definition Ifpack2_BlockHelper.hpp:95
Definition Ifpack2_BlockHelper.hpp:109
Definition Ifpack2_BlockHelper.hpp:123