275 typedef typename MatrixType::scalar_type scalar_type;
276 typedef typename MatrixType::local_ordinal_type local_ordinal_type;
277 typedef typename MatrixType::global_ordinal_type global_ordinal_type;
278 typedef typename MatrixType::node_type node_type;
284 typedef typename KokkosKernels::ArithTraits<impl_scalar_type>::mag_type magnitude_type;
286 typedef typename BlockTridiagScalarType<impl_scalar_type>::type btdm_scalar_type;
287 typedef typename KokkosKernels::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;
298 typedef typename node_device_type::execution_space node_execution_space;
299 typedef typename node_device_type::memory_space node_memory_space;
301#if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_USE_CUDA_SPACE)
303 typedef node_execution_space execution_space;
304 typedef typename std::conditional<std::is_same<node_memory_space, Kokkos::CudaUVMSpace>::value,
306 node_memory_space>::type memory_space;
307 typedef Kokkos::Device<execution_space, memory_space> device_type;
310 typedef node_execution_space execution_space;
311 typedef node_memory_space memory_space;
314 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_multivector_type;
315 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> tpetra_map_type;
316 typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> tpetra_import_type;
317 typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_row_matrix_type;
318 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_crs_matrix_type;
319 typedef Tpetra::CrsGraph<local_ordinal_type, global_ordinal_type, node_type> tpetra_crs_graph_type;
320 typedef Tpetra::BlockCrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_block_crs_matrix_type;
321 typedef typename tpetra_block_crs_matrix_type::little_block_type tpetra_block_access_view_type;
322 typedef Tpetra::BlockMultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_block_multivector_type;
323 typedef typename tpetra_block_crs_matrix_type::crs_graph_type::local_graph_device_type local_crs_graph_type;
328 template <
typename T,
int l>
330 template <
typename T>
331 using SIMD = KB::SIMD<T>;
332 template <
typename T,
typename M>
333 using DefaultVectorLength = KB::DefaultVectorLength<T, M>;
334 template <
typename T,
typename M>
335 using DefaultInternalVectorLength = KB::DefaultInternalVectorLength<T, M>;
337 static constexpr int vector_length = DefaultVectorLength<btdm_scalar_type, memory_space>::value;
338 static constexpr int internal_vector_length = DefaultInternalVectorLength<btdm_scalar_type, memory_space>::value;
339 static constexpr int half_vector_length = (vector_length > 1) ? (vector_length / 2) : 1;
347 typedef Kokkos::View<size_type **, device_type> size_type_2d_view;
348 typedef Kokkos::View<int64_t ***, Kokkos::LayoutRight, device_type> i64_3d_view;
349 typedef Kokkos::View<local_ordinal_type *, device_type> local_ordinal_type_1d_view;
350 typedef Kokkos::View<local_ordinal_type **, device_type> local_ordinal_type_2d_view;
352 typedef Kokkos::View<impl_scalar_type *, device_type> impl_scalar_type_1d_view;
353 typedef Kokkos::View<impl_scalar_type *, node_device_type> impl_scalar_type_1d_view_tpetra;
356 typedef Kokkos::View<impl_scalar_type **, Kokkos::LayoutLeft, device_type> impl_scalar_type_2d_view;
357 typedef Kokkos::View<impl_scalar_type **, Kokkos::LayoutLeft, node_device_type> impl_scalar_type_2d_view_tpetra;
358 typedef Kokkos::View<const impl_scalar_type **, Kokkos::LayoutLeft, node_device_type> const_impl_scalar_type_2d_view_tpetra;
361 typedef Kokkos::View<vector_type *, device_type> vector_type_1d_view;
362 typedef Kokkos::View<vector_type ***, Kokkos::LayoutRight, device_type> vector_type_3d_view;
363 typedef Kokkos::View<vector_type ****, Kokkos::LayoutRight, device_type> vector_type_4d_view;
364 typedef Kokkos::View<internal_vector_type ***, Kokkos::LayoutRight, device_type> internal_vector_type_3d_view;
365 typedef Kokkos::View<internal_vector_type ****, Kokkos::LayoutRight, device_type> internal_vector_type_4d_view;
366 typedef Kokkos::View<internal_vector_type *****, Kokkos::LayoutRight, device_type> internal_vector_type_5d_view;
367 typedef Kokkos::View<btdm_scalar_type **, Kokkos::LayoutRight, device_type> btdm_scalar_type_2d_view;
368 typedef Kokkos::View<btdm_scalar_type ***, Kokkos::LayoutRight, device_type> btdm_scalar_type_3d_view;
369 typedef Kokkos::View<btdm_scalar_type ****, Kokkos::LayoutRight, device_type> btdm_scalar_type_4d_view;
370 typedef Kokkos::View<btdm_scalar_type *****, Kokkos::LayoutRight, device_type> btdm_scalar_type_5d_view;
381 using magnitude_type =
typename impl_type::magnitude_type;
385 int sweep_step_, sweep_step_upper_bound_;
386#ifdef HAVE_IFPACK2_MPI
387 MPI_Request mpi_request_;
390 magnitude_type work_[3];
395 NormManager(
const Teuchos::RCP<
const Teuchos::Comm<int> > &comm) {
397 sweep_step_upper_bound_ = 1;
398 collective_ = comm->getSize() > 1;
400#ifdef HAVE_IFPACK2_MPI
401 const auto mpi_comm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
402 TEUCHOS_ASSERT(!mpi_comm.is_null());
403 comm_ = *mpi_comm->getRawMpiComm();
406 const magnitude_type zero(0), minus_one(-1);
409 work_[2] = minus_one;
413 void setCheckFrequency(
const int sweep_step) {
414 TEUCHOS_TEST_FOR_EXCEPT_MSG(sweep_step < 1, "sweep step must be >= 1
");
415 sweep_step_upper_bound_ = sweep_step;
419 // Get the buffer into which to store rank-local squared norms.
420 magnitude_type *getBuffer() { return &work_[0]; }
422 // Call MPI_Iallreduce to find the global squared norms.
423 void ireduce(const int sweep, const bool force = false) {
424 if (!force && sweep % sweep_step_) return;
426 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce
", Ireduce);
429#ifdef HAVE_IFPACK2_MPI
430 auto send_data = &work_[1];
431 auto recv_data = &work_[0];
433#if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
434 MPI_Iallreduce(send_data, recv_data, 1,
435 Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
436 MPI_SUM, comm_, &mpi_request_);
438 MPI_Allreduce(send_data, recv_data, 1,
439 Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
446 // Check if the norm-based termination criterion is met. tol2 is the
447 // tolerance squared. Sweep is the sweep index. If not every iteration is
448 // being checked, this function immediately returns false. If a check must
449 // be done at this iteration, it waits for the reduction triggered by
450 // ireduce to complete, then checks the global norm against the tolerance.
451 bool checkDone(const int sweep, const magnitude_type tol2, const bool force = false) {
453 if (sweep <= 0) return false;
455 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone
", CheckDone);
457 TEUCHOS_ASSERT(sweep >= 1);
458 if (!force && (sweep - 1) % sweep_step_) return false;
460#ifdef HAVE_IFPACK2_MPI
461#if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
462 MPI_Wait(&mpi_request_, MPI_STATUS_IGNORE);
472 r_val = (work_[0] < tol2 * work_[2]);
476 const auto adjusted_sweep_step = 2 * sweep_step_;
477 if (adjusted_sweep_step < sweep_step_upper_bound_) {
478 sweep_step_ = adjusted_sweep_step;
480 sweep_step_ = sweep_step_upper_bound_;
485 // After termination has occurred, finalize the norms for use in
486 // get_norms{0,final}.
488 work_[0] = std::sqrt(work_[0]); // after converged
490 work_[2] = std::sqrt(work_[2]); // first norm
491 // if work_[2] is minus one, then norm is not requested.
494 // Report norms to the caller.
495 const magnitude_type getNorms0() const { return work_[2]; }
496 const magnitude_type getNormsFinal() const { return work_[0]; }