Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_Util.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
18#ifndef AMESOS2_UTIL_HPP
19#define AMESOS2_UTIL_HPP
20
21#include <cstdio>
22#include <fstream>
23#include <iostream>
24
25#include "Amesos2_config.h"
26
27#include "Teuchos_RCP.hpp"
28#include "Teuchos_BLAS_types.hpp"
29#include "Teuchos_Array.hpp"
30#include "Teuchos_ArrayView.hpp"
31#include "Teuchos_FancyOStream.hpp"
32
33#include <Tpetra_Map.hpp>
34#include <Tpetra_DistObject_decl.hpp>
35#include <Tpetra_ComputeGatherMap.hpp> // added for gather map... where is the best place??
36
37#include "Amesos2_TypeDecl.hpp"
38#include "Amesos2_Meta.hpp"
40
41#ifdef HAVE_AMESOS2_METIS
42#include "metis.h" // to discuss, remove from header?
43#endif
44
45namespace Amesos2 {
46
47 namespace Util {
48
55 using Teuchos::RCP;
56 using Teuchos::ArrayView;
57
74 template <typename LO, typename GO, typename GS, typename Node>
75 const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
76 getGatherMap( const Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > &map );
77
78
79 template <typename LO, typename GO, typename GS, typename Node>
80 const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
81 getDistributionMap(EDistribution distribution,
82 GS num_global_elements,
83 const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
84 GO indexBase = 0,
85 const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >& map = Teuchos::null);
86
92 template <typename Scalar,
93 typename GlobalOrdinal,
94 typename GlobalSizeT>
95 void transpose(ArrayView<Scalar> vals,
96 ArrayView<GlobalOrdinal> indices,
97 ArrayView<GlobalSizeT> ptr,
98 ArrayView<Scalar> trans_vals,
99 ArrayView<GlobalOrdinal> trans_indices,
100 ArrayView<GlobalSizeT> trans_ptr);
101
115 template <typename Scalar1, typename Scalar2>
116 void scale(ArrayView<Scalar1> vals, size_t l,
117 size_t ld, ArrayView<Scalar2> s);
118
137 template <typename Scalar1, typename Scalar2, class BinaryOp>
138 void scale(ArrayView<Scalar1> vals, size_t l,
139 size_t ld, ArrayView<Scalar2> s, BinaryOp binary_op);
140
141
143 void printLine( Teuchos::FancyOStream &out );
144
145 // Helper function used to convert Kokkos::complex pointer
146 // to std::complex pointer; needed for optimized code path
147 // when retrieving the CRS raw pointers
148 template < class T0, class T1 >
149 struct getStdCplxType
150 {
151 using common_type = typename std::common_type<T0,T1>::type;
152 using type = common_type;
153 };
154
155 template < class T0, class T1 >
156 struct getStdCplxType< T0, T1* >
157 {
158 using common_type = typename std::common_type<T0,T1>::type;
159 using type = common_type;
160 };
161
162#if defined(HAVE_TEUCHOS_COMPLEX) && defined(HAVE_AMESOS2_KOKKOS)
163 template < class T0 >
164 struct getStdCplxType< T0, Kokkos::complex<T0>* >
165 {
166 using type = std::complex<T0>;
167 };
168
169 template < class T0 , class T1 >
170 struct getStdCplxType< T0, Kokkos::complex<T1>* >
171 {
172 using common_type = typename std::common_type<T0,T1>::type;
173 using type = std::complex<common_type>;
174 };
175#endif
176
178 // Matrix/MultiVector Utilities //
180
181
182
196 template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
198 {
199 static void do_get(const Teuchos::Ptr<const M> mat,
200 KV_S& nzvals,
201 KV_GO& indices,
202 KV_GS& pointers,
203 typename KV_GS::value_type& nnz,
204 const Teuchos::Ptr<
205 const Tpetra::Map<typename M::local_ordinal_t,
206 typename M::global_ordinal_t,
207 typename M::node_t> > map,
208 EDistribution distribution,
209 EStorage_Ordering ordering)
210 {
211 Op::template apply_kokkos_view<KV_S, KV_GO, KV_GS>(mat, nzvals,
212 indices, pointers, nnz, map, distribution, ordering);
213 }
214 };
215
216 template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
217 struct diff_gs_helper_kokkos_view
218 {
219 static void do_get(const Teuchos::Ptr<const M> mat,
220 KV_S& nzvals,
221 KV_GO& indices,
222 KV_GS& pointers,
223 typename KV_GS::value_type& nnz,
224 const Teuchos::Ptr<
225 const Tpetra::Map<typename M::local_ordinal_t,
226 typename M::global_ordinal_t,
227 typename M::node_t> > map,
228 EDistribution distribution,
229 EStorage_Ordering ordering)
230 {
231 typedef typename M::global_size_t mat_gs_t;
232 typedef typename Kokkos::View<mat_gs_t*, Kokkos::HostSpace> KV_TMP;
233 size_t i, size = (pointers.extent(0) > 0 ? pointers.extent(0) : 1); // making sure it is at least 1, even for empty local matrix
234 KV_TMP pointers_tmp(Kokkos::ViewAllocateWithoutInitializing("pointers_tmp"), size);
235
236 mat_gs_t nnz_tmp = 0;
237 Op::template apply_kokkos_view<KV_S, KV_GO, KV_TMP>(mat, nzvals,
238 indices, pointers_tmp, nnz_tmp, Teuchos::ptrInArg(*map), distribution, ordering);
239 nnz = Teuchos::as<typename KV_GS::value_type>(nnz_tmp);
240
241 typedef typename KV_GS::value_type view_gs_t;
242 if (pointers.extent(0) == 1) {
243 Kokkos::deep_copy(pointers, 0);
244 } else {
245 auto host_pointers = Kokkos::create_mirror_view(pointers);
246 for (i = 0; i < pointers.extent(0); ++i){
247 host_pointers(i) = Teuchos::as<view_gs_t>(pointers_tmp(i));
248 }
249 Kokkos::deep_copy(pointers, host_pointers);
250 }
251 nnz = Teuchos::as<view_gs_t>(nnz_tmp);
252 }
253 };
254
255 template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
256 struct same_go_helper_kokkos_view
257 {
258 static void do_get(const Teuchos::Ptr<const M> mat,
259 KV_S& nzvals,
260 KV_GO& indices,
261 KV_GS& pointers,
262 typename KV_GS::value_type& nnz,
263 const Teuchos::Ptr<
264 const Tpetra::Map<typename M::local_ordinal_t,
265 typename M::global_ordinal_t,
266 typename M::node_t> > map,
267 EDistribution distribution,
268 EStorage_Ordering ordering)
269 {
270 typedef typename M::global_size_t mat_gs_t;
271 typedef typename KV_GS::value_type view_gs_t;
272 std::conditional_t<std::is_same_v<view_gs_t,mat_gs_t>,
273 same_gs_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op>,
274 diff_gs_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op> >::do_get(mat, nzvals, indices,
275 pointers, nnz, map,
276 distribution, ordering);
277 }
278 };
279
280 template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
281 struct diff_go_helper_kokkos_view
282 {
283 static void do_get(const Teuchos::Ptr<const M> mat,
284 KV_S& nzvals,
285 KV_GO& indices,
286 KV_GS& pointers,
287 typename KV_GS::value_type& nnz,
288 const Teuchos::Ptr<
289 const Tpetra::Map<typename M::local_ordinal_t,
290 typename M::global_ordinal_t,
291 typename M::node_t> > map,
292 EDistribution distribution,
293 EStorage_Ordering ordering)
294 {
295 typedef typename M::global_ordinal_t mat_go_t;
296 typedef typename M::global_size_t mat_gs_t;
297 typedef typename Kokkos::View<mat_go_t*, Kokkos::HostSpace> KV_TMP;
298 size_t i, size = indices.extent(0);
299 KV_TMP indices_tmp(Kokkos::ViewAllocateWithoutInitializing("indices_tmp"), size);
300
301 typedef typename KV_GO::value_type view_go_t;
302 typedef typename KV_GS::value_type view_gs_t;
303 std::conditional_t<std::is_same_v<view_gs_t,mat_gs_t>,
304 same_gs_helper_kokkos_view<M, KV_S, KV_TMP, KV_GS, Op>,
305 diff_gs_helper_kokkos_view<M, KV_S, KV_TMP, KV_GS, Op> >::do_get(mat, nzvals, indices_tmp,
306 pointers, nnz, map,
307 distribution, ordering);
308 auto host_indices = Kokkos::create_mirror_view(indices);
309 for (i = 0; i < size; ++i){
310 host_indices(i) = Teuchos::as<view_go_t>(indices_tmp(i));
311 }
312 Kokkos::deep_copy(indices, host_indices);
313 }
314 };
315
316 template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
317 struct same_scalar_helper_kokkos_view
318 {
319 static void do_get(const Teuchos::Ptr<const M> mat,
320 KV_S& nzvals,
321 KV_GO& indices,
322 KV_GS& pointers,
323 typename KV_GS::value_type& nnz,
324 const Teuchos::Ptr<
325 const Tpetra::Map<typename M::local_ordinal_t,
326 typename M::global_ordinal_t,
327 typename M::node_t> > map,
328 EDistribution distribution,
329 EStorage_Ordering ordering)
330 {
331 typedef typename M::global_ordinal_t mat_go_t;
332 typedef typename KV_GO::value_type view_go_t;
333 std::conditional_t<std::is_same_v<view_go_t, mat_go_t>,
334 same_go_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op>,
335 diff_go_helper_kokkos_view<M, KV_S, KV_GO, KV_GS, Op> >::do_get(mat, nzvals, indices,
336 pointers, nnz, map,
337 distribution, ordering);
338 }
339 };
340
341 template<class M, typename KV_S, typename KV_GO, typename KV_GS, class Op>
342 struct diff_scalar_helper_kokkos_view
343 {
344 static void do_get(const Teuchos::Ptr<const M> mat,
345 KV_S& nzvals,
346 KV_GO& indices,
347 KV_GS& pointers,
348 typename KV_GS::value_type& nnz,
349 const Teuchos::Ptr<
350 const Tpetra::Map<typename M::local_ordinal_t,
351 typename M::global_ordinal_t,
352 typename M::node_t> > map,
353 EDistribution distribution,
354 EStorage_Ordering ordering)
355 {
356 typedef typename M::global_ordinal_t mat_go_t;
357 typedef typename KokkosKernels::ArithTraits<typename M::scalar_t>::val_type mat_scalar_t;
358 typedef typename Kokkos::View<mat_scalar_t*, Kokkos::HostSpace> KV_TMP;
359 size_t i, size = nzvals.extent(0);
360 KV_TMP nzvals_tmp(Kokkos::ViewAllocateWithoutInitializing("nzvals_tmp"), size);
361
362 typedef typename KV_S::value_type view_scalar_t;
363 typedef typename KV_GO::value_type view_go_t;
364 std::conditional_t<std::is_same_v<view_go_t, mat_go_t>,
365 same_go_helper_kokkos_view<M, KV_TMP, KV_GO, KV_GS, Op>,
366 diff_go_helper_kokkos_view<M, KV_TMP, KV_GO, KV_GS, Op> >::do_get(mat, nzvals_tmp, indices,
367 pointers, nnz, map,
368 distribution, ordering);
369
370 auto host_nzvals = Kokkos::create_mirror_view(nzvals);
371 for (i = 0; i < size; ++i){
372 host_nzvals(i) = Teuchos::as<view_scalar_t>(nzvals_tmp(i));
373 }
374 Kokkos::deep_copy(nzvals, host_nzvals);
375 }
376 };
377
378
379 template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS, class Op>
380 struct get_cxs_helper_kokkos_view
381 {
382 static void do_get(const Teuchos::Ptr<const Matrix> mat,
383 KV_S& nzvals,
384 KV_GO& indices,
385 KV_GS& pointers,
386 typename KV_GS::value_type& nnz,
387 EDistribution distribution,
388 EStorage_Ordering ordering=ARBITRARY,
389 typename KV_GO::value_type indexBase = 0)
390 {
391 typedef typename Matrix::local_ordinal_t lo_t;
392 typedef typename Matrix::global_ordinal_t go_t;
393 typedef typename Matrix::global_size_t gs_t;
394 typedef typename Matrix::node_t node_t;
395
396 const Teuchos::RCP<const Tpetra::Map<lo_t,go_t,node_t> > map
397 = getDistributionMap<lo_t,go_t,gs_t,node_t>(distribution,
398 Op::get_dimension(mat),
399 mat->getComm(),
400 indexBase,
401 Op::getMapFromMatrix(mat) //getMap must be the map returned, NOT rowmap or colmap
402 );
403 do_get(mat, nzvals, indices, pointers, nnz, Teuchos::ptrInArg(*map), distribution, ordering);
404 }
405
410 static void do_get(const Teuchos::Ptr<const Matrix> mat,
411 KV_S& nzvals,
412 KV_GO& indices,
413 KV_GS& pointers,
414 typename KV_GS::value_type& nnz,
415 EDistribution distribution, // Does this one need a distribution argument??
416 EStorage_Ordering ordering=ARBITRARY)
417 {
418 const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
419 typename Matrix::global_ordinal_t,
420 typename Matrix::node_t> > map
421 = Op::getMap(mat);
422 do_get(mat, nzvals, indices, pointers, nnz, Teuchos::ptrInArg(*map), distribution, ordering);
423 }
424
429 static void do_get(const Teuchos::Ptr<const Matrix> mat,
430 KV_S& nzvals,
431 KV_GO& indices,
432 KV_GS& pointers,
433 typename KV_GS::value_type& nnz,
434 const Teuchos::Ptr<
435 const Tpetra::Map<typename Matrix::local_ordinal_t,
436 typename Matrix::global_ordinal_t,
437 typename Matrix::node_t> > map,
438 EDistribution distribution,
439 EStorage_Ordering ordering=ARBITRARY)
440 {
441 typedef typename Matrix::scalar_t mat_scalar;
442 typedef typename KV_S::value_type view_scalar_t;
443
444 std::conditional_t<std::is_same_v<mat_scalar,view_scalar_t>,
445 same_scalar_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,Op>,
446 diff_scalar_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,Op> >::do_get(mat,
447 nzvals, indices,
448 pointers, nnz,
449 map,
450 distribution, ordering);
451 }
452 };
453
454#ifndef DOXYGEN_SHOULD_SKIP_THIS
455 /*
456 * These two function-like classes are meant to be used as the \c
457 * Op template parameter for the \c get_cxs_helper template class.
458 */
459 template<class Matrix>
460 struct get_ccs_func
461 {
462 template<typename KV_S, typename KV_GO, typename KV_GS>
463 static void apply_kokkos_view(const Teuchos::Ptr<const Matrix> mat,
464 KV_S& nzvals,
465 KV_GO& rowind,
466 KV_GS& colptr,
467 typename Matrix::global_size_t& nnz,
468 const Teuchos::Ptr<
469 const Tpetra::Map<typename Matrix::local_ordinal_t,
470 typename Matrix::global_ordinal_t,
471 typename Matrix::node_t> > map,
472 EDistribution distribution,
473 EStorage_Ordering ordering)
474 {
475 mat->getCcs_kokkos_view(nzvals, rowind, colptr, nnz, map, ordering, distribution);
476 }
477
478 static
479 const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
480 typename Matrix::global_ordinal_t,
481 typename Matrix::node_t> >
482 getMapFromMatrix(const Teuchos::Ptr<const Matrix> mat)
483 {
484 return mat->getMap(); // returns Teuchos::null if mat is Epetra_CrsMatrix
485 }
486
487 static
488 const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
489 typename Matrix::global_ordinal_t,
490 typename Matrix::node_t> >
491 getMap(const Teuchos::Ptr<const Matrix> mat)
492 {
493 return mat->getColMap();
494 }
495
496 static
497 typename Matrix::global_size_t
498 get_dimension(const Teuchos::Ptr<const Matrix> mat)
499 {
500 return mat->getGlobalNumCols();
501 }
502 };
503
504 template<class Matrix>
505 struct get_crs_func
506 {
507 template<typename KV_S, typename KV_GO, typename KV_GS>
508 static void apply_kokkos_view(const Teuchos::Ptr<const Matrix> mat,
509 KV_S& nzvals,
510 KV_GO& colind,
511 KV_GS& rowptr,
512 typename Matrix::global_size_t& nnz,
513 const Teuchos::Ptr<
514 const Tpetra::Map<typename Matrix::local_ordinal_t,
515 typename Matrix::global_ordinal_t,
516 typename Matrix::node_t> > map,
517 EDistribution distribution,
518 EStorage_Ordering ordering)
519 {
520 mat->getCrs_kokkos_view(nzvals, colind, rowptr, nnz, map, ordering, distribution);
521 }
522
523 static
524 const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
525 typename Matrix::global_ordinal_t,
526 typename Matrix::node_t> >
527 getMapFromMatrix(const Teuchos::Ptr<const Matrix> mat)
528 {
529 return mat->getMap(); // returns Teuchos::null if mat is Epetra_CrsMatrix
530 }
531
532 static
533 const Teuchos::RCP<const Tpetra::Map<typename Matrix::local_ordinal_t,
534 typename Matrix::global_ordinal_t,
535 typename Matrix::node_t> >
536 getMap(const Teuchos::Ptr<const Matrix> mat)
537 {
538 return mat->getRowMap();
539 }
540
541 static
542 typename Matrix::global_size_t
543 get_dimension(const Teuchos::Ptr<const Matrix> mat)
544 {
545 return mat->getGlobalNumRows();
546 }
547 };
548#endif // DOXYGEN_SHOULD_SKIP_THIS
549
587 template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS>
588 struct get_ccs_helper_kokkos_view : get_cxs_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,get_ccs_func<Matrix> >
589 {};
590
598 template<class Matrix, typename KV_S, typename KV_GO, typename KV_GS>
599 struct get_crs_helper_kokkos_view : get_cxs_helper_kokkos_view<Matrix,KV_S,KV_GO,KV_GS,get_crs_func<Matrix> >
600 {};
601 /* End Matrix/MultiVector Utilities */
602
603
605 // Definitions //
607
608
609 template <typename LO, typename GO, typename GS, typename Node>
610 const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
611 getGatherMap( const Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > &map )
612 {
613 //RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream( Teuchos::null ); // may need to pass an osstream to computeGatherMap for debugging cases...
614 Teuchos::RCP< const Tpetra::Map<LO,GO,Node> > gather_map = Tpetra::Details::computeGatherMap(map, Teuchos::null);
615 return gather_map;
616 }
617
618
619 template <typename LO, typename GO, typename GS, typename Node>
620 const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
621 getDistributionMap(EDistribution distribution,
622 GS num_global_elements,
623 const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
624 GO indexBase,
625 const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >& map)
626 {
627 // TODO: Need to add indexBase to cases other than ROOTED
628 // We do not support these maps in any solver now.
629 switch( distribution ){
630 case DISTRIBUTED:
632 return Tpetra::createUniformContigMapWithNode<LO,GO, Node>(num_global_elements, comm);
634 return Tpetra::createLocalMapWithNode<LO,GO, Node>(num_global_elements, comm);
635 case ROOTED:
636 {
637 int rank = Teuchos::rank(*comm);
638 size_t my_num_elems = Teuchos::OrdinalTraits<size_t>::zero();
639 if( rank == 0 ) { my_num_elems = num_global_elements; }
640
641 return rcp(new Tpetra::Map<LO,GO, Node>(num_global_elements,
642 my_num_elems, indexBase, comm));
643 }
645 {
646 const Teuchos::RCP<const Tpetra::Map<LO,GO,Node> > gathermap
647 = getGatherMap<LO,GO,GS,Node>( map ); //getMap must be the map returned, NOT rowmap or colmap
648 return gathermap;
649 }
650 default:
651 TEUCHOS_TEST_FOR_EXCEPTION( true,
652 std::logic_error,
653 "Control should never reach this point. "
654 "Please contact the Amesos2 developers." );
655 }
656 }
657
658 template <typename Scalar,
659 typename GlobalOrdinal,
660 typename GlobalSizeT>
661 void transpose(Teuchos::ArrayView<Scalar> vals,
662 Teuchos::ArrayView<GlobalOrdinal> indices,
663 Teuchos::ArrayView<GlobalSizeT> ptr,
664 Teuchos::ArrayView<Scalar> trans_vals,
665 Teuchos::ArrayView<GlobalOrdinal> trans_indices,
666 Teuchos::ArrayView<GlobalSizeT> trans_ptr)
667 {
668 /* We have a compressed-row storage format of this matrix. We
669 * transform this into a compressed-column format using a
670 * distribution-counting sort algorithm, which is described by
671 * D.E. Knuth in TAOCP Vol 3, 2nd ed pg 78.
672 */
673
674#ifdef HAVE_AMESOS2_DEBUG
675 typename Teuchos::ArrayView<GlobalOrdinal>::iterator ind_it, ind_begin, ind_end;
676 ind_begin = indices.begin();
677 ind_end = indices.end();
678 size_t min_trans_ptr_size = *std::max_element(ind_begin, ind_end) + 1;
679 TEUCHOS_TEST_FOR_EXCEPTION( Teuchos::as<size_t>(trans_ptr.size()) < min_trans_ptr_size,
680 std::invalid_argument,
681 "Transpose pointer size not large enough." );
682 TEUCHOS_TEST_FOR_EXCEPTION( trans_vals.size() < vals.size(),
683 std::invalid_argument,
684 "Transpose values array not large enough." );
685 TEUCHOS_TEST_FOR_EXCEPTION( trans_indices.size() < indices.size(),
686 std::invalid_argument,
687 "Transpose indices array not large enough." );
688#else
689 typename Teuchos::ArrayView<GlobalOrdinal>::iterator ind_it, ind_end;
690#endif
691 // Count the number of entries in each column
692 Teuchos::Array<GlobalSizeT> count(trans_ptr.size(), 0);
693 ind_end = indices.end();
694 for( ind_it = indices.begin(); ind_it != ind_end; ++ind_it ){
695 ++(count[(*ind_it) + 1]);
696 }
697 // Accumulate
698 typename Teuchos::Array<GlobalSizeT>::iterator cnt_it, cnt_end;
699 cnt_end = count.end();
700 for( cnt_it = count.begin() + 1; cnt_it != cnt_end; ++cnt_it ){
701 *cnt_it = *cnt_it + *(cnt_it - 1);
702 }
703 // This becomes the array of column pointers
704 trans_ptr.assign(count);
705
706 /* Move the nonzero values into their final place in nzval, based on the
707 * counts found previously.
708 *
709 * This sequence deviates from Knuth's algorithm a bit, following more
710 * closely the description presented in Gustavson, Fred G. "Two Fast
711 * Algorithms for Sparse Matrices: Multiplication and Permuted
712 * Transposition" ACM Trans. Math. Softw. volume 4, number 3, 1978, pages
713 * 250--269, http://doi.acm.org/10.1145/355791.355796.
714 *
715 * The output indices end up in sorted order
716 */
717
718 GlobalSizeT size = ptr.size();
719 for( GlobalSizeT i = 0; i < size - 1; ++i ){
720 GlobalOrdinal u = ptr[i];
721 GlobalOrdinal v = ptr[i + 1];
722 for( GlobalOrdinal j = u; j < v; ++j ){
723 GlobalOrdinal k = count[indices[j]];
724 trans_vals[k] = vals[j];
725 trans_indices[k] = i;
726 ++(count[indices[j]]);
727 }
728 }
729 }
730
731
732 template <typename Scalar1, typename Scalar2>
733 void
734 scale(Teuchos::ArrayView<Scalar1> vals, size_t l,
735 size_t ld, Teuchos::ArrayView<Scalar2> s)
736 {
737 size_t vals_size = vals.size();
738#ifdef HAVE_AMESOS2_DEBUG
739 size_t s_size = s.size();
740 TEUCHOS_TEST_FOR_EXCEPTION( s_size < l,
741 std::invalid_argument,
742 "Scale vector must have length at least that of the vector" );
743#endif
744 size_t i, s_i;
745 for( i = 0, s_i = 0; i < vals_size; ++i, ++s_i ){
746 if( s_i == l ){
747 // bring i to the next multiple of ld
748 i += ld - s_i;
749 s_i = 0;
750 }
751 vals[i] *= s[s_i];
752 }
753 }
754
755 template <typename Scalar1, typename Scalar2, class BinaryOp>
756 void
757 scale(Teuchos::ArrayView<Scalar1> vals, size_t l,
758 size_t ld, Teuchos::ArrayView<Scalar2> s,
759 BinaryOp binary_op)
760 {
761 size_t vals_size = vals.size();
762#ifdef HAVE_AMESOS2_DEBUG
763 size_t s_size = s.size();
764 TEUCHOS_TEST_FOR_EXCEPTION( s_size < l,
765 std::invalid_argument,
766 "Scale vector must have length at least that of the vector" );
767#endif
768 size_t i, s_i;
769 for( i = 0, s_i = 0; i < vals_size; ++i, ++s_i ){
770 if( s_i == l ){
771 // bring i to the next multiple of ld
772 i += ld - s_i;
773 s_i = 0;
774 }
775 vals[i] = binary_op(vals[i], s[s_i]);
776 }
777 }
778
779 template<class row_ptr_view_t, class cols_view_t, class per_view_t>
780 void
781 reorder(row_ptr_view_t & row_ptr, cols_view_t & cols,
782 per_view_t & perm, per_view_t & peri, size_t & nnz,
783 bool permute_matrix)
784 {
785 #ifndef HAVE_AMESOS2_METIS
786 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error,
787 "Cannot reorder for cuSolver because no METIS is available.");
788 #else
789 typedef typename cols_view_t::value_type ordinal_type;
790 typedef typename row_ptr_view_t::value_type size_type;
791
792 // begin on host where we'll run metis reorder
793 auto host_row_ptr = Kokkos::create_mirror_view(row_ptr);
794 auto host_cols = Kokkos::create_mirror_view(cols);
795 Kokkos::deep_copy(host_row_ptr, row_ptr);
796 Kokkos::deep_copy(host_cols, cols);
797
798 // strip out the diagonals - metis will just crash with them included.
799 // make space for the stripped version
800 typedef Kokkos::View<idx_t*, Kokkos::HostSpace> host_metis_array;
801 const ordinal_type size = row_ptr.size() - 1;
802 size_type max_nnz = host_row_ptr(size);
803 host_metis_array host_strip_diag_row_ptr(
804 Kokkos::ViewAllocateWithoutInitializing("host_strip_diag_row_ptr"),
805 size+1);
806 host_metis_array host_strip_diag_cols(
807 Kokkos::ViewAllocateWithoutInitializing("host_strip_diag_cols"),
808 max_nnz);
809
810 size_type new_nnz = 0;
811 for(ordinal_type i = 0; i < size; ++i) {
812 host_strip_diag_row_ptr(i) = new_nnz;
813 for(size_type j = host_row_ptr(i); j < host_row_ptr(i+1); ++j) {
814 if (i != host_cols(j)) {
815 host_strip_diag_cols(new_nnz++) = host_cols(j);
816 }
817 }
818 }
819 host_strip_diag_row_ptr(size) = new_nnz;
820
821 // we'll get original permutations on host
822 host_metis_array host_perm(
823 Kokkos::ViewAllocateWithoutInitializing("host_perm"), size);
824 host_metis_array host_peri(
825 Kokkos::ViewAllocateWithoutInitializing("host_peri"), size);
826
827 // If we want to remove metis.h included in this header we can move this
828 // to the cpp, but we need to decide how to handle the idx_t declaration.
829 idx_t metis_size = size;
830 int err = METIS_NodeND(&metis_size, host_strip_diag_row_ptr.data(), host_strip_diag_cols.data(),
831 NULL, NULL, host_perm.data(), host_peri.data());
832
833 TEUCHOS_TEST_FOR_EXCEPTION(err != METIS_OK, std::runtime_error,
834 "METIS_NodeND failed to sort matrix.");
835
836 // put the permutations on our saved device ptrs
837 // these will be used to permute x and b when we solve
838 typedef typename cols_view_t::execution_space exec_space_t;
839 auto device_perm = Kokkos::create_mirror_view(exec_space_t(), host_perm);
840 auto device_peri = Kokkos::create_mirror_view(exec_space_t(), host_peri);
841 deep_copy(device_perm, host_perm);
842 deep_copy(device_peri, host_peri);
843
844 // also set the permutation which may need to convert the type from
845 // metis to the native ordinal_type
846 deep_copy_or_assign_view(perm, device_perm);
847 deep_copy_or_assign_view(peri, device_peri);
848
849 if (permute_matrix) {
850 // we'll permute matrix on device to a new set of arrays
851 row_ptr_view_t new_row_ptr(
852 Kokkos::ViewAllocateWithoutInitializing("new_row_ptr"), row_ptr.size());
853 cols_view_t new_cols(
854 Kokkos::ViewAllocateWithoutInitializing("new_cols"), cols.size() - new_nnz/2);
855
856 // permute row indices
857 Kokkos::RangePolicy<exec_space_t> policy_row(0, row_ptr.size());
858 Kokkos::parallel_scan(policy_row, KOKKOS_LAMBDA(
859 ordinal_type i, size_type & update, const bool &final) {
860 if(final) {
861 new_row_ptr(i) = update;
862 }
863 if(i < size) {
864 ordinal_type count = 0;
865 const ordinal_type row = device_perm(i);
866 for(ordinal_type k = row_ptr(row); k < row_ptr(row + 1); ++k) {
867 const ordinal_type j = device_peri(cols(k));
868 count += (i >= j);
869 }
870 update += count;
871 }
872 });
873
874 // permute col indices
875 Kokkos::RangePolicy<exec_space_t> policy_col(0, size);
876 Kokkos::parallel_for(policy_col, KOKKOS_LAMBDA(ordinal_type i) {
877 const ordinal_type kbeg = new_row_ptr(i);
878 const ordinal_type row = device_perm(i);
879 const ordinal_type col_beg = row_ptr(row);
880 const ordinal_type col_end = row_ptr(row + 1);
881 const ordinal_type nk = col_end - col_beg;
882 for(ordinal_type k = 0, t = 0; k < nk; ++k) {
883 const ordinal_type tk = kbeg + t;
884 const ordinal_type sk = col_beg + k;
885 const ordinal_type j = device_peri(cols(sk));
886 if(i >= j) {
887 new_cols(tk) = j;
888 ++t;
889 }
890 }
891 });
892
893 // finally set the inputs to the new sorted arrays
894 row_ptr = new_row_ptr;
895 cols = new_cols;
896 }
897
898 nnz = new_nnz;
899 #endif // HAVE_AMESOS2_METIS
900 }
901
902 template<class values_view_t, class row_ptr_view_t,
903 class cols_view_t, class per_view_t>
904 void
905 reorder_values(values_view_t & values, const row_ptr_view_t & orig_row_ptr,
906 const row_ptr_view_t & new_row_ptr,
907 const cols_view_t & orig_cols, const per_view_t & perm, const per_view_t & peri,
908 size_t nnz)
909 {
910 typedef typename cols_view_t::value_type ordinal_type;
911 typedef typename cols_view_t::execution_space exec_space_t;
912
913 auto device_perm = Kokkos::create_mirror_view(exec_space_t(), perm);
914 auto device_peri = Kokkos::create_mirror_view(exec_space_t(), peri);
915 deep_copy(device_perm, perm);
916 deep_copy(device_peri, peri);
917
918 const ordinal_type size = orig_row_ptr.size() - 1;
919
920 auto host_orig_row_ptr = Kokkos::create_mirror_view(orig_row_ptr);
921 auto new_nnz = host_orig_row_ptr(size); // TODO: Maybe optimize this by caching
922
923 values_view_t new_values(
924 Kokkos::ViewAllocateWithoutInitializing("new_values"), values.size() - new_nnz/2);
925
926 // permute col indices
927 Kokkos::RangePolicy<exec_space_t> policy_col(0, size);
928 Kokkos::parallel_for(policy_col, KOKKOS_LAMBDA(ordinal_type i) {
929 const ordinal_type kbeg = new_row_ptr(i);
930 const ordinal_type row = device_perm(i);
931 const ordinal_type col_beg = orig_row_ptr(row);
932 const ordinal_type col_end = orig_row_ptr(row + 1);
933 const ordinal_type nk = col_end - col_beg;
934 for(ordinal_type k = 0, t = 0; k < nk; ++k) {
935 const ordinal_type tk = kbeg + t;
936 const ordinal_type sk = col_beg + k;
937 const ordinal_type j = device_peri(orig_cols(sk));
938 if(i >= j) {
939 new_values(tk) = values(sk);
940 ++t;
941 }
942 }
943 });
944
945 values = new_values;
946 }
947
948 template<class array_view_t, class per_view_t>
949 void
950 apply_reorder_permutation(const array_view_t & array,
951 array_view_t & permuted_array, const per_view_t & permutation) {
952 if(permuted_array.extent(0) != array.extent(0) || permuted_array.extent(1) != array.extent(1)) {
953 permuted_array = array_view_t(
954 Kokkos::ViewAllocateWithoutInitializing("permuted_array"),
955 array.extent(0), array.extent(1));
956 }
957 typedef typename array_view_t::execution_space exec_space_t;
958 Kokkos::RangePolicy<exec_space_t> policy(0, array.extent(0));
959 Kokkos::parallel_for(policy, KOKKOS_LAMBDA(size_t i) {
960 for(size_t j = 0; j < array.extent(1); ++j) {
961 permuted_array(i, j) = array(permutation(i), j);
962 }
963 });
964 }
965
966
967 // used to read matrix with gapped GIDs for test/example
968 template <typename GO, typename Scalar>
969 bool
970 readEntryFromFile (GO& gblRowInd, GO& gblColInd, Scalar& val, const std::string& s)
971 {
972 if (s.size () == 0 || s.find ("%") != std::string::npos) {
973 return false; // empty line or comment line
974 }
975 std::istringstream in (s);
976
977 if (! in) {
978 return false;
979 }
980 in >> gblRowInd;
981 if (! in) {
982 return false;
983 }
984 in >> gblColInd;
985 if (! in) {
986 return false;
987 }
988 in >> val;
989 return true;
990 }
991
992 template<class map_type, class MAT>
993 Teuchos::RCP<MAT>
994 readCrsMatrixFromFile (const std::string& matrixFilename,
995 Teuchos::RCP<Teuchos::FancyOStream> & fos,
996 const Teuchos::RCP<const map_type>& rowMap,
997 const Teuchos::RCP<const map_type>& domainMap,
998 const Teuchos::RCP<const map_type>& rangeMap,
999 const bool convert_to_zero_base,
1000 const int header_size)
1001 {
1002 using Scalar = typename MAT::scalar_type;
1003 using GO = typename MAT::global_ordinal_type;
1004
1005 using counter_type = std::map<GO, size_t>;
1006 using pair_type = std::pair<const GO, size_t>;
1007 using Teuchos::RCP;
1008
1009 auto comm = rowMap->getComm ();
1010 const int myRank = comm->getRank ();
1011
1012 std::ifstream inFile;
1013 int opened = 0;
1014 if (myRank == 0)
1015 {
1016 try {
1017 inFile.open (matrixFilename);
1018 if (inFile) {
1019 opened = 1;
1020 }
1021 }
1022 catch (...) {
1023 opened = 0;
1024 }
1025 }
1026 Teuchos::broadcast<int, int> (*comm, 0, Teuchos::outArg (opened));
1027 TEUCHOS_TEST_FOR_EXCEPTION
1028 (opened == 0, std::runtime_error, "readCrsMatrixFromFile: "
1029 "Failed to open file \"" << matrixFilename << "\" on Process 0.");
1030
1031 RCP<MAT> A;
1032 if (myRank == 0)
1033 {
1034 std::string line;
1035
1036 // Skip the first N lines. This is a hack, specific to the input file in question.
1037 //*fos << " Reading matrix market file. Skip " << header_size << " header lines" << std::endl;
1038 for ( int i = 0; i < header_size; ++i ) {
1039 std::getline (inFile, line);
1040 }
1041
1042 counter_type counts;
1043 Teuchos::Array<Scalar> vals;
1044 Teuchos::Array<GO> gblRowInds;
1045 Teuchos::Array<GO> gblColInds;
1046 while (inFile) {
1047 std::getline (inFile, line);
1048 GO gblRowInd {};
1049 GO gblColInd {};
1050 Scalar val {};
1051 const bool gotLine = readEntryFromFile (gblRowInd, gblColInd, val, line);
1052 if (gotLine) {
1053 //*fos << " read mtx rank: " << myRank << " | gblRowInd = " << gblRowInd << " gblColInd = " << gblColInd << std::endl;
1054 if ( convert_to_zero_base ) {
1055 gblRowInd -= 1 ;
1056 gblColInd -= 1 ;
1057 }
1058 counts[gblRowInd]++;
1059 vals.push_back(val);
1060 gblRowInds.push_back(gblRowInd);
1061 gblColInds.push_back(gblColInd);
1062 }
1063 }
1064
1065 // Max number of entries in any row
1066 auto pr = std::max_element(
1067 std::begin(counts),
1068 std::end(counts),
1069 [] (pair_type const& p1, pair_type const& p2){ return p1.second < p2.second; }
1070 );
1071 size_t maxCount = (counts.empty()) ? size_t(0) : pr->second;
1072 A = Teuchos::rcp(new MAT(rowMap, maxCount));
1073 for (typename Teuchos::Array<GO>::size_type i=0; i<gblRowInds.size(); i++) {
1074 A->insertGlobalValues (gblRowInds[i], gblColInds(i,1), vals(i,1));
1075 }
1076 } else {
1077 A = Teuchos::rcp(new MAT(rowMap, 0));
1078 }
1079
1080 A->fillComplete (domainMap, rangeMap);
1081 return A;
1082 }
1085 } // end namespace Util
1086
1087} // end namespace Amesos2
1088
1089#endif // #ifndef AMESOS2_UTIL_HPP
Copy or assign views based on memory spaces.
Provides some simple meta-programming utilities for Amesos2.
Enum and other types declarations for Amesos2.
EDistribution
Definition Amesos2_TypeDecl.hpp:89
@ DISTRIBUTED
Definition Amesos2_TypeDecl.hpp:90
@ GLOBALLY_REPLICATED
Definition Amesos2_TypeDecl.hpp:92
@ DISTRIBUTED_NO_OVERLAP
Definition Amesos2_TypeDecl.hpp:91
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:94
EStorage_Ordering
Definition Amesos2_TypeDecl.hpp:107
void printLine(Teuchos::FancyOStream &out)
Prints a line of 70 "-"s on std::cout.
Definition Amesos2_Util.cpp:20
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
const Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > getGatherMap(const Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > &map)
Gets a Tpetra::Map described by the EDistribution.
Definition Amesos2_Util.hpp:611
const int size
Definition klu2_simple.cpp:50
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:589
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition Amesos2_Util.hpp:600
A generic base class for the CRS and CCS helpers.
Definition Amesos2_Util.hpp:198