Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_TpetraCrsMatrix_MatrixAdapter_def.hpp
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10
11#ifndef AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP
12#define AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP
13
15#include "Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp"
16#include "Amesos2_MatrixAdapter_def.hpp"
17
18namespace Amesos2 {
19
20 template <typename Scalar,
21 typename LocalOrdinal,
22 typename GlobalOrdinal,
23 typename Node>
24 ConcreteMatrixAdapter<
25 Tpetra::CrsMatrix<Scalar,
26 LocalOrdinal,
27 GlobalOrdinal,
28 Node>
29 >::ConcreteMatrixAdapter(Teuchos::RCP<matrix_t> m)
30 : AbstractConcreteMatrixAdapter<Tpetra::RowMatrix<Scalar,
31 LocalOrdinal,
32 GlobalOrdinal,
33 Node>,
34 Tpetra::CrsMatrix<Scalar,
35 LocalOrdinal,
36 GlobalOrdinal,
37 Node> >(m) // with implicit cast
38 {}
39
40 template <typename Scalar,
41 typename LocalOrdinal,
42 typename GlobalOrdinal,
43 typename Node>
44 Teuchos::RCP<const MatrixAdapter<Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > >
45 ConcreteMatrixAdapter<
46 Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
47 >::get_impl(const Teuchos::Ptr<const map_t> map, EDistribution distribution) const
48 {
49 using Teuchos::RCP;
50 using Teuchos::rcp;
51 using Teuchos::rcpFromPtr;
52 typedef Tpetra::Import<local_ordinal_t, global_ordinal_t, node_t> import_t;
53
54 RCP<import_t> importer =
55 rcp (new import_t (this->getRowMap(), rcpFromPtr (map)));
56
57 RCP<matrix_t> t_mat;
58
59 t_mat = Tpetra::importAndFillCompleteCrsMatrix<matrix_t>( (this->mat_), *importer ); // DomainMap, RangeMap, params inputs default to Teuchos::null
60
61 // Case for non-contiguous GIDs
62 if ( distribution == CONTIGUOUS_AND_ROOTED ) {
63
64 auto myRank = map->getComm()->getRank();
65
66 auto local_matrix = t_mat->getLocalMatrixDevice();
67 const size_t global_num_contiguous_entries = t_mat->getGlobalNumRows();
68 const size_t local_num_contiguous_entries = (myRank == 0) ? t_mat->getGlobalNumRows() : 0;
69
70 //create maps
71 //typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type;
72 RCP<const map_t> contiguousRowMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
73 RCP<const map_t> contiguousColMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
74 RCP<const map_t> contiguousDomainMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
75 RCP<const map_t> contiguousRangeMap = rcp( new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
76
77 RCP<matrix_t> contiguous_t_mat = rcp( new matrix_t(contiguousRowMap, contiguousColMap, local_matrix) );
78 contiguous_t_mat->resumeFill();
79 contiguous_t_mat->expertStaticFillComplete(contiguousDomainMap, contiguousRangeMap);
80
81 return rcp (new ConcreteMatrixAdapter<matrix_t> (contiguous_t_mat));
82 } //end if not contiguous
83
84 return rcp (new ConcreteMatrixAdapter<matrix_t> (t_mat));
85 }
86
87
88
89 template <typename Scalar,
90 typename LocalOrdinal,
91 typename GlobalOrdinal,
92 typename Node>
93 Teuchos::RCP<const MatrixAdapter<Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > >
94 ConcreteMatrixAdapter<
95 Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
96 >::reindex_impl(Teuchos::RCP<const map_t> &contigRowMap,
97 Teuchos::RCP<const map_t> &contigColMap,
98 const EPhase current_phase) const
99 {
100 typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type;
101 using Teuchos::RCP;
102 using Teuchos::rcp;
103#ifdef HAVE_AMESOS2_TIMERS
104 auto reindexTimer = Teuchos::TimeMonitor::getNewTimer("Time to re-index matrix gids");
105 Teuchos::TimeMonitor ReindexTimer(*reindexTimer);
106#endif
107
108 auto rowMap = this->mat_->getRowMap();
109 auto colMap = this->mat_->getColMap();
110 auto rowComm = rowMap->getComm();
111 auto colComm = colMap->getComm();
112
113 GlobalOrdinal indexBase = rowMap->getIndexBase();
114 GlobalOrdinal numDoFs = this->mat_->getGlobalNumRows();
115 LocalOrdinal nRows = this->mat_->getLocalNumRows();
116 LocalOrdinal nCols = colMap->getLocalNumElements();
117
118 RCP<matrix_t> contiguous_t_mat;
119 // check when to recompute contigRowMap & contigColMap
120 if(current_phase == PREORDERING || current_phase == SYMBFACT) {
121 auto tmpMap = rcp (new contiguous_map_type (numDoFs, nRows, indexBase, rowComm));
122 global_ordinal_t frow = tmpMap->getMinGlobalIndex();
123
124 // Create new GID list for RowMap
125 Kokkos::View<global_ordinal_t*, HostExecSpaceType> rowIndexList ("rowIndexList", nRows);
126 for (local_ordinal_t k = 0; k < nRows; k++) {
127 rowIndexList(k) = frow+k; // based on index-base of rowMap
128 }
129 // Create new GID list for ColMap
130 Kokkos::View<global_ordinal_t*, HostExecSpaceType> colIndexList ("colIndexList", nCols);
131 // initialize to catch col GIDs that are not in row GIDs
132 // they will be all assigned to (n+1)th columns
133 for (local_ordinal_t k = 0; k < nCols; k++) {
134 colIndexList(k) = numDoFs+indexBase;
135 }
136 typedef Tpetra::MultiVector<global_ordinal_t,
137 local_ordinal_t,
138 global_ordinal_t,
139 node_t> gid_mv_t;
140 Teuchos::ArrayView<const global_ordinal_t> rowIndexArray(rowIndexList.data(), nRows);
141 Teuchos::ArrayView<const global_ordinal_t> colIndexArray(colIndexList.data(), nCols);
142 gid_mv_t row_mv (rowMap, rowIndexArray, nRows, 1);
143 gid_mv_t col_mv (colMap, colIndexArray, nCols, 1);
144 typedef Tpetra::Import<local_ordinal_t, global_ordinal_t, node_t> import_t;
145 RCP<import_t> importer = rcp (new import_t (rowMap, colMap));
146 col_mv.doImport (row_mv, *importer, Tpetra::INSERT);
147 {
148 // col_mv is imported from rowIndexList, which is based on index-base of rowMap
149 auto col_view = col_mv.getLocalViewHost(Tpetra::Access::ReadOnly);
150 for(int i=0; i<nCols; i++) colIndexList(i) = col_view(i,0);
151 }
152 // Create new Row & Col Maps (both based on indexBase of rowMap)
153 contigRowMap = rcp (new contiguous_map_type (numDoFs, rowIndexList.data(), nRows, indexBase, rowComm));
154 contigColMap = rcp (new contiguous_map_type (numDoFs, colIndexList.data(), nCols, indexBase, colComm));
155
156 // Create contiguous Matrix
157 auto lclMatrix = this->mat_->getLocalMatrixDevice();
158 contiguous_t_mat = rcp( new matrix_t(contigRowMap, contigColMap, lclMatrix));
159 } else {
160 // Build Matrix with contiguous Maps
161 auto lclMatrix = this->mat_->getLocalMatrixDevice();
162 auto importer = this->mat_->getCrsGraph()->getImporter();
163 auto exporter = this->mat_->getCrsGraph()->getExporter();
164 contiguous_t_mat = rcp( new matrix_t(lclMatrix, contigRowMap, contigColMap, contigRowMap, contigColMap, importer,exporter));
165 }
166
167 // Return new matrix adapter
168 return rcp (new ConcreteMatrixAdapter<matrix_t> (contiguous_t_mat));
169 }
170
171
172 template <typename Scalar,
173 typename LocalOrdinal,
174 typename GlobalOrdinal,
175 typename Node>
176 template<typename KV_S, typename KV_GO, typename KV_GS, typename host_ordinal_type_array, typename host_scalar_type_array>
177 LocalOrdinal
178 ConcreteMatrixAdapter<
179 Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
180 >::gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers,
181 host_ordinal_type_array &perm_g2l,
182 host_ordinal_type_array &recvCountRows, host_ordinal_type_array &recvDisplRows,
183 host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls,
184 host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t,
185 bool column_major, EPhase current_phase) const
186 {
187 LocalOrdinal ret = -1;
188 {
189#ifdef HAVE_AMESOS2_TIMERS
190 Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather");
191 Teuchos::TimeMonitor GatherTimer(*gatherTime);
192#endif
193 auto rowMap = this->mat_->getRowMap();
194 auto colMap = this->mat_->getColMap();
195 auto comm = rowMap->getComm();
196 auto nRanks = comm->getSize();
197 auto myRank = comm->getRank();
198
199 global_ordinal_t nRows = this->mat_->getGlobalNumRows();
200 auto lclMatrix = this->mat_->getLocalMatrixDevice();
201
202 // check when to recompute communication patterns
203 if(current_phase == PREORDERING || current_phase == SYMBFACT) {
204 // grab rowptr and colind on host
205 auto lclRowptr_d = lclMatrix.graph.row_map;
206 auto lclColind_d = lclMatrix.graph.entries;
207 auto lclRowptr = Kokkos::create_mirror_view(lclRowptr_d);
208 auto lclColind = Kokkos::create_mirror_view(lclColind_d);
209 Kokkos::deep_copy(lclRowptr, lclRowptr_d);
210 Kokkos::deep_copy(lclColind, lclColind_d);
211
212 // index-bases
213 global_ordinal_t rowIndexBase = rowMap->getIndexBase();
214 global_ordinal_t colIndexBase = colMap->getIndexBase();
215 // map from global to local
216 host_ordinal_type_array perm_l2g; // map from GIDs
217 // true uses 'original' contiguous row inds 0:(n-1) (no need to perm sol or rhs),
218 // false uses GIDs given by map (need to swap sol & rhs, but use matrix perm, e.g., fill-reducing)
219 bool swap_cols = false; //true;
220 // workspace to transpose
221 KV_GS pointers_t;
222 KV_GO indices_t;
223 // communication counts & displacements
224 LocalOrdinal myNRows = this->mat_->getLocalNumRows();
225 Kokkos::resize(recvCounts, nRanks);
226 Kokkos::resize(recvDispls, nRanks+1);
227 bool need_to_perm = false;
228 {
229#ifdef HAVE_AMESOS2_TIMERS
230 Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(rowptr)");
231 Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
232#endif
233 Teuchos::gather<int, LocalOrdinal> (&myNRows, 1, recvCounts.data(), 1, 0, *comm);
234 if (myRank == 0) {
235 Kokkos::resize(recvDispls, nRanks+1);
236 recvDispls(0) = 0;
237 for (int p = 1; p <= nRanks; p++) {
238 recvDispls(p) = recvDispls(p-1) + recvCounts(p-1);
239 }
240 if (recvDispls(nRanks) != nRows) {
241 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Amesos2_TpetraCrsMatrix_MatrixAdapter::gather_impl : mismatch between gathered(local nrows) and global nrows.");
242 }
243 } else {
244 for (int p = 0; p < nRanks; p++) {
245 recvCounts(p) = 0;
246 recvDispls(p) = 0;
247 }
248 recvDispls(nRanks) = 0;
249 }
250 // gether g2l perm (convert to 0-base)
251 {
252 host_ordinal_type_array lclMap;
253 Kokkos::resize(lclMap, myNRows);
254 if (myRank == 0) {
255 Kokkos::resize(perm_g2l, nRows);
256 Kokkos::resize(perm_l2g, nRows);
257 } else {
258 Kokkos::resize(perm_g2l, 1);
259 }
260 for (int i=0; i < myNRows; i++) {
261 lclMap(i) = rowMap->getGlobalElement(i);
262 }
263 Teuchos::gatherv<int, LocalOrdinal> (lclMap.data(), myNRows, perm_g2l.data(),
264 recvCounts.data(), recvDispls.data(),
265 0, *comm);
266 if (myRank == 0) {
267 for (int i=0; i < nRows; i++) {
268 perm_g2l(i) -= rowIndexBase; // map to GIDs
269 perm_l2g(perm_g2l(i)) = i; // map from GIDs
270 if (i != perm_g2l(i)) need_to_perm = true;
271 }
272 }
273 }
274 // gether rowptr
275 // - making sure same ordinal type
276 KV_GS lclRowptr_ ("localRowptr_", lclRowptr.extent(0));
277 for (int i = 0; i < int(lclRowptr.extent(0)); i++) lclRowptr_(i) = lclRowptr(i);
278 if (myRank == 0 && (column_major || need_to_perm)) {
279 Kokkos::resize(pointers_t, nRows+1);
280 } else if (myRank != 0) {
281 Kokkos::resize(pointers_t, 2);
282 }
283 LocalOrdinal sendIdx = (myNRows > 0 ? 1 : 0); // To skip sending the first rowptr entry (note: 0, if local matrix is empty)
284 LocalOrdinal *pointers_ = ((myRank != 0 || (column_major || need_to_perm)) ? pointers_t.data() : pointers.data());
285 Teuchos::gatherv<int, LocalOrdinal> (&lclRowptr_(sendIdx), myNRows, &pointers_[1],
286 recvCounts.data(), recvDispls.data(),
287 0, *comm);
288
289 // save row counts/displs
290 Kokkos::resize(recvCountRows, nRanks);
291 Kokkos::resize(recvDisplRows, nRanks+1);
292 Kokkos::deep_copy(recvCountRows, recvCounts);
293 Kokkos::deep_copy(recvDisplRows, recvDispls);
294 if (myRank == 0) {
295 // shift to global pointers
296 pointers_[0] = 0;
297 recvCounts(0) = pointers_[recvDispls(1)];
298 LocalOrdinal displs = recvCounts(0);
299 for (int p = 1; p < nRanks; p++) {
300 // skip "Empty" submatrix (no rows)
301 // recvCounts(p) is zero, while pointers_[recvDispls(p+1)] now contains nnz from p-1
302 if (recvDispls(p+1) > recvDispls(p)) {
303 // save recvCounts from pth MPI
304 recvCounts(p) = pointers_[recvDispls(p+1)];
305 // shift pointers for pth MPI to global
306 for (int i = 1+recvDispls(p); i <= recvDispls(p+1); i++) {
307 pointers_[i] += displs;
308 }
309 displs += recvCounts(p);
310 }
311 }
312 ret = pointers_[nRows];
313 }
314 }
315 {
316#ifdef HAVE_AMESOS2_TIMERS
317 Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(colind)");
318 Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
319#endif
320 // gather colinds
321 if (myRank == 0) {
322 recvDispls(0) = 0;
323 for (int p = 0; p < nRanks; p++) {
324 recvDispls(p+1) = recvDispls(p) + recvCounts(p);
325 }
326 }
327 // -- convert to global colids & ** convert to base-zero **
328 KV_GO lclColind_ ("localColind_", lclColind.extent(0));
329 for (int i = 0; i < int(lclColind.extent(0)); i++) {
330 lclColind_(i) = (colMap->getGlobalElement((lclColind(i))) - colIndexBase);
331 }
332 if (column_major || need_to_perm) {
333 Kokkos::resize(indices_t, indices.extent(0));
334 Teuchos::gatherv<int, LocalOrdinal> (lclColind_.data(), lclColind_.extent(0), indices_t.data(),
335 recvCounts.data(), recvDispls.data(),
336 0, *comm);
337 } else {
338 Teuchos::gatherv<int, LocalOrdinal> (lclColind_.data(), lclColind_.extent(0), indices.data(),
339 recvCounts.data(), recvDispls.data(),
340 0, *comm);
341 }
342 }
343 if (myRank == 0) {
344#ifdef HAVE_AMESOS2_TIMERS
345 Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(transpose index)");
346 Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
347#endif
348 if (swap_cols && need_to_perm) {
349 // convert col GIDs to 0:(n-1)
350 for (int i=0; i<ret; i++) {
351 if (column_major || need_to_perm) {
352 indices_t(i) = perm_l2g(indices_t(i));
353 } else {
354 indices(i) = perm_l2g(indices(i));
355 }
356 }
357 }
358 // (note: column idexes are now in base-0)
359 if (column_major) {
360 // Map to transpose
361 Kokkos::resize(transpose_map, ret);
362 // Transopose to convert to CSC
363 for (int i=0; i<=nRows; i++) {
364 pointers(i) = 0;
365 }
366 for (int k=0; k<ret; k++) {
367 int col = indices_t(k);
368 if (col < nRows-1) {
369 pointers(col+2) ++;
370 }
371 }
372 for (int i=1; i < nRows; i++) {
373 pointers(i+1) += pointers(i);
374 }
375 // nonzeroes in each column are sorted in ascending row
376 for (int row=0; row<nRows; row++) {
377 // if swap cols, then just use the original 0:(n-1), otherwise GIDs
378 int i = (swap_cols ? row : perm_l2g(row));
379 for (int k=pointers_t(i); k<pointers_t(i+1); k++) {
380 int col = indices_t(k);
381 if (col < nRows) {
382 transpose_map(k) = pointers(1+col);
383 indices(pointers(1+col)) = row;
384 pointers(1+col) ++;
385 } else {
386 // extra columns
387 transpose_map(k) = -1;
388 }
389 }
390 }
391 } else if (need_to_perm) {
392 // Map to permute
393 Kokkos::resize(transpose_map, ret);
394 for (int i=0; i<nRows; i++) {
395 int row = perm_g2l(i);
396 pointers(row+2) = pointers_t(i+1)-pointers_t(i);
397 }
398 for (int i=1; i < nRows; i++) {
399 pointers(i+1) += pointers(i);
400 }
401 for (int i=0; i<nRows; i++) {
402 int row = perm_g2l(i);
403 for (int k=pointers_t(i); k<pointers_t(i+1); k++) {
404 int col = indices_t(k);
405 if (col < nRows) {
406 transpose_map(k) = pointers(1+row);
407 indices(pointers(1+row)) = col;
408 pointers(1+row) ++;
409 } else {
410 transpose_map(k) = -1;
411 }
412 }
413 }
414 } else {
415 Kokkos::resize(transpose_map, 0);
416 }
417 }
418 if (!need_to_perm || swap_cols) {
419 // no need to perm rhs or sol
420 Kokkos::resize(perm_g2l, 0);
421 }
422 }
423 //if(current_phase == NUMFACT) // Numerical values may be used in symbolic (e.g, MWM)
424 {
425 {
426#ifdef HAVE_AMESOS2_TIMERS
427 Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(nzvals)");
428 Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
429#endif
430 // grab numerical values on host
431 auto lclNzvals_d = lclMatrix.values;
432 auto lclNzvals = Kokkos::create_mirror_view(lclNzvals_d);;
433 Kokkos::deep_copy(lclNzvals, lclNzvals_d);
434
435 // gather nzvals
436 if (transpose_map.extent(0) > 0) {
437 Kokkos::resize(nzvals_t, nzvals.extent(0));
438 Teuchos::gatherv<int, Scalar> (reinterpret_cast<Scalar*> (lclNzvals.data()), lclNzvals.extent(0),
439 reinterpret_cast<Scalar*> (nzvals_t.data()), recvCounts.data(), recvDispls.data(),
440 0, *comm);
441 } else {
442 Teuchos::gatherv<int, Scalar> (reinterpret_cast<Scalar*> (lclNzvals.data()), lclNzvals.extent(0),
443 reinterpret_cast<Scalar*> (nzvals.data()), recvCounts.data(), recvDispls.data(),
444 0, *comm);
445 }
446 }
447 if (myRank == 0) {
448 // Insert Numerical values to transopose matrix
449 ret = pointers(nRows);
450#ifdef HAVE_AMESOS2_TIMERS
451 Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter ("Amesos2::gather(transpose values)");
452 Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
453#endif
454 if (transpose_map.extent(0) > 0) {
455 //for (int k=0; k<ret; k++) {
456 // if (transpose_map(k) >= 0) {
457 // nzvals(transpose_map(k)) = nzvals_t(k);
458 // }
459 //}
460 Kokkos::parallel_for("Amesos2::TpetraCrsMatrixAdapter::gather", Kokkos::RangePolicy<HostExecSpaceType>(0, ret),
461 KOKKOS_LAMBDA(const int k) { if (transpose_map(k) >= 0) nzvals(transpose_map(k)) = nzvals_t(k); });
462 }
463 }
464 }
465 // broadcast return value
466 Teuchos::broadcast<int, LocalOrdinal>(*comm, 0, 1, &ret);
467 }
468 return ret;
469 }
470
471
472 template <typename Scalar,
473 typename LocalOrdinal,
474 typename GlobalOrdinal,
475 typename Node>
476 void
477 ConcreteMatrixAdapter<
478 Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
479 >::describe (Teuchos::FancyOStream& os,
480 const Teuchos::EVerbosityLevel verbLevel) const
481 {
482 this->mat_->describe(os, verbLevel);
483 }
484} // end namespace Amesos2
485
486#endif // AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP
Specialization of the ConcreteMatrixAdapter for Tpetra::CrsMatrix. Inherits all its functionality fro...