MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AlgebraicPermutationStrategy_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10/*
11 * MueLu_AlgebraicPermutationStrategy_def.hpp
12 *
13 * Created on: Feb 20, 2013
14 * Author: tobias
15 */
16
17#ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
18#define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
19
20#include <queue>
21
22#include <Teuchos_ScalarTraits.hpp>
23
24#include <Xpetra_MultiVector.hpp>
25#include <Xpetra_Matrix.hpp>
26#include <Xpetra_CrsGraph.hpp>
27#include <Xpetra_Vector.hpp>
28#include <Xpetra_MultiVectorFactory.hpp>
29#include <Xpetra_VectorFactory.hpp>
30#include <Xpetra_CrsMatrixWrap.hpp>
31#include <Xpetra_Export.hpp>
32#include <Xpetra_ExportFactory.hpp>
33#include <Xpetra_Import.hpp>
34#include <Xpetra_ImportFactory.hpp>
35#include <Xpetra_MatrixMatrix.hpp>
36
37#include "MueLu_Utilities.hpp"
39
40namespace MueLu {
41
42template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
44 BuildPermutation(const Teuchos::RCP<Matrix>& A, const Teuchos::RCP<const Map>& permRowMap,
45 Level& currentLevel, const FactoryBase* genFactory) const {
46 const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
47 const Scalar SC_ONE = Teuchos::ScalarTraits<Scalar>::one();
48
49 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
50 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
51 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
52
53 const Teuchos::RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
54 int numProcs = comm->getSize();
55 int myRank = comm->getRank();
56
57 size_t nDofsPerNode = 1;
58 if (A->IsView("stridedMaps")) {
59 Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
60 nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
61 }
62
63 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
64 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
65 std::vector<MT> Weights;
66
67 // loop over all local rows in matrix A and keep diagonal entries if corresponding
68 // matrix rows are not contained in permRowMap
69 for (size_t row = 0; row < A->getRowMap()->getLocalNumElements(); row++) {
70 GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);
71
72 if (permRowMap->isNodeGlobalElement(grow) == true) continue;
73
74 size_t nnz = A->getNumEntriesInLocalRow(row);
75
76 // extract local row information from matrix
77 Teuchos::ArrayView<const LocalOrdinal> indices;
78 Teuchos::ArrayView<const Scalar> vals;
79 A->getLocalRowView(row, indices, vals);
80
81 TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
82 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
83
84 // find column entry with max absolute value
85 GlobalOrdinal gMaxValIdx = 0;
86 MT norm1 = MT_ZERO;
87 MT maxVal = MT_ZERO;
88 for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
89 norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
90 if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
91 maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
92 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
93 }
94 }
95
96 if (grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
97 keepDiagonalEntries.push_back(std::make_pair(grow, grow));
98 }
99
101 // handle rows that are marked to be relevant for permutations
102 for (size_t row = 0; row < permRowMap->getLocalNumElements(); row++) {
103 GlobalOrdinal grow = permRowMap->getGlobalElement(row);
104 LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
105 size_t nnz = A->getNumEntriesInLocalRow(lArow);
106
107 // extract local row information from matrix
108 Teuchos::ArrayView<const LocalOrdinal> indices;
109 Teuchos::ArrayView<const Scalar> vals;
110 A->getLocalRowView(lArow, indices, vals);
111
112 TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
113 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
114
115 // find column entry with max absolute value
116 GlobalOrdinal gMaxValIdx = 0;
117 MT norm1 = MT_ZERO;
118 MT maxVal = MT_ZERO;
119 for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
120 norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
121 if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
122 maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
123 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
124 }
125 }
126
127 if (maxVal > Teuchos::ScalarTraits<MT>::zero()) { // keep only max Entries \neq 0.0
128 permutedDiagCandidates.push_back(std::make_pair(grow, gMaxValIdx));
129 Weights.push_back(maxVal / (norm1 * Teuchos::as<MT>(nnz)));
130 } else {
131 std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
132 }
133 }
134
135 // sort Weights in descending order
136 std::vector<int> permutation;
137 sortingPermutation(Weights, permutation);
138
139 // create new vector with exactly one possible entry for each column
140
141 // each processor which requests the global column id gcid adds 1 to gColVec
142 // gColVec will be summed up over all processors and communicated to gDomVec
143 // which is based on the non-overlapping domain map of A.
144
145 Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
146 Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
147 gColVec->putScalar(SC_ZERO);
148 gDomVec->putScalar(SC_ZERO);
149
150 // put in all keep diagonal entries
151 for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
152 gColVec->sumIntoGlobalValue((*p).second, 1.0);
153 }
154
155 Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
156 gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD); // communicate blocked gcolids to all procs
157 gColVec->doImport(*gDomVec, *exporter, Xpetra::INSERT);
158
159 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
160 std::map<GlobalOrdinal, MT> gColId2Weight;
161
162 {
163 Teuchos::ArrayRCP<Scalar> ddata = gColVec->getDataNonConst(0);
164 for (size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
165 // loop over all candidates
166 std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
167 GlobalOrdinal grow = pp.first;
168 GlobalOrdinal gcol = pp.second;
169
170 LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
171 if (Teuchos::ScalarTraits<Scalar>::real(ddata[lcol]) > MT_ZERO) {
172 continue; // skip lcol: column already handled by another row
173 }
174
175 // mark column as already taken
176 ddata[lcol] += SC_ONE;
177
178 permutedDiagCandidatesFiltered.push_back(std::make_pair(grow, gcol));
179 gColId2Weight[gcol] = Weights[permutation[i]];
180 }
181 }
182
183 // communicate how often each column index is requested by the different procs
184 gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD);
185 gColVec->doImport(*gDomVec, *exporter, Xpetra::INSERT); // probably not needed // TODO check me
186
187 //*****************************************************************************************
188 // first communicate ALL global ids of column indices which are requested by more
189 // than one proc to all other procs
190 // detect which global col indices are requested by more than one proc
191 // and store them in the multipleColRequests vector
192 std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
193 // requested by another processor. This is possible, since they are stored
194 // in gDomVec which is based on the nonoverlapping domain map. That is, each
195 // global col id is handled by exactly one proc.
196 std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor
197
198 for (size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
199 Teuchos::ArrayRCP<const Scalar> arrDomVec = gDomVec->getData(0);
200 //
201 // FIXME (mfh 30 Oct 2015) I _think_ it's OK to check just the
202 // real part, because this is a count. (Shouldn't MueLu use
203 // mag_type instead of Scalar here to save space?)
204 //
205 if (Teuchos::ScalarTraits<Scalar>::real(arrDomVec[sz]) > MT_ONE) {
206 multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
207 } else if (Teuchos::ScalarTraits<Scalar>::real(arrDomVec[sz]) == MT_ZERO) {
208 unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
209 }
210 }
211
212 // communicate the global number of column indices which are requested by more than one proc
213 LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
214 LocalOrdinal globalMultColRequests = 0;
215
216 // sum up all entries in multipleColRequests over all processors
217 //
218 // FIXME (lbv 11 Feb 2019) why cast localMultColRequests as LocalOrdinal
219 // when it was properly declared as such just a line above?
220 //
221 MueLu_sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);
222
223 if (globalMultColRequests > 0) {
224 // special handling: two processors request the same global column id.
225 // decide which processor gets it
226
227 // distribute number of multipleColRequests to all processors
228 // each processor stores how many column ids for exchange are handled by the cur proc
229 std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
230 std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
231 numMyMultColRequests[myRank] = localMultColRequests;
232 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
233 numMyMultColRequests.data(),
234 numGlobalMultColRequests.data());
235
236 // communicate multipleColRequests entries to all processors
237 int nMyOffset = 0;
238 for (int i = 0; i < myRank - 1; i++)
239 nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights
240
241 const GlobalOrdinal zero = 0;
242 std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
243 std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);
244
245 // loop over all local column GIDs that are also requested by other procs
246 for (size_t i = 0; i < multipleColRequests.size(); i++) {
247 procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
248 }
249
250 // template ordinal, package (double)
251 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests),
252 procMultRequestedColIds.data(),
253 global_procMultRequestedColIds.data());
254
255 // loop over global_procOverlappingWeights and eliminate wrong entries...
256 for (size_t k = 0; k < global_procMultRequestedColIds.size(); k++) {
257 GlobalOrdinal globColId = global_procMultRequestedColIds[k];
258
259 std::vector<MT> MyWeightForColId(numProcs, MT_ZERO);
260 std::vector<MT> GlobalWeightForColId(numProcs, MT_ZERO);
261
262 if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
263 MyWeightForColId[myRank] = gColId2Weight[globColId];
264 } else {
265 MyWeightForColId[myRank] = MT_ZERO;
266 }
267
268 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
269 MyWeightForColId.data(),
270 GlobalWeightForColId.data());
271
272 if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
273 // note: 2 procs could have the same weight for a column index.
274 // pick the first one.
275 MT winnerValue = MT_ZERO;
276 int winnerProcRank = 0;
277 for (int proc = 0; proc < numProcs; proc++) {
278 if (GlobalWeightForColId[proc] > winnerValue) {
279 winnerValue = GlobalWeightForColId[proc];
280 winnerProcRank = proc;
281 }
282 }
283
284 // winnerProcRank is the winner for handling globColId.
285 // winnerProcRank is unique (even if two procs have the same weight for a column index)
286
287 if (myRank != winnerProcRank) {
288 // remove corresponding entry from permutedDiagCandidatesFiltered
289 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
290 while (p != permutedDiagCandidatesFiltered.end()) {
291 if ((*p).second == globColId)
292 p = permutedDiagCandidatesFiltered.erase(p);
293 else
294 p++;
295 }
296 }
297
298 } // end if isNodeGlobalElement
299 } // end loop over global_procOverlappingWeights and eliminate wrong entries...
300 } // end if globalMultColRequests > 0
301
302 // put together all pairs:
303 // size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
304 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
305 RowColPairs.insert(RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
306 RowColPairs.insert(RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
307
308#ifdef DEBUG_OUTPUT
309 //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
310 // plausibility check
311 gColVec->putScalar(SC_ZERO);
312 gDomVec->putScalar(SC_ZERO);
313 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
314 while (pl != RowColPairs.end()) {
315 // GlobalOrdinal ik = (*pl).first;
316 GlobalOrdinal jk = (*pl).second;
317
318 gColVec->sumIntoGlobalValue(jk, 1.0);
319 pl++;
320 }
321 gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD);
322 for (size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
323 Teuchos::ArrayRCP<const Scalar> arrDomVec = gDomVec->getData(0);
324 if (arrDomVec[sz] > 1.0) {
325 GetOStream(Runtime0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
326 } else if (arrDomVec[sz] == SC_ZERO) {
327 GetOStream(Runtime0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
328 }
329 }
330 //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
331#endif
332
334 // assumption: on each processor RowColPairs now contains
335 // a valid set of (row,column) pairs, where the row entries
336 // are a subset of the processor's rows and the column entries
337 // are unique throughout all processors.
338 // Note: the RowColPairs are only defined for a subset of all rows,
339 // so there might be rows without an entry in RowColPairs.
340 // It can be, that some rows seem to be missing in RowColPairs, since
341 // the entry in that row with maximum absolute value has been reserved
342 // by another row already (e.g. as already diagonal dominant row outside
343 // of perRowMap).
344 // In fact, the RowColPairs vector only defines the (row,column) pairs
345 // that will be definitely moved to the diagonal after permutation.
346
347#ifdef DEBUG_OUTPUT
348 // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
349 // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
350 // }
351 // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
352 // {
354 // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
355 // }
356 // std::cout << "\n";
357#endif
358
359 // vectors to store permutation information
360 Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
361 Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
362 Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
363
364 Pperm->putScalar(SC_ZERO);
365 Qperm->putScalar(SC_ZERO);
366 lQperm->putScalar(SC_ZERO);
367
368 // setup exporter for Qperm
369 Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());
370
371 Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap());
372 Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
373 Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
374 Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
375 RowIdStatus->putScalar(SC_ZERO);
376 ColIdStatus->putScalar(SC_ZERO);
377 lColIdStatus->putScalar(SC_ZERO);
378 ColIdUsed->putScalar(SC_ZERO); // no column ids are used
379
380 // count wide-range permutations
381 // a wide-range permutation is defined as a permutation of rows/columns which do not
382 // belong to the same node
383 LocalOrdinal lWideRangeRowPermutations = 0;
384 GlobalOrdinal gWideRangeRowPermutations = 0;
385 LocalOrdinal lWideRangeColPermutations = 0;
386 GlobalOrdinal gWideRangeColPermutations = 0;
387
388 // run 1: mark all "identity" permutations
389 {
390 Teuchos::ArrayRCP<Scalar> RowIdStatusArray = RowIdStatus->getDataNonConst(0);
391 Teuchos::ArrayRCP<Scalar> lColIdStatusArray = lColIdStatus->getDataNonConst(0);
392
393 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
394 while (p != RowColPairs.end()) {
395 GlobalOrdinal ik = (*p).first;
396 GlobalOrdinal jk = (*p).second;
397
398 LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
399 LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);
400
401 if (RowIdStatusArray[lik] == SC_ZERO) {
402 RowIdStatusArray[lik] = SC_ONE; // use this row id
403 lColIdStatusArray[ljk] = SC_ONE; // use this column id
404 Pperm->replaceLocalValue(lik, ik);
405 lQperm->replaceLocalValue(ljk, ik); // use column map
406 ColIdUsed->replaceGlobalValue(ik, SC_ONE); // ik is now used
407 p = RowColPairs.erase(p);
408
409 // detect wide range permutations
410 if (floor(ik / nDofsPerNode) != floor(jk / nDofsPerNode)) {
411 lWideRangeColPermutations++;
412 }
413 } else
414 p++;
415 }
416
417 // communicate column map -> domain map
418 Qperm->doExport(*lQperm, *QpermExporter, Xpetra::ABSMAX);
419 ColIdStatus->doExport(*lColIdStatus, *QpermExporter, Xpetra::ABSMAX);
420
421 // plausibility check
422 if (RowColPairs.size() > 0) GetOStream(Warnings0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me
423
424 // close Pperm
425
426 // count, how many row permutations are missing on current proc
427 size_t cntFreeRowIdx = 0;
428 std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows
429 for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
430 if (RowIdStatusArray[lik] == SC_ZERO) {
431 cntFreeRowIdx++;
432 qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
433 }
434 }
435
436 // fix Pperm
437 for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
438 if (RowIdStatusArray[lik] == SC_ZERO) {
439 RowIdStatusArray[lik] = SC_ONE; // use this row id
440 Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
441 // detect wide range permutations
442 if (floor(qFreeGRowIdx.front() / nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik) / nDofsPerNode)) {
443 lWideRangeRowPermutations++;
444 }
445 qFreeGRowIdx.pop();
446 }
447 }
448 }
449
450 size_t cntUnusedColIdx = 0;
451 std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns
452 size_t cntFreeColIdx = 0;
453 std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns
454 {
455 Teuchos::ArrayRCP<Scalar> ColIdStatusArray = ColIdStatus->getDataNonConst(0);
456 Teuchos::ArrayRCP<Scalar> ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this
457
458 // close Qperm (free permutation entries in Qperm)
459 for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
460 if (ColIdStatusArray[ljk] == SC_ZERO) {
461 cntFreeColIdx++;
462 qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
463 }
464 }
465
466 for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
467 if (ColIdUsedArray[ljk] == SC_ZERO) {
468 cntUnusedColIdx++;
469 qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
470 }
471 }
472
473 // fix Qperm with local entries
474 for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
475 // stop if no (local) unused column idx are left
476 if (cntUnusedColIdx == 0) break;
477
478 if (ColIdStatusArray[ljk] == SC_ZERO) {
479 ColIdStatusArray[ljk] = SC_ONE; // use this row id
480 Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
481 ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(), SC_ONE); // ljk is now used, too
482 // detect wide range permutations
483 if (floor(qUnusedGColIdx.front() / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
484 lWideRangeColPermutations++;
485 }
486 qUnusedGColIdx.pop();
487 cntUnusedColIdx--;
488 cntFreeColIdx--;
489 }
490 }
491
492 // Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
493 // ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
494
495 // count, how many unused column idx are needed on current processor
496 // to complete Qperm
497 cntFreeColIdx = 0;
498 for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
499 if (ColIdStatusArray[ljk] == SC_ZERO) {
500 cntFreeColIdx++;
501 }
502 }
503 }
504
505 GlobalOrdinal global_cntFreeColIdx = 0;
506 LocalOrdinal local_cntFreeColIdx = cntFreeColIdx;
507 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
508#ifdef DEBUG_OUTPUT
509 std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
510#endif
511
512 // avoid global communication if possible
513 if (global_cntFreeColIdx > 0) {
514 // 1) count how many unused column ids are left
515 GlobalOrdinal global_cntUnusedColIdx = 0;
516 LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx;
517 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
518#ifdef DEBUG_OUTPUT
519 std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
520#endif
521
522 // 2) communicate how many unused column ids are available on procs
523 std::vector<LocalOrdinal> local_UnusedColIdxOnProc(numProcs);
524 std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
525 local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
526 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
527 local_UnusedColIdxOnProc.data(),
528 global_UnusedColIdxOnProc.data());
529
530#ifdef DEBUG_OUTPUT
531 std::cout << "PROC " << myRank << " global num unused indices per proc: ";
532 for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
533 std::cout << " " << global_UnusedColIdxOnProc[ljk];
534 }
535 std::cout << std::endl;
536#endif
537
538 // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
539 std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
540 std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
541 GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
542 for (int proc = 0; proc < myRank; proc++) {
543 global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
544 }
545 for (GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter + local_cntUnusedColIdx; k++) {
546 local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
547 qUnusedGColIdx.pop();
548 }
549 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx),
550 local_UnusedColIdxVector.data(),
551 global_UnusedColIdxVector.data());
552#ifdef DEBUG_OUTPUT
553 std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
554 for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
555 std::cout << " " << global_UnusedColIdxVector[ljk];
556 }
557 std::cout << std::endl;
558#endif
559
560 // 4) communicate, how many column idx are needed on each processor
561 // to complete Qperm
562 std::vector<LocalOrdinal> local_EmptyColIdxOnProc(numProcs);
563 std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
564 local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
565 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
566 local_EmptyColIdxOnProc.data(),
567 global_EmptyColIdxOnProc.data());
568
569#ifdef DEBUG_OUTPUT
570 std::cout << "PROC " << myRank << " global num of needed column indices: ";
571 for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
572 std::cout << " " << global_EmptyColIdxOnProc[ljk];
573 }
574 std::cout << std::endl;
575#endif
576
577 // 5) determine first index in global_UnusedColIdxVector for unused column indices,
578 // that are marked to be used by this processor
579 GlobalOrdinal global_UnusedColStartIdx = 0;
580 for (int proc = 0; proc < myRank; proc++) {
581 global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
582 }
583
584#ifdef DEBUG_OUTPUT
585 GetOStream(Statistics0) << "PROC " << myRank << " is allowd to use the following column gids: ";
586 for (GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
587 GetOStream(Statistics0) << global_UnusedColIdxVector[k] << " ";
588 }
589 GetOStream(Statistics0) << std::endl;
590#endif
591
592 // 6.) fix Qperm with global entries
593 {
594 Teuchos::ArrayRCP<Scalar> ColIdStatusArray = ColIdStatus->getDataNonConst(0);
595 GlobalOrdinal array_iter = 0;
596 for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
597 if (ColIdStatusArray[ljk] == SC_ZERO) {
598 ColIdStatusArray[ljk] = SC_ONE; // use this row id
599 Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
600 ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter], SC_ONE);
601 // detect wide range permutations
602 if (floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter] / nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk) / nDofsPerNode)) {
603 lWideRangeColPermutations++;
604 }
605 array_iter++;
606 // cntUnusedColIdx--; // check me
607 }
608 }
609 }
610 } // end if global_cntFreeColIdx > 0
612
613 // create new empty Matrix
614 Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
615 Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1));
616
617 {
618 Teuchos::ArrayRCP<Scalar> PpermData = Pperm->getDataNonConst(0);
619 Teuchos::ArrayRCP<Scalar> QpermData = Qperm->getDataNonConst(0);
620
621 for (size_t row = 0; row < A->getLocalNumRows(); row++) {
622 // FIXME (mfh 30 Oct 2015): Teuchos::as doesn't know how to
623 // convert from complex Scalar to GO, so we have to take the real
624 // part first. I think that's the right thing to do in this case.
625 Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
626 Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1, Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
627 Teuchos::ArrayRCP<Scalar> valout(1, SC_ONE);
628 permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0, indoutP.size()), valout.view(0, valout.size()));
629 permQTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutQ.view(0, indoutQ.size()), valout.view(0, valout.size()));
630 }
631 }
632
633 permPTmatrix->fillComplete();
634 permQTmatrix->fillComplete();
635
636 Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
637
638 for (size_t row = 0; row < permPTmatrix->getLocalNumRows(); row++) {
639 if (permPTmatrix->getNumEntriesInLocalRow(row) != 1)
640 GetOStream(Warnings0) << "#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
641 if (permPmatrix->getNumEntriesInLocalRow(row) != 1)
642 GetOStream(Warnings0) << "#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
643 if (permQTmatrix->getNumEntriesInLocalRow(row) != 1)
644 GetOStream(Warnings0) << "#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
645 }
646
647 // build permP * A * permQT
648 Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2));
649 Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2));
650
651 /*
652 MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
653 MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
654 MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
655 MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
656 */
657 // build scaling matrix
658 Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
659 Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
660 Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(), 1));
661 {
662 permPApermQt->getLocalDiagCopy(*diagVec);
663 Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
664 Teuchos::ArrayRCP<Scalar> invDiagVecData = invDiagVec->getDataNonConst(0);
665 for (size_t i = 0; i < diagVec->getMap()->getLocalNumElements(); ++i) {
666 if (diagVecData[i] != SC_ZERO)
667 invDiagVecData[i] = SC_ONE / diagVecData[i];
668 else {
669 invDiagVecData[i] = SC_ONE;
670 GetOStream(Statistics0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
671 }
672 }
673
674 for (size_t row = 0; row < A->getLocalNumRows(); row++) {
675 Teuchos::ArrayRCP<GlobalOrdinal> indout(1, permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
676 Teuchos::ArrayRCP<Scalar> valout(1, invDiagVecData[row]);
677 diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
678 }
679 }
680 diagScalingOp->fillComplete();
681
682 Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2));
683 currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory /*this*/);
684
685 currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory /*this*/); // TODO careful with this!!!
686 currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory /*this*/);
687 currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory /*this*/);
688 currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory /*this*/);
689
691 // count zeros on diagonal in P -> number of row permutations
692 Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(), true);
693 permPmatrix->getLocalDiagCopy(*diagPVec);
694 LocalOrdinal lNumRowPermutations = 0;
695 GlobalOrdinal gNumRowPermutations = 0;
696 {
697 Teuchos::ArrayRCP<const Scalar> diagPVecData = diagPVec->getData(0);
698 for (size_t i = 0; i < diagPVec->getMap()->getLocalNumElements(); ++i) {
699 if (diagPVecData[i] == SC_ZERO) {
700 lNumRowPermutations++;
701 }
702 }
703 }
704
705 // sum up all entries in multipleColRequests over all processors
706 MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
707
709 // count zeros on diagonal in Q^T -> number of column permutations
710 Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(), true);
711 permQTmatrix->getLocalDiagCopy(*diagQTVec);
712 LocalOrdinal lNumColPermutations = 0;
713 GlobalOrdinal gNumColPermutations = 0;
714 {
715 Teuchos::ArrayRCP<const Scalar> diagQTVecData = diagQTVec->getData(0);
716 for (size_t i = 0; i < diagQTVec->getMap()->getLocalNumElements(); ++i) {
717 if (diagQTVecData[i] == SC_ZERO) {
718 lNumColPermutations++;
719 }
720 }
721 }
722
723 // sum up all entries in multipleColRequests over all processors
724 MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
725
726 currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory /*this*/);
727 currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory /*this*/);
728 currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory /*this*/);
729 currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory /*this*/);
730
731 GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
732 GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
733 GetOStream(Runtime1) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;
734}
735
736} // namespace MueLu
737
738#endif /* MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_ */
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > &permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
Exception throws to report errors in the internal logical of the program.
Base class for factories (e.g., R, P, and A_coarse).
Class that holds all level-specific information.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Runtime0
One-liner description of what is happening.
@ Runtime1
Description of what is happening (more verbose)
@ Statistics0
Print statistics that do not involve significant additional computation.
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)