31 Projection(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& Nullspace) {
32 localMap_ = Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(Nullspace->getMap()->lib(),
33 Nullspace->getNumVectors(),
34 Nullspace->getMap()->getIndexBase(),
35 Nullspace->getMap()->getComm(),
36 Xpetra::LocallyReplicated);
38 Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > tempMV = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(localMap_, Nullspace->getNumVectors(),
false);
39 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
40 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
41 tempMV->multiply(Teuchos::CONJ_TRANS, Teuchos::NO_TRANS, ONE, *Nullspace, *Nullspace, ZERO);
43 Kokkos::View<Scalar**, Kokkos::LayoutLeft, Kokkos::HostSpace> Q(
"Q", Nullspace->getNumVectors(), Nullspace->getNumVectors());
46 auto dots = tempMV->getLocalViewHost(Tpetra::Access::ReadOnly);
47 Kokkos::deep_copy(Q, dots);
51 Teuchos::LAPACK<LocalOrdinal, Scalar> lapack;
53 lapack.POTRF(
'L', Nullspace->getNumVectors(), Q.data(), LDQ, &info);
54 TEUCHOS_ASSERT(info == 0);
55 lapack.TRTRI(
'L',
'N', Nullspace->getNumVectors(), Q.data(), LDQ, &info);
56 TEUCHOS_ASSERT(info == 0);
58 Nullspace_ = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Nullspace->getMap(), Nullspace->getNumVectors());
60 for (
size_t i = 0; i < Nullspace->getNumVectors(); i++) {
61 for (
size_t j = 0; j <= i; j++) {
62 Nullspace_->getVectorNonConst(i)->update(Q(i, j), *Nullspace->getVector(j), ONE);
69 projectOut(Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& X) {
70 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
71 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
75 if (tempMV_.is_null() || tempMV_->getNumVectors() != X.getNumVectors())
76 tempMV_ = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(localMap_, X.getNumVectors());
77 Teuchos::RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& tempMV = tempMV_;
78 tempMV->multiply(Teuchos::CONJ_TRANS, Teuchos::NO_TRANS, ONE, *Nullspace_, X, ZERO);
79 auto dots = tempMV->getLocalViewHost(Tpetra::Access::ReadOnly);
80 bool doProject =
true;
81 for (
size_t i = 0; i < X.getNumVectors(); i++) {
82 for (
size_t j = 0; j < Nullspace_->getNumVectors(); j++) {
83 doProject = doProject || (Teuchos::ScalarTraits<Scalar>::magnitude(dots(j, i)) > 100 * Teuchos::ScalarTraits<Scalar>::eps());
87 for (
size_t i = 0; i < X.getNumVectors(); i++) {
88 for (
size_t j = 0; j < Nullspace_->getNumVectors(); j++) {
89 X.getVectorNonConst(i)->update(-dots(j, i), *Nullspace_->getVector(j), ONE);
168 this->GetOStream(
Warnings0) <<
"MueLu::Amesos2Smoother::Setup(): Setup() has already been called" << std::endl;
170 RCP<Matrix> A = Factory::Get<RCP<Matrix> >(currentLevel,
"A");
171 auto A_block = rcp_dynamic_cast<BlockedCrsMatrix>(A);
173 A = A_block->Merge();
177 RCP<const Map> rowMap = A->getRowMap();
179 Teuchos::ParameterList pL = this->GetParameterList();
181 if (pL.get<
bool>(
"fix nullspace")) {
182 this->GetOStream(
Runtime1) <<
"MueLu::Amesos2Smoother::Setup(): fixing nullspace" << std::endl;
184 rowMap = A->getRowMap();
185 size_t gblNumCols = rowMap->getGlobalNumElements();
187 RCP<MultiVector> NullspaceOrig = Factory::Get<RCP<MultiVector> >(currentLevel,
"Nullspace");
190 RCP<MultiVector> Nullspace = projection_->Nullspace_;
192 RCP<MultiVector> ghostedNullspace;
193 RCP<const Map> colMap;
194 RCP<const Import> importer;
195 if (rowMap->getComm()->getSize() > 1) {
196 this->GetOStream(
Warnings0) <<
"MueLu::Amesos2Smoother::Setup(): Applying nullspace fix on distributed matrix. Try rebalancing to single rank!" << std::endl;
197 ArrayRCP<GO> elements_RCP;
198 elements_RCP.resize(gblNumCols);
199 ArrayView<GO> elements = elements_RCP();
200 for (
size_t k = 0; k < gblNumCols; k++)
201 elements[k] = Teuchos::as<GO>(k);
202 colMap = MapFactory::Build(rowMap->lib(), gblNumCols * rowMap->getComm()->getSize(), elements, Teuchos::ScalarTraits<GO>::zero(), rowMap->getComm());
203 importer = ImportFactory::Build(rowMap, colMap);
204 ghostedNullspace = MultiVectorFactory::Build(colMap, Nullspace->getNumVectors(),
false);
205 ghostedNullspace->doImport(*Nullspace, *importer, Xpetra::INSERT);
207 ghostedNullspace = Nullspace;
211 using ATS = KokkosKernels::ArithTraits<SC>;
212 using impl_Scalar =
typename ATS::val_type;
213 using impl_ATS = KokkosKernels::ArithTraits<impl_Scalar>;
214 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
216 typedef typename Matrix::local_matrix_device_type KCRS;
217 typedef typename KCRS::StaticCrsGraphType graph_t;
218 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
219 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
220 typedef typename KCRS::values_type::non_const_type scalar_view_t;
222 const impl_Scalar impl_SC_ZERO = impl_ATS::zero();
224 size_t lclNumRows = rowMap->getLocalNumElements();
225 LocalOrdinal lclNumCols = Teuchos::as<LocalOrdinal>(gblNumCols);
226 lno_view_t newRowPointers(
"newRowPointers", lclNumRows + 1);
227 lno_nnz_view_t newColIndices(
"newColIndices", lclNumRows * gblNumCols);
228 scalar_view_t newValues(
"newValues", lclNumRows * gblNumCols);
232 RCP<Vector> diag = VectorFactory::Build(A->getRowMap());
233 A->getLocalDiagCopy(*diag);
234 shift = diag->normInf();
239 auto lclNullspace = Nullspace->getLocalViewDevice(Tpetra::Access::ReadOnly);
240 auto lclGhostedNullspace = ghostedNullspace->getLocalViewDevice(Tpetra::Access::ReadOnly);
241 Kokkos::parallel_for(
242 "MueLu:Amesos2Smoother::fixNullspace_1", range_type(0, lclNumRows + 1),
243 KOKKOS_LAMBDA(
const size_t i) {
244 if (i < lclNumRows) {
245 newRowPointers(i) = i * gblNumCols;
247 newColIndices(i * gblNumCols + j) = j;
248 newValues(i * gblNumCols + j) = impl_SC_ZERO;
249 for (
size_t I = 0; I < lclNullspace.extent(1); I++)
250 for (
size_t J = 0; J < lclGhostedNullspace.extent(1); J++)
251 newValues(i * gblNumCols + j) += shift * lclNullspace(i, I) * impl_ATS::conjugate(lclGhostedNullspace(j, J));
254 newRowPointers(lclNumRows) = lclNumRows * gblNumCols;
259 if (colMap->lib() == Xpetra::UseTpetra) {
260 auto lclA = A->getLocalMatrixDevice();
261 auto lclColMapA = A->getColMap()->getLocalMap();
262 auto lclColMapANew = colMap->getLocalMap();
263 Kokkos::parallel_for(
264 "MueLu:Amesos2Smoother::fixNullspace_2", range_type(0, lclNumRows),
265 KOKKOS_LAMBDA(
const size_t i) {
266 for (
size_t jj = lclA.graph.row_map(i); jj < lclA.graph.row_map(i + 1); jj++) {
267 LO j = lclColMapANew.getLocalElement(lclColMapA.getGlobalElement(lclA.graph.entries(jj)));
268 impl_Scalar v = lclA.values(jj);
269 newValues(i * gblNumCols + j) += v;
273 auto lclA = A->getLocalMatrixHost();
274 for (
size_t i = 0; i < lclNumRows; i++) {
275 for (
size_t jj = lclA.graph.row_map(i); jj < lclA.graph.row_map(i + 1); jj++) {
276 LO j = colMap->getLocalElement(A->getColMap()->getGlobalElement(lclA.graph.entries(jj)));
277 SC v = lclA.values(jj);
278 newValues(i * gblNumCols + j) += v;
283 RCP<Matrix> newA = rcp(
new CrsMatrixWrap(rowMap, colMap, 0));
284 RCP<CrsMatrix> newAcrs = toCrsMatrix(newA);
285 newAcrs->setAllValues(newRowPointers, newColIndices, newValues);
286 newAcrs->expertStaticFillComplete(A->getDomainMap(), A->getRangeMap(),
287 importer, A->getCrsGraph()->getExporter());
290 rowMap = factorA->getRowMap();
295 RCP<const Tpetra_CrsMatrix> tA = toTpetra(factorA);
297 prec_ = Amesos2::create<Tpetra_CrsMatrix, Tpetra_MultiVector>(type_, tA);
298 TEUCHOS_TEST_FOR_EXCEPTION(prec_ == Teuchos::null,
Exceptions::RuntimeError,
"Amesos2::create returns Teuchos::null");
299 RCP<Teuchos::ParameterList> amesos2_params = Teuchos::rcpFromRef(pL.sublist(
"Amesos2"));
300 amesos2_params->setName(
"Amesos2");
301 if ((rowMap->getGlobalNumElements() != as<size_t>((rowMap->getMaxAllGlobalIndex() - rowMap->getMinAllGlobalIndex()) + 1)) ||
302 (!rowMap->isContiguous() && (rowMap->getComm()->getSize() == 1))) {
303 if (((type_ !=
"Cusolver") && (type_ !=
"Tacho")) && !(amesos2_params->sublist(prec_->name()).template isType<bool>(
"IsContiguous")))
304 amesos2_params->sublist(prec_->name()).set(
"IsContiguous",
false,
"Are GIDs Contiguous");
306 prec_->setParameters(amesos2_params);
308 prec_->numericFactorization();
317 RCP<BlockedMultiVector> blockedX = rcp_dynamic_cast<BlockedMultiVector>(rcpFromRef(X));
318 bool blocked = blockedX != Teuchos::null;
320 this->ApplyBlocked(X, B);
324 RCP<Tpetra_MultiVector> tX, tB;
325 if (!useTransformation_) {
326 tX = toTpetra(Teuchos::rcpFromRef(X));
327 tB = toTpetra(Teuchos::rcpFromRef(
const_cast<MultiVector&
>(B)));
330 size_t numVectors = X.getNumVectors();
331 size_t length = X.getLocalLength();
334 "MueLu::Amesos2Smoother::Apply: Fixing coarse matrix for Amesos2 for multivectors has not been implemented yet.");
335 ArrayRCP<const SC> Xdata = X.getData(0), Bdata = B.getData(0);
336 ArrayRCP<SC> X_data = X_->getDataNonConst(0), B_data = B_->getDataNonConst(0);
338 for (
size_t i = 0; i < length; i++) {
339 X_data[i] = Xdata[i];
340 B_data[i] = Bdata[i];
352 prec_->setX(Teuchos::null);
353 prec_->setB(Teuchos::null);
355 if (useTransformation_) {
357 size_t length = X.getLocalLength();
359 ArrayRCP<SC> Xdata = X.getDataNonConst(0);
360 ArrayRCP<const SC> X_data = X_->getData(0);
362 for (
size_t i = 0; i < length; i++)
363 Xdata[i] = X_data[i];
367 Teuchos::ParameterList pL = this->GetParameterList();
368 if (pL.get<
bool>(
"fix nullspace")) {
369 projection_->projectOut(X);
377 "MueLu::Amesos2Smoother::ApplyBlocked: useTransformation_ == true is not supported");
379 Teuchos::ParameterList pL = this->GetParameterList();
380 const auto fixNullspace = pL.get<
bool>(
"fix nullspace");
382 "MueLu::Amesos2Smoother::ApplyBlocked: \"fix nullspace\" == true is not supported");
384 RCP<BlockedMultiVector> blockedX = rcp_dynamic_cast<BlockedMultiVector>(rcpFromRef(X));
385 RCP<const BlockedMultiVector> blockedB = rcp_dynamic_cast<const BlockedMultiVector>(rcpFromRef(B));
387 "MueLu::Amesos2Smoother::ApplyBlocked: Input and/or output vector are not BlockedMultiVector!");
389 RCP<MultiVector> mergedX = blockedX->Merge();
390 RCP<MultiVector> mergedB = blockedB->Merge();
392 RCP<Tpetra_MultiVector> tX, tB;
393 tX = toTpetra(mergedX);
394 tB = toTpetra(mergedB);
401 prec_->setX(Teuchos::null);
402 prec_->setB(Teuchos::null);
404 RCP<MultiVector> xx = Teuchos::rcp(
new BlockedMultiVector(blockedX->getBlockedMap(), mergedX));
405 SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one();
406 X.update(one, *xx, zero);