MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AMGXOperator_decl.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_AMGXOPERATOR_DECL_HPP
11#define MUELU_AMGXOPERATOR_DECL_HPP
12
13#if defined(HAVE_MUELU_AMGX)
14#include <Teuchos_ParameterList.hpp>
15
16#include <Tpetra_Operator.hpp>
17#include <Tpetra_CrsMatrix.hpp>
18#include <Tpetra_MultiVector.hpp>
19#include <Tpetra_Distributor.hpp>
20#include <Tpetra_HashTable.hpp>
21#include <Tpetra_Import.hpp>
22#include <Tpetra_Import_Util.hpp>
23
24#include "MueLu_Exceptions.hpp"
25#include "MueLu_TimeMonitor.hpp"
26#include "MueLu_TpetraOperator.hpp"
28
29#include <cuda_runtime.h>
30#include <amgx_c.h>
31
32namespace MueLu {
33
40template <class Scalar,
41 class LocalOrdinal,
42 class GlobalOrdinal,
43 class Node>
44class AMGXOperator : public TpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>, public BaseClass {
45 private:
46 typedef Scalar SC;
49 typedef Node NO;
50
51 typedef Tpetra::Map<LO, GO, NO> Map;
52 typedef Tpetra::MultiVector<SC, LO, GO, NO> MultiVector;
53
54 public:
56
57
59 AMGXOperator(const Teuchos::RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> >& InA, Teuchos::ParameterList& paramListIn) {}
60
62 virtual ~AMGXOperator() {}
63
65
67 Teuchos::RCP<const Map> getDomainMap() const {
68 throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
69 }
70
72 Teuchos::RCP<const Map> getRangeMap() const {
73 throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
74 }
75
77
81 void apply(const MultiVector& X, MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS,
82 Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(), Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const {
83 throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
84 }
85
87 bool hasTransposeApply() const {
88 throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
89 }
90
92 throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
93 }
94
95 private:
96};
97
104template <class Node>
105class AMGXOperator<double, int, int, Node> : public TpetraOperator<double, int, int, Node> {
106 private:
107 typedef double SC;
108 typedef int LO;
109 typedef int GO;
110 typedef Node NO;
111
112 typedef Tpetra::Map<LO, GO, NO> Map;
113 typedef Tpetra::MultiVector<SC, LO, GO, NO> MultiVector;
114
115 void printMaps(Teuchos::RCP<const Teuchos::Comm<int> >& comm, const std::vector<std::vector<int> >& vec, const std::vector<int>& perm,
116 const int* nbrs, const Map& map, const std::string& label) {
117 for (int p = 0; p < comm->getSize(); p++) {
118 if (comm->getRank() == p) {
119 std::cout << "========\n"
120 << label << ", lid (gid), PID " << p << "\n========" << std::endl;
121
122 for (size_t i = 0; i < vec.size(); ++i) {
123 std::cout << " neighbor " << nbrs[i] << " :";
124 for (size_t j = 0; j < vec[i].size(); ++j)
125 std::cout << " " << vec[i][j] << " (" << map.getGlobalElement(perm[vec[i][j]]) << ")";
126 std::cout << std::endl;
127 }
128 std::cout << std::endl;
129 } else {
130 sleep(1);
131 }
132 comm->barrier();
133 }
134 }
135
136 public:
138
139 AMGXOperator(const Teuchos::RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> >& inA, Teuchos::ParameterList& paramListIn) {
140 RCP<const Teuchos::Comm<int> > comm = inA->getRowMap()->getComm();
141 int numProcs = comm->getSize();
142 int myRank = comm->getRank();
143
144 RCP<Teuchos::Time> amgxTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: initialize");
145 amgxTimer->start();
146 // Initialize
147 // AMGX_SAFE_CALL(AMGX_initialize());
148 // AMGX_SAFE_CALL(AMGX_initialize_plugins());
149
150 /*system*/
151 // AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback));
153 Teuchos::ParameterList configs = paramListIn.sublist("amgx:params", true);
154 if (configs.isParameter("json file")) {
155 AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (const char*)&configs.get<std::string>("json file")[0]));
156 } else {
157 std::ostringstream oss;
158 oss << "";
159 ParameterList::ConstIterator itr;
160 for (itr = configs.begin(); itr != configs.end(); ++itr) {
161 const std::string& name = configs.name(itr);
162 const ParameterEntry& entry = configs.entry(itr);
163 oss << name << "=" << filterValueToString(entry) << ", ";
164 }
165 oss << "\0";
166 std::string configString = oss.str();
167 if (configString == "") {
168 // print msg that using defaults
169 // GetOStream(Warnings0) << "Warning: No configuration parameters specified, using default AMGX configuration parameters. \n";
170 }
172 }
173
174 // TODO: we probably need to add "exception_handling=1" to the parameter list
175 // to switch on internal error handling (with no need for AMGX_SAFE_CALL)
176
177 // AMGX_SAFE_CALL(AMGX_config_add_parameters(&Config_, "exception_handling=1"))
178
179#define NEW_COMM
180#ifdef NEW_COMM
181 // NOTE: MPI communicator used in AMGX_resources_create must exist in the scope of AMGX_matrix_comm_from_maps_one_ring
182 // FIXME: fix for serial comm
183 RCP<const Teuchos::MpiComm<int> > tmpic = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm->duplicate());
184 TEUCHOS_TEST_FOR_EXCEPTION(tmpic.is_null(), Exceptions::RuntimeError, "Communicator is not MpiComm");
185
188#endif
189
190 // Construct AMGX resources
191 if (numProcs == 1) {
192 AMGX_resources_create_simple(&Resources_, Config_);
193
194 } else {
195 int numGPUDevices;
197 int device[] = {(comm->getRank() % numGPUDevices)};
198
199 AMGX_config_add_parameters(&Config_, "communicator=MPI");
200#ifdef NEW_COMM
201 AMGX_resources_create(&Resources_, Config_, &mpiComm, 1 /* number of GPU devices utilized by this rank */, device);
202#else
203 AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1 /* number of GPU devices utilized by this rank */, device);
204#endif
205 }
206
208 AMGX_solver_create(&Solver_, Resources_, mode, Config_);
209 AMGX_matrix_create(&A_, Resources_, mode);
210 AMGX_vector_create(&X_, Resources_, mode);
211 AMGX_vector_create(&Y_, Resources_, mode);
212
213 amgxTimer->stop();
214 amgxTimer->incrementNumCalls();
215
216 std::vector<int> amgx2muelu;
217
218 // Construct AMGX communication pattern
219 if (numProcs > 1) {
220 RCP<const Tpetra::Import<LO, GO, NO> > importer = inA->getCrsGraph()->getImporter();
221
222 TEUCHOS_TEST_FOR_EXCEPTION(importer.is_null(), MueLu::Exceptions::RuntimeError, "The matrix A has no Import object.");
223
224 Tpetra::Distributor distributor = importer->getDistributor();
225
226 Array<int> sendRanks = distributor.getProcsTo();
227 Array<int> recvRanks = distributor.getProcsFrom();
228
229 std::sort(sendRanks.begin(), sendRanks.end());
230 std::sort(recvRanks.begin(), recvRanks.end());
231
232 bool match = true;
233 if (sendRanks.size() != recvRanks.size()) {
234 match = false;
235 } else {
236 for (int i = 0; i < sendRanks.size(); i++) {
237 if (recvRanks[i] != sendRanks[i])
238 match = false;
239 break;
240 }
241 }
243 "AMGX requires that the processors that we send to and receive from are the same. "
244 "This is not the case: we send to {"
245 << sendRanks << "} and receive from {" << recvRanks << "}");
246
247 int num_neighbors = sendRanks.size(); // does not include the calling process
248 const int* neighbors = &sendRanks[0];
249
250 // Later on, we'll have to organize the send and recv data by PIDs,
251 // i.e, a vector V of vectors, where V[i] is PID i's vector of data.
252 // Hence we need to be able to quickly look up an array index
253 // associated with each PID.
254 Tpetra::Details::HashTable<int, int> hashTable(3 * num_neighbors);
255 for (int i = 0; i < num_neighbors; i++)
256 hashTable.add(neighbors[i], i);
257
258 // Get some information out
259 ArrayView<const int> exportLIDs = importer->getExportLIDs();
260 ArrayView<const int> exportPIDs = importer->getExportPIDs();
262 Tpetra::Import_Util::getPids(*importer, importPIDs, true /* make local -1 */);
263
264 // Construct the reordering for AMGX as in AMGX_matrix_upload_all documentation
265 RCP<const Map> rowMap = inA->getRowMap();
266 RCP<const Map> colMap = inA->getColMap();
267
268 int N = rowMap->getLocalNumElements(), Nc = colMap->getLocalNumElements();
269 muelu2amgx_.resize(Nc, -1);
270
271 int numUniqExports = 0;
272 for (int i = 0; i < exportLIDs.size(); i++)
273 if (muelu2amgx_[exportLIDs[i]] == -1) {
275 muelu2amgx_[exportLIDs[i]] = -2;
276 }
277
279 // Go through exported LIDs and put them at the end of LIDs
280 for (int i = 0; i < exportLIDs.size(); i++)
281 if (muelu2amgx_[exportLIDs[i]] < 0) // exportLIDs are not unique
282 muelu2amgx_[exportLIDs[i]] = exportOffset++;
283 // Go through all non-export LIDs, and put them at the beginning of LIDs
284 for (int i = 0; i < N; i++)
285 if (muelu2amgx_[i] == -1)
286 muelu2amgx_[i] = localOffset++;
287 // Go through the tail (imported LIDs), and order those by neighbors
288 int importOffset = N;
289 for (int k = 0; k < num_neighbors; k++)
290 for (int i = 0; i < importPIDs.size(); i++)
291 if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k)
292 muelu2amgx_[i] = importOffset++;
293
294 amgx2muelu.resize(muelu2amgx_.size());
295 for (int i = 0; i < (int)muelu2amgx_.size(); i++)
296 amgx2muelu[muelu2amgx_[i]] = i;
297
298 // Construct send arrays
299 std::vector<std::vector<int> > sendDatas(num_neighbors);
300 std::vector<int> send_sizes(num_neighbors, 0);
301 for (int i = 0; i < exportPIDs.size(); i++) {
302 int index = hashTable.get(exportPIDs[i]);
303 sendDatas[index].push_back(muelu2amgx_[exportLIDs[i]]);
304 send_sizes[index]++;
305 }
306 // FIXME: sendDatas must be sorted (based on GIDs)
307
308 std::vector<const int*> send_maps(num_neighbors);
309 for (int i = 0; i < num_neighbors; i++)
310 send_maps[i] = &(sendDatas[i][0]);
311
312 // Debugging
313 // printMaps(comm, sendDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "send_map_vector");
314
315 // Construct recv arrays
316 std::vector<std::vector<int> > recvDatas(num_neighbors);
317 std::vector<int> recv_sizes(num_neighbors, 0);
318 for (int i = 0; i < importPIDs.size(); i++)
319 if (importPIDs[i] != -1) {
320 int index = hashTable.get(importPIDs[i]);
321 recvDatas[index].push_back(muelu2amgx_[i]);
322 recv_sizes[index]++;
323 }
324 // FIXME: recvDatas must be sorted (based on GIDs)
325
326 std::vector<const int*> recv_maps(num_neighbors);
327 for (int i = 0; i < num_neighbors; i++)
328 recv_maps[i] = &(recvDatas[i][0]);
329
330 // Debugging
331 // printMaps(comm, recvDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "recv_map_vector");
332
334
335 AMGX_vector_bind(X_, A_);
336 AMGX_vector_bind(Y_, A_);
337 }
338
339 RCP<Teuchos::Time> matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transform matrix");
340 matrixTransformTimer->start();
341
345 inA->getAllValues(ia_s, ja, a);
346
347 ArrayRCP<int> ia(ia_s.size());
348 for (int i = 0; i < ia.size(); i++)
349 ia[i] = Teuchos::as<int>(ia_s[i]);
350
351 N_ = inA->getLocalNumRows();
352 int nnz = inA->getLocalNumEntries();
353
354 matrixTransformTimer->stop();
355 matrixTransformTimer->incrementNumCalls();
356
357 // Upload matrix
358 // TODO Do we need to pin memory here through AMGX_pin_memory?
359 RCP<Teuchos::Time> matrixTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer matrix CPU->GPU");
360 matrixTimer->start();
361 if (numProcs == 1) {
362 AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL);
363
364 } else {
365 // Transform the matrix
366 std::vector<int> ia_new(ia.size());
367 std::vector<int> ja_new(ja.size());
368 std::vector<double> a_new(a.size());
369
370 ia_new[0] = 0;
371 for (int i = 0; i < N_; i++) {
372 int oldRow = amgx2muelu[i];
373
374 ia_new[i + 1] = ia_new[i] + (ia[oldRow + 1] - ia[oldRow]);
375
376 for (int j = ia[oldRow]; j < ia[oldRow + 1]; j++) {
377 int offset = j - ia[oldRow];
378 ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]];
379 a_new[ia_new[i] + offset] = a[j];
380 }
381 // Do bubble sort on two arrays
382 // NOTE: There are multiple possible optimizations here (even of bubble sort)
383 bool swapped;
384 do {
385 swapped = false;
386
387 for (int j = ia_new[i]; j < ia_new[i + 1] - 1; j++)
388 if (ja_new[j] > ja_new[j + 1]) {
389 std::swap(ja_new[j], ja_new[j + 1]);
390 std::swap(a_new[j], a_new[j + 1]);
391 swapped = true;
392 }
393 } while (swapped == true);
394 }
395
396 AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL);
397 }
398 matrixTimer->stop();
399 matrixTimer->incrementNumCalls();
400
401 domainMap_ = inA->getDomainMap();
402 rangeMap_ = inA->getRangeMap();
403
404 RCP<Teuchos::Time> realSetupTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: setup (total)");
405 realSetupTimer->start();
406 AMGX_solver_setup(Solver_, A_);
407 realSetupTimer->stop();
408 realSetupTimer->incrementNumCalls();
409
410 vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vectors CPU->GPU");
411 vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vector GPU->CPU");
412 solverTimer_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: Solve (total)");
413 }
414
416 virtual ~AMGXOperator() {
417 // Comment this out if you need rebuild to work. This causes AMGX_solver_destroy memory issues.
424 }
425
427 Teuchos::RCP<const Map> getDomainMap() const;
428
430 Teuchos::RCP<const Map> getRangeMap() const;
431
433
437 void apply(const MultiVector& X, MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS,
438 SC alpha = Teuchos::ScalarTraits<SC>::one(), SC beta = Teuchos::ScalarTraits<SC>::zero()) const;
439
441 bool hasTransposeApply() const;
442
444 throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
445 }
446
447 std::string filterValueToString(const Teuchos::ParameterEntry& entry) {
448 return (entry.isList() ? std::string("...") : toString(entry.getAny()));
449 }
450
451 int sizeA() {
452 int sizeX, sizeY, n;
454 return n;
455 }
456
457 int iters() {
458 int it;
460 return it;
461 }
462
468
469 private:
476 int N_;
477
480
481 std::vector<int> muelu2amgx_;
482
486};
487
488} // namespace MueLu
489
490#endif // HAVE_MUELU_AMGX
491#endif // MUELU_AMGXOPERATOR_DECL_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
void printMaps(Teuchos::RCP< const Teuchos::Comm< int > > &comm, const std::vector< std::vector< int > > &vec, const std::vector< int > &perm, const int *nbrs, const Map &map, const std::string &label)
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &inA, Teuchos::ParameterList &paramListIn)
std::string filterValueToString(const Teuchos::ParameterEntry &entry)
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
Adapter for AmgX library from Nvidia.
virtual ~AMGXOperator()
Destructor.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X.
Teuchos::RCP< const Map > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &InA, Teuchos::ParameterList &paramListIn)
Constructor.
Tpetra::Map< LO, GO, NO > Map
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
Teuchos::RCP< const Map > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Base class for MueLu classes.
Exception throws to report errors in the internal logical of the program.
Wraps an existing MueLu::Hierarchy as a Tpetra::Operator.
Namespace for MueLu classes and methods.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.