10#ifndef TPETRA_MATRIXMATRIX_DEF_HPP
11#define TPETRA_MATRIXMATRIX_DEF_HPP
13#include "KokkosSparse_Utils.hpp"
14#include "Tpetra_ConfigDefs.hpp"
16#include "Teuchos_VerboseObject.hpp"
17#include "Teuchos_Array.hpp"
19#include "Tpetra_CrsMatrix.hpp"
20#include "Tpetra_BlockCrsMatrix.hpp"
22#include "Tpetra_RowMatrixTransposer.hpp"
25#include "Tpetra_Details_makeColMap.hpp"
26#include "Tpetra_ConfigDefs.hpp"
27#include "Tpetra_Map.hpp"
28#include "Tpetra_Export.hpp"
35#include "Teuchos_FancyOStream.hpp"
37#include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp"
40#include "KokkosSparse_spgemm.hpp"
41#include "KokkosSparse_spadd.hpp"
42#include "Kokkos_Bitset.hpp"
54#include "TpetraExt_MatrixMatrix_OpenMP.hpp"
55#include "TpetraExt_MatrixMatrix_Cuda.hpp"
56#include "TpetraExt_MatrixMatrix_HIP.hpp"
57#include "TpetraExt_MatrixMatrix_SYCL.hpp"
61namespace MatrixMatrix {
69template <
class Scalar,
80 const std::string& label,
81 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
97 const std::string
prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
122 const bool newFlag = !
C.getGraph()->isLocallyIndexed() && !
C.getGraph()->isGloballyIndexed();
128#ifdef USE_OLD_TRANSPOSE
132 using Teuchos::ParameterList;
158 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
159 "must match for matrix-matrix product. op(A) is "
167 prefix <<
"ERROR, dimensions of result C must "
168 "match dimensions of op(A) * op(B). C has "
169 <<
C.getGlobalNumRows()
170 <<
" rows, should have at least " <<
Aouter << std::endl);
178 if (!
C.isFillActive())
C.resumeFill();
220 MMdetails::mult_AT_B_newmatrix(
A,
B,
C, label,
params);
246 C.fillComplete(
Bprime->getDomainMap(),
Aprime->getRangeMap());
264 const std::string& label) {
276 std::string
prefix = std::string(
"TpetraExt ") + label + std::string(
": ");
292 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
293 "must match for matrix-matrix product. op(A) is "
300 const LO blocksize =
A->getBlockSize();
302 prefix <<
"ERROR, Blocksizes do not match. A.blocksize = " << blocksize <<
", B.blocksize = " <<
B->getBlockSize());
322 MMdetails::import_and_extract_views(*
B,
targetMap_B,
Bview,
A->getGraph()->getImporter(),
323 A->getGraph()->getImporter().is_null());
333void Jacobi(
typename Teuchos::ScalarTraits<Scalar>::magnitudeType
omega,
339 const std::string& label,
340 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
354 const std::string
prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
373 prefix <<
"ERROR, inner dimensions of op(A) and op(B) "
374 "must match for matrix-matrix product. op(A) is "
382 prefix <<
"ERROR, dimensions of result C must "
383 "match dimensions of op(A) * op(B). C has "
384 <<
C.getGlobalNumRows()
385 <<
" rows, should have at least " <<
Aouter << std::endl);
413 importParams1->set(
"compute global constants",
params->get(
"compute global constants: temporaries",
false));
415 auto slist =
params->sublist(
"matrixmatrix: kernel params",
false);
419 bool isMM =
slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
423 ip1slist.set(
"isMatrixMatrix_TransferAndFillComplete",
isMM);
441 importParams2->set(
"compute global constants",
params->get(
"compute global constants: temporaries",
false));
443 auto slist =
params->sublist(
"matrixmatrix: kernel params",
false);
445 bool isMM =
slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
451 ip2slist.set(
"isMatrixMatrix_TransferAndFillComplete",
isMM);
464 bool newFlag = !
C.getGraph()->isLocallyIndexed() && !
C.getGraph()->isGloballyIndexed();
479 typedef Teuchos::ScalarTraits<Scalar> STS;
480 typename STS::magnitudeType
threshold =
params->get(
"remove zeros threshold", STS::magnitude(STS::zero()));
496 using Teuchos::Array;
506 const std::string
prefix =
"TpetraExt::MatrixMatrix::Add(): ";
509 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. "
510 "(Result matrix B is not required to be isFillComplete()).");
512 prefix <<
"ERROR, input matrix B must not be fill complete!");
514 prefix <<
"ERROR, input matrix B must not have static graph!");
516 prefix <<
"ERROR, input matrix B must not be locally indexed!");
518 using Teuchos::ParameterList;
531 typename crs_matrix_type::nonconst_global_inds_host_view_type
a_inds(
"a_inds",
A.getLocalMaxNumRowEntries());
532 typename crs_matrix_type::nonconst_values_host_view_type
a_vals(
"a_vals",
A.getLocalMaxNumRowEntries());
535 if (
scalarB != Teuchos::ScalarTraits<SC>::one())
539 if (
scalarA != Teuchos::ScalarTraits<SC>::zero()) {
541 row =
B.getRowMap()->getGlobalElement(
i);
544 if (
scalarA != Teuchos::ScalarTraits<SC>::one()) {
557Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
566 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
567 using Teuchos::ParameterList;
570 using Teuchos::rcpFromRef;
574 params->isParameter(
"Call fillComplete") && !
params->get<
bool>(
"Call fillComplete"),
575 std::invalid_argument,
576 "Tpetra::MatrixMatrix::add(): this version of add() always calls fillComplete\n"
577 "on the result, but you explicitly set 'Call fillComplete' = false in the parameter list. Don't set this explicitly.");
578 params->set(
"Call fillComplete",
true);
589 "TpetraExt::MatrixMatrix::add(): A and B must both be fill complete.");
600template <
class LO,
class GO,
class LOView,
class GOView,
class LocalMap>
601struct ConvertGlobalToLocalFunctor {
607 KOKKOS_FUNCTION
void operator()(
const GO i)
const {
608 lids(i) = localColMap.getLocalElement(gids(i));
613 const LocalMap localColMap;
616template <
class Scalar,
629 const Teuchos::RCP<Teuchos::ParameterList>&
params) {
632 using Teuchos::rcp_dynamic_cast;
633 using Teuchos::rcp_implicit_cast;
634 using Teuchos::rcpFromRef;
635 using Teuchos::TimeMonitor;
646 using exec_space =
typename crs_graph_type::execution_space;
647 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
648 const char*
prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
649 constexpr bool debug =
false;
654 std::ostringstream
os;
655 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
656 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
657 std::cerr <<
os.str();
661 prefix_mmm <<
"C must be a 'new' matrix (neither locally nor globally indexed).");
663 prefix_mmm <<
"A and B must both be fill complete.");
664#ifdef HAVE_TPETRA_DEBUG
666 if (
A.isFillComplete() &&
B.isFillComplete()) {
669 !
A.getDomainMap()->locallySameAs(*
B.getDomainMap())) ||
671 !
A.getDomainMap()->isSameAs(*
B.getRangeMap())) ||
673 !
A.getRangeMap()->isSameAs(*
B.getDomainMap()));
675 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
679 !
A.getRangeMap()->isSameAs(*
B.getRangeMap())) ||
681 !
A.getRangeMap()->isSameAs(*
B.getDomainMap())) ||
683 !
A.getDomainMap()->isSameAs(*
B.getRangeMap()));
685 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
689 using Teuchos::ParameterList;
697#ifdef HAVE_TPETRA_DEBUG
700 "Please report this bug to the Tpetra developers.");
707 std::ostringstream
os;
708 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
709 <<
"Form explicit xpose of B" << std::endl;
710 std::cerr <<
os.str();
715#ifdef HAVE_TPETRA_DEBUG
717 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
719 !
Aprime->isFillComplete() || !
Bprime->isFillComplete(), std::invalid_argument,
720 prefix_mmm <<
"Aprime and Bprime must both be fill complete. "
721 "Please report this bug to the Tpetra developers.");
733 typedef typename AddKern::values_array values_array;
734 typedef typename AddKern::row_ptrs_array row_ptrs_array;
735 typedef typename AddKern::col_inds_array col_inds_array;
745 if (!(
Aprime->getRowMap()->isSameAs(*(
Bprime->getRowMap())))) {
747 auto import = rcp(new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
759 if (Teuchos::nonnull(
params) &&
params->isParameter(
"Call fillComplete")) {
771 rowptrs = row_ptrs_array(
"C rowptrs", 0);
776 using global_col_inds_array =
typename AddKern::global_col_inds_array;
785 std::ostringstream
os;
786 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
787 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
788 std::cerr <<
os.str();
790 AddKern::convertToGlobalAndAdd(
794 std::ostringstream
os;
795 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
796 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
797 std::cerr <<
os.str();
806 Kokkos::parallel_for(Kokkos::RangePolicy<exec_space>(0,
globalColinds.extent(0)),
808 col_inds_array, global_col_inds_array,
819 auto Arowptrs =
Alocal.graph.row_map;
820 auto Browptrs =
Blocal.graph.row_map;
821 auto Acolinds =
Alocal.graph.entries;
822 auto Bcolinds =
Blocal.graph.entries;
828 std::ostringstream
os;
829 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
830 <<
"Call AddKern::addSorted(...)" << std::endl;
831 std::cerr <<
os.str();
833 AddKern::addSorted(
Avals, Arowptrs, Acolinds,
alpha,
Bvals, Browptrs, Bcolinds,
beta,
Aprime->getGlobalNumCols(),
vals,
rowptrs,
colinds);
840 std::ostringstream
os;
841 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
842 <<
"Call AddKern::addUnsorted(...)" << std::endl;
843 std::cerr <<
os.str();
845 AddKern::addUnsorted(
Avals, Arowptrs, Acolinds,
alpha,
Bvals, Browptrs, Bcolinds,
beta,
Aprime->getGlobalNumCols(),
vals,
rowptrs,
colinds);
856 std::ostringstream
os;
857 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
858 <<
"Create Cimport" << std::endl;
859 std::cerr <<
os.str();
865 std::ostringstream
os;
866 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
867 <<
"Create Cexport" << std::endl;
868 std::cerr <<
os.str();
874 std::ostringstream
os;
875 os <<
"Proc " <<
A.getMap()->getComm()->getRank() <<
": "
876 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
877 std::cerr <<
os.str();
899 using Teuchos::Array;
900 using Teuchos::ArrayRCP;
901 using Teuchos::ArrayView;
904 using Teuchos::rcp_dynamic_cast;
905 using Teuchos::rcpFromRef;
906 using Teuchos::tuple;
908 typedef Teuchos::ScalarTraits<Scalar> STS;
916 std::string
prefix =
"TpetraExt::MatrixMatrix::Add(): ";
919 !
A.isFillComplete() || !
B.isFillComplete(), std::invalid_argument,
920 prefix <<
"A and B must both be fill complete before calling this function.");
924 prefix <<
"C is null (must be allocated), but A.haveGlobalConstants() is false. "
925 "Please report this bug to the Tpetra developers.");
927 prefix <<
"C is null (must be allocated), but B.haveGlobalConstants() is false. "
928 "Please report this bug to the Tpetra developers.");
931#ifdef HAVE_TPETRA_DEBUG
938 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
945 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
949 using Teuchos::ParameterList;
962#ifdef HAVE_TPETRA_DEBUG
964 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
976#ifdef HAVE_TPETRA_DEBUG
978 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
988 C->setAllToScalar(STS::zero());
998 if (
Aprime->getRowMap()->isSameAs(*
Bprime->getRowMap())) {
1007 C =
rcp(
new crs_matrix_type(
Aprime->getRowMap(),
Aprime->getGlobalMaxNumRowEntries() +
Bprime->getGlobalMaxNumRowEntries()));
1011#ifdef HAVE_TPETRA_DEBUG
1013 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1015 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1017 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1025 for (
int k = 0;
k < 2; ++
k) {
1026 typename crs_matrix_type::nonconst_global_inds_host_view_type
Indices;
1027 typename crs_matrix_type::nonconst_values_host_view_type
Values;
1035#ifdef HAVE_TPETRA_DEBUG
1037 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1040#ifdef HAVE_TPETRA_DEBUG
1042 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1048 size_t numEntries =
Mat[
k]->getNumEntriesInGlobalRow(
globalRow);
1049 if (numEntries > 0) {
1050 if (numEntries >
Indices.extent(0)) {
1051 Kokkos::resize(
Indices, numEntries);
1052 Kokkos::resize(
Values, numEntries);
1056 if (
scalar[
k] != STS::one()) {
1057 for (
size_t j = 0;
j < numEntries; ++
j) {
1066 prefix <<
"sumIntoGlobalValues failed to add entries from A or B into C.");
1075 C->fillComplete(
C->getDomainMap(),
1094 std::string
prefix =
"TpetraExt::MatrixMatrix::Add(): ";
1097 prefix <<
"C must not be null");
1099 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
C_ =
C;
1105namespace MMdetails {
1157template <
class Scalar,
1159 class GlobalOrdinal,
1161void mult_AT_B_newmatrix(
1162 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
1163 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& B,
1164 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1165 const std::string& label,
1166 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1170 typedef LocalOrdinal LO;
1171 typedef GlobalOrdinal GO;
1173 typedef CrsMatrixStruct<SC, LO, GO, NO> crs_matrix_struct_type;
1174 typedef RowMatrixTransposer<SC, LO, GO, NO> transposer_type;
1181 transposer_type transposer(rcpFromRef(A), label + std::string(
"XP: "));
1183 using Teuchos::ParameterList;
1184 RCP<ParameterList> transposeParams(
new ParameterList);
1185 transposeParams->set(
"sort",
true);
1186 if (!params.is_null()) {
1187 transposeParams->set(
"compute global constants",
1188 params->get(
"compute global constants: temporaries",
1191 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Atrans =
1192 transposer.createTransposeLocal(transposeParams);
1201 crs_matrix_struct_type Aview;
1202 crs_matrix_struct_type Bview;
1203 RCP<const Import<LO, GO, NO>> dummyImporter;
1206 RCP<Teuchos::ParameterList> importParams1(
new ParameterList);
1207 if (!params.is_null()) {
1208 importParams1->set(
"compute global constants",
1209 params->get(
"compute global constants: temporaries",
1211 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
1212 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1213 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1214 int mm_optimization_core_count =
1216 mm_optimization_core_count =
1217 slist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1218 int mm_optimization_core_count2 =
1219 params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1220 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1221 mm_optimization_core_count = mm_optimization_core_count2;
1223 auto& sip1 = importParams1->sublist(
"matrixmatrix: kernel params",
false);
1224 sip1.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1225 sip1.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
1226 sip1.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1229 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(),
1230 Aview, dummyImporter,
true,
1231 label, importParams1);
1233 RCP<ParameterList> importParams2(
new ParameterList);
1234 if (!params.is_null()) {
1235 importParams2->set(
"compute global constants",
1236 params->get(
"compute global constants: temporaries",
1238 auto slist = params->sublist(
"matrixmatrix: kernel params",
false);
1239 bool isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1240 bool overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1241 int mm_optimization_core_count =
1243 mm_optimization_core_count =
1244 slist.get(
"MM_TAFC_OptimizationCoreCount",
1245 mm_optimization_core_count);
1246 int mm_optimization_core_count2 =
1247 params->get(
"MM_TAFC_OptimizationCoreCount",
1248 mm_optimization_core_count);
1249 if (mm_optimization_core_count2 < mm_optimization_core_count) {
1250 mm_optimization_core_count = mm_optimization_core_count2;
1252 auto& sip2 = importParams2->sublist(
"matrixmatrix: kernel params",
false);
1253 sip2.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1254 sip2.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
1255 sip2.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1258 if (B.getRowMap()->isSameAs(*Atrans->getColMap())) {
1259 MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,
true, label, importParams2);
1261 MMdetails::import_and_extract_views(B, Atrans->getColMap(), Bview, dummyImporter,
false, label, importParams2);
1267 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Ctemp;
1270 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
1271 if (needs_final_export) {
1274 Ctemp = rcp(&C,
false);
1277 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label, params);
1285 RCP<Tpetra::CrsMatrix<SC, LO, GO, NO>> Crcp(&C,
false);
1287 if (needs_final_export) {
1288 ParameterList labelList;
1289 labelList.set(
"Timer Label", label);
1290 if (!params.is_null()) {
1291 ParameterList& params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
1292 ParameterList& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
1294 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1295 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
1296 if (mm_optimization_core_count2 < mm_optimization_core_count) mm_optimization_core_count = mm_optimization_core_count2;
1297 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count,
"Core Count above which the optimized neighbor discovery is used");
1298 bool isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
1299 bool overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
1301 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete", isMM,
1302 "This parameter should be set to true only for MatrixMatrix operations: the optimization in Epetra that was ported to Tpetra does _not_ take into account the possibility that for any given source PID, a particular GID may not exist on the target PID: i.e. a transfer operation. A fix for this general case is in development.");
1303 labelList.set(
"compute global constants", params->get(
"compute global constants",
true));
1304 labelList.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
1306 Ctemp->exportAndFillComplete(Crcp,
1307 *Ctemp->getGraph()->getExporter(),
1310 rcp(&labelList,
false));
1312#ifdef HAVE_TPETRA_MMM_STATISTICS
1313 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label + std::string(
" AT_B MMM"));
1319template <
class Scalar,
1321 class GlobalOrdinal,
1324 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1325 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1326 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1327 const std::string& ,
1328 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1329 using Teuchos::Array;
1330 using Teuchos::ArrayRCP;
1331 using Teuchos::ArrayView;
1332 using Teuchos::null;
1333 using Teuchos::OrdinalTraits;
1335 bool skipExplicitZero =
true;
1336 if (params && params->isParameter(
"MM Skip Explicit Zeros")) {
1337 skipExplicitZero = params->get<
bool>(
"MM Skip Explicit Zeros");
1340 typedef Teuchos::ScalarTraits<Scalar> STS;
1342 LocalOrdinal C_firstCol = Bview.
colMap->getMinLocalIndex();
1343 LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1345 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1346 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1348 ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getLocalElementList();
1349 ArrayView<const GlobalOrdinal> bcols_import = null;
1350 if (Bview.importColMap != null) {
1351 C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1352 C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1354 bcols_import = Bview.importColMap->getLocalElementList();
1357 size_t C_numCols = C_lastCol - C_firstCol +
1358 OrdinalTraits<LocalOrdinal>::one();
1359 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1360 OrdinalTraits<LocalOrdinal>::one();
1362 if (C_numCols_import > C_numCols)
1363 C_numCols = C_numCols_import;
1365 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1366 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1367 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1369 Array<Scalar> C_row_i = dwork;
1370 Array<GlobalOrdinal> C_cols = iwork;
1371 Array<size_t> c_index = iwork2;
1372 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2 * C_numCols);
1373 Array<Scalar> combined_values = Array<Scalar>(2 * C_numCols);
1375 size_t C_row_i_length, j, k, last_index;
1378 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1379 Array<LocalOrdinal> Acol2Brow(Aview.colMap->getLocalNumElements(), LO_INVALID);
1380 Array<LocalOrdinal> Acol2Irow(Aview.colMap->getLocalNumElements(), LO_INVALID);
1381 if (Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())) {
1383 for (LocalOrdinal i = Aview.colMap->getMinLocalIndex(); i <=
1384 Aview.colMap->getMaxLocalIndex();
1389 for (LocalOrdinal i = Aview.colMap->getMinLocalIndex(); i <=
1390 Aview.colMap->getMaxLocalIndex();
1392 GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1393 LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1394 if (BLID != LO_INVALID)
1395 Acol2Brow[i] = BLID;
1397 Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1407 auto Arowptr = Aview.origMatrix->getLocalRowPtrsHost();
1408 auto Acolind = Aview.origMatrix->getLocalIndicesHost();
1409 auto Avals = Aview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1410 auto Browptr = Bview.origMatrix->getLocalRowPtrsHost();
1411 auto Bcolind = Bview.origMatrix->getLocalIndicesHost();
1412 auto Bvals = Bview.origMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1413 decltype(Browptr) Irowptr;
1414 decltype(Bcolind) Icolind;
1415 decltype(Bvals) Ivals;
1416 if (!Bview.importMatrix.is_null()) {
1417 Irowptr = Bview.importMatrix->getLocalRowPtrsHost();
1418 Icolind = Bview.importMatrix->getLocalIndicesHost();
1419 Ivals = Bview.importMatrix->getLocalValuesHost(Tpetra::Access::ReadOnly);
1422 bool C_filled = C.isFillComplete();
1424 for (
size_t i = 0; i < C_numCols; i++)
1425 c_index[i] = OrdinalTraits<size_t>::invalid();
1428 size_t Arows = Aview.rowMap->getLocalNumElements();
1429 for (
size_t i = 0; i < Arows; ++i) {
1432 GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1438 C_row_i_length = OrdinalTraits<size_t>::zero();
1440 for (k = Arowptr[i]; k < Arowptr[i + 1]; ++k) {
1441 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1442 const Scalar Aval = Avals[k];
1443 if (Aval == STS::zero() && skipExplicitZero)
1446 if (Ak == LO_INVALID)
1449 for (j = Browptr[Ak]; j < Browptr[Ak + 1]; ++j) {
1450 LocalOrdinal col = Bcolind[j];
1453 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1456 C_row_i[C_row_i_length] = Aval * Bvals[j];
1457 C_cols[C_row_i_length] = col;
1458 c_index[col] = C_row_i_length;
1463 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Bvals[j]);
1468 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1469 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1470 C_cols[ii] = bcols[C_cols[ii]];
1471 combined_index[ii] = C_cols[ii];
1472 combined_values[ii] = C_row_i[ii];
1474 last_index = C_row_i_length;
1480 C_row_i_length = OrdinalTraits<size_t>::zero();
1482 for (k = Arowptr[i]; k < Arowptr[i + 1]; ++k) {
1483 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1484 const Scalar Aval = Avals[k];
1485 if (Aval == STS::zero() && skipExplicitZero)
1488 if (Ak != LO_INVALID)
continue;
1490 Ak = Acol2Irow[Acolind[k]];
1491 for (j = Irowptr[Ak]; j < Irowptr[Ak + 1]; ++j) {
1492 LocalOrdinal col = Icolind[j];
1495 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1498 C_row_i[C_row_i_length] = Aval * Ivals[j];
1499 C_cols[C_row_i_length] = col;
1500 c_index[col] = C_row_i_length;
1506 C_row_i[c_index[col]] += Aval *
static_cast<Scalar
>(Ivals[j]);
1511 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1512 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1513 C_cols[ii] = bcols_import[C_cols[ii]];
1514 combined_index[last_index] = C_cols[ii];
1515 combined_values[last_index] = C_row_i[ii];
1521 C_filled ? C.sumIntoGlobalValues(
1523 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1524 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1525 : C.insertGlobalValues(
1527 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1528 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1533template <
class Scalar,
1535 class GlobalOrdinal,
1537void setMaxNumEntriesPerRow(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview) {
1538 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal>>::size_type local_length_size;
1539 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1541 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1542 Mview.maxNumRowEntries = Mview.indices[0].size();
1544 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1545 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1546 Mview.maxNumRowEntries = Mview.indices[i].size();
1551template <
class CrsMatrixType>
1552size_t C_estimate_nnz(CrsMatrixType& A, CrsMatrixType& B) {
1554 size_t Aest = 100, Best = 100;
1555 if (A.getLocalNumEntries() >= A.getLocalNumRows())
1556 Aest = (A.getLocalNumRows() > 0) ? A.getLocalNumEntries() / A.getLocalNumRows() : 100;
1557 if (B.getLocalNumEntries() >= B.getLocalNumRows())
1558 Best = (B.getLocalNumRows() > 0) ? B.getLocalNumEntries() / B.getLocalNumRows() : 100;
1560 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1561 nnzperrow *= nnzperrow;
1563 return (
size_t)(A.getLocalNumRows() * nnzperrow * 0.75 + 100);
1572template <
class Scalar,
1574 class GlobalOrdinal,
1576void mult_A_B_newmatrix(
1577 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1578 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1579 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1580 const std::string& label,
1581 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1582 using Teuchos::Array;
1583 using Teuchos::ArrayRCP;
1584 using Teuchos::ArrayView;
1589 typedef LocalOrdinal LO;
1590 typedef GlobalOrdinal GO;
1592 typedef Import<LO, GO, NO> import_type;
1593 typedef Map<LO, GO, NO> map_type;
1596 typedef typename map_type::local_map_type local_map_type;
1598 typedef typename KCRS::StaticCrsGraphType graph_t;
1599 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1600 typedef typename NO::execution_space execution_space;
1601 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1602 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1606 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1609 RCP<const import_type> Cimport;
1610 RCP<const map_type> Ccolmap;
1611 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1612 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1613 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1614 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1615 local_map_type Irowmap_local;
1616 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1617 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1618 local_map_type Icolmap_local;
1619 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1626 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
1628 if (Bview.importMatrix.is_null()) {
1631 Ccolmap = Bview.colMap;
1632 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1634 Kokkos::parallel_for(
1635 "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1636 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1637 KOKKOS_LAMBDA(
const LO i) {
1649 if (!Bimport.is_null() && !Iimport.is_null()) {
1650 Cimport = Bimport->setUnion(*Iimport);
1651 }
else if (!Bimport.is_null() && Iimport.is_null()) {
1652 Cimport = Bimport->setUnion();
1653 }
else if (Bimport.is_null() && !Iimport.is_null()) {
1654 Cimport = Iimport->setUnion();
1656 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1658 Ccolmap = Cimport->getTargetMap();
1663 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
1664 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1671 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
1672 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1673 Kokkos::parallel_for(
1674 "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement", range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
1675 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1677 Kokkos::parallel_for(
1678 "Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement", range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
1679 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1687 C.replaceColMap(Ccolmap);
1705 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
1706 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
1708 Kokkos::parallel_for(
1709 "Tpetra::mult_A_B_newmatrix::construct_tables", range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
1710 GO aidx = Acolmap_local.getGlobalElement(i);
1711 LO B_LID = Browmap_local.getLocalElement(aidx);
1712 if (B_LID != LO_INVALID) {
1713 targetMapToOrigRow(i) = B_LID;
1714 targetMapToImportRow(i) = LO_INVALID;
1716 LO I_LID = Irowmap_local.getLocalElement(aidx);
1717 targetMapToOrigRow(i) = LO_INVALID;
1718 targetMapToImportRow(i) = I_LID;
1724 KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
1729template <
class Scalar,
1731 class GlobalOrdinal,
1733void mult_A_B_newmatrix(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1734 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1735 Teuchos::RCP<BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& C) {
1736 using Teuchos::Array;
1737 using Teuchos::ArrayRCP;
1738 using Teuchos::ArrayView;
1739 using Teuchos::null;
1744 typedef LocalOrdinal LO;
1745 typedef GlobalOrdinal GO;
1747 typedef Import<LO, GO, NO> import_type;
1748 typedef Map<LO, GO, NO> map_type;
1749 typedef BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> block_crs_matrix_type;
1750 typedef typename block_crs_matrix_type::crs_graph_type graph_t;
1753 typedef typename map_type::local_map_type local_map_type;
1754 typedef typename block_crs_matrix_type::local_matrix_device_type KBSR;
1755 typedef typename KBSR::device_type device_t;
1756 typedef typename KBSR::StaticCrsGraphType static_graph_t;
1757 typedef typename static_graph_t::row_map_type::non_const_type lno_view_t;
1758 typedef typename static_graph_t::entries_type::non_const_type lno_nnz_view_t;
1759 typedef typename KBSR::values_type::non_const_type scalar_view_t;
1760 typedef typename NO::execution_space execution_space;
1761 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1762 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1764 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1767 RCP<const import_type> Cimport;
1768 RCP<const map_type> Ccolmap;
1769 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1770 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1771 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
1772 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
1773 local_map_type Irowmap_local;
1774 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
1775 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
1776 local_map_type Icolmap_local;
1777 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
1784 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
1786 if (Bview.importMatrix.is_null()) {
1789 Ccolmap = Bview.colMap;
1790 const LO colMapSize =
static_cast<LO
>(Bview.colMap->getLocalNumElements());
1792 Kokkos::parallel_for(
1793 "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1794 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1795 KOKKOS_LAMBDA(
const LO i) {
1807 if (!Bimport.is_null() && !Iimport.is_null()) {
1808 Cimport = Bimport->setUnion(*Iimport);
1809 }
else if (!Bimport.is_null() && Iimport.is_null()) {
1810 Cimport = Bimport->setUnion();
1811 }
else if (Bimport.is_null() && !Iimport.is_null()) {
1812 Cimport = Iimport->setUnion();
1814 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1816 Ccolmap = Cimport->getTargetMap();
1823 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
1824 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1825 Kokkos::parallel_for(
1826 "Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",
1827 range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()),
1828 KOKKOS_LAMBDA(
const LO i) {
1829 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1831 Kokkos::parallel_for(
1832 "Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",
1833 range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()),
1834 KOKKOS_LAMBDA(
const LO i) {
1835 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1855 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),
1856 Aview.colMap->getLocalNumElements());
1857 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),
1858 Aview.colMap->getLocalNumElements());
1860 Kokkos::parallel_for(
1861 "Tpetra::mult_A_B_newmatrix::construct_tables",
1862 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1),
1863 KOKKOS_LAMBDA(
const LO i) {
1864 GO aidx = Acolmap_local.getGlobalElement(i);
1865 LO B_LID = Browmap_local.getLocalElement(aidx);
1866 if (B_LID != LO_INVALID) {
1867 targetMapToOrigRow(i) = B_LID;
1868 targetMapToImportRow(i) = LO_INVALID;
1870 LO I_LID = Irowmap_local.getLocalElement(aidx);
1871 targetMapToOrigRow(i) = LO_INVALID;
1872 targetMapToImportRow(i) = I_LID;
1877 using KernelHandle =
1878 KokkosKernels::Experimental::KokkosKernelsHandle<
typename lno_view_t::const_value_type,
1879 typename lno_nnz_view_t::const_value_type,
1880 typename scalar_view_t::const_value_type,
1881 typename device_t::execution_space,
1882 typename device_t::memory_space,
1883 typename device_t::memory_space>;
1884 int team_work_size = 16;
1885 std::string myalg(
"SPGEMM_KK_MEMORY");
1886 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
1889 kh.create_spgemm_handle(alg_enum);
1890 kh.set_team_work_size(team_work_size);
1893 const KBSR Amat = Aview.origMatrix->getLocalMatrixDevice();
1894 const KBSR Bmerged = Tpetra::MMdetails::merge_matrices(Aview, Bview,
1895 targetMapToOrigRow, targetMapToImportRow,
1896 Bcol2Ccol, Icol2Ccol,
1897 Ccolmap.getConst()->getLocalNumElements());
1899 RCP<graph_t> graphC;
1900 typename KBSR::values_type values;
1905 KokkosSparse::block_spgemm_symbolic(kh, Amat,
false, Bmerged,
false, Cmat);
1906 KokkosSparse::block_spgemm_numeric(kh, Amat,
false, Bmerged,
false, Cmat);
1907 kh.destroy_spgemm_handle();
1910 graphC = rcp(
new graph_t(Cmat.graph, Aview.origMatrix->getRowMap(), Ccolmap.getConst()));
1911 values = Cmat.values;
1913 C = rcp(
new block_crs_matrix_type(*graphC, values, Aview.blocksize));
1918template <
class Scalar,
1920 class GlobalOrdinal,
1922 class LocalOrdinalViewType>
1923void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
1924 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
1925 const LocalOrdinalViewType& targetMapToOrigRow,
1926 const LocalOrdinalViewType& targetMapToImportRow,
1927 const LocalOrdinalViewType& Bcol2Ccol,
1928 const LocalOrdinalViewType& Icol2Ccol,
1929 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1930 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> Cimport,
1931 const std::string& label,
1932 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1933 using Teuchos::Array;
1934 using Teuchos::ArrayRCP;
1935 using Teuchos::ArrayView;
1942 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
1943 typedef typename KCRS::StaticCrsGraphType graph_t;
1944 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
1945 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1946 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1947 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1950 typedef LocalOrdinal LO;
1951 typedef GlobalOrdinal GO;
1953 typedef Map<LO, GO, NO> map_type;
1954 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1955 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1956 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1958 bool skipExplicitZero =
true;
1959 if (params && params->isParameter(
"MM Skip Explicit Zeros")) {
1960 skipExplicitZero = params->get<
bool>(
"MM Skip Explicit Zeros");
1964 RCP<const map_type> Ccolmap = C.getColMap();
1965 size_t m = Aview.origMatrix->getLocalNumRows();
1966 size_t n = Ccolmap->getLocalNumElements();
1967 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
1970 const KCRS Amat = Aview.origMatrix->getLocalMatrixHost();
1971 const KCRS Bmat = Bview.origMatrix->getLocalMatrixHost();
1973 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
1974 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
1975 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
1977 c_lno_view_t Irowptr;
1978 lno_nnz_view_t Icolind;
1979 scalar_view_t Ivals;
1980 if (!Bview.importMatrix.is_null()) {
1981 auto lclB = Bview.importMatrix->getLocalMatrixHost();
1982 Irowptr = lclB.graph.row_map;
1983 Icolind = lclB.graph.entries;
1984 Ivals = lclB.values;
1985 b_max_nnz_per_row = std::max(b_max_nnz_per_row, Bview.importMatrix->getLocalMaxNumRowEntries());
1995 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
1996 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"), m + 1);
1997 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"), CSR_alloc);
1998 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"), CSR_alloc);
2008 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2009 std::vector<size_t> c_status(n, ST_INVALID);
2019 size_t CSR_ip = 0, OLD_ip = 0;
2020 for (
size_t i = 0; i < m; i++) {
2023 Crowptr[i] = CSR_ip;
2026 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
2027 LO Aik = Acolind[k];
2028 const SC Aval = Avals[k];
2029 if (Aval == SC_ZERO && skipExplicitZero)
2032 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2039 size_t Bk =
static_cast<size_t>(targetMapToOrigRow[Aik]);
2042 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
2043 LO Bkj = Bcolind[j];
2044 LO Cij = Bcol2Ccol[Bkj];
2046 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2048 c_status[Cij] = CSR_ip;
2049 Ccolind[CSR_ip] = Cij;
2050 Cvals[CSR_ip] = Aval * Bvals[j];
2054 Cvals[c_status[Cij]] += Aval * Bvals[j];
2065 size_t Ik =
static_cast<size_t>(targetMapToImportRow[Aik]);
2066 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
2067 LO Ikj = Icolind[j];
2068 LO Cij = Icol2Ccol[Ikj];
2070 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2072 c_status[Cij] = CSR_ip;
2073 Ccolind[CSR_ip] = Cij;
2074 Cvals[CSR_ip] = Aval * Ivals[j];
2077 Cvals[c_status[Cij]] += Aval * Ivals[j];
2084 if (i + 1 < m && CSR_ip + std::min(n, (Arowptr[i + 2] - Arowptr[i + 1]) * b_max_nnz_per_row) > CSR_alloc) {
2086 Kokkos::resize(Ccolind, CSR_alloc);
2087 Kokkos::resize(Cvals, CSR_alloc);
2092 Crowptr[m] = CSR_ip;
2095 Kokkos::resize(Ccolind, CSR_ip);
2096 Kokkos::resize(Cvals, CSR_ip);
2102 if (params.is_null() || params->get(
"sort entries",
true)) {
2104 Import_Util::sortCrsEntries(Crowptr, Ccolind, Cvals);
2106 C.setAllValues(Crowptr, Ccolind, Cvals);
2119 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2120 labelList->set(
"Timer Label", label);
2121 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
2122 RCP<const Export<LO, GO, NO>> dummyExport;
2123 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
2128template <
class Scalar,
2130 class GlobalOrdinal,
2133 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2134 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2135 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2136 const std::string& label,
2137 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2138 using Teuchos::Array;
2139 using Teuchos::ArrayRCP;
2140 using Teuchos::ArrayView;
2145 typedef LocalOrdinal LO;
2146 typedef GlobalOrdinal GO;
2148 typedef Import<LO, GO, NO> import_type;
2149 typedef Map<LO, GO, NO> map_type;
2152 typedef typename map_type::local_map_type local_map_type;
2154 typedef typename KCRS::StaticCrsGraphType graph_t;
2155 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2156 typedef typename NO::execution_space execution_space;
2157 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2158 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2163 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2166 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2167 RCP<const map_type> Ccolmap = C.getColMap();
2168 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2169 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2170 local_map_type Irowmap_local;
2171 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2172 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2173 local_map_type Icolmap_local;
2174 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2175 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2178 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
2182 Kokkos::parallel_for(
2183 range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2184 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2187 if (!Bview.importMatrix.is_null()) {
2188 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2189 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2191 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
2192 Kokkos::parallel_for(
2193 range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2194 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2200 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
2201 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
2202 Kokkos::parallel_for(
2203 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
2204 GO aidx = Acolmap_local.getGlobalElement(i);
2205 LO B_LID = Browmap_local.getLocalElement(aidx);
2206 if (B_LID != LO_INVALID) {
2207 targetMapToOrigRow(i) = B_LID;
2208 targetMapToImportRow(i) = LO_INVALID;
2210 LO I_LID = Irowmap_local.getLocalElement(aidx);
2211 targetMapToOrigRow(i) = LO_INVALID;
2212 targetMapToImportRow(i) = I_LID;
2218 KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
2222template <
class Scalar,
2224 class GlobalOrdinal,
2226 class LocalOrdinalViewType>
2227void KernelWrappers<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2228 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2229 const LocalOrdinalViewType& targetMapToOrigRow,
2230 const LocalOrdinalViewType& targetMapToImportRow,
2231 const LocalOrdinalViewType& Bcol2Ccol,
2232 const LocalOrdinalViewType& Icol2Ccol,
2233 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2234 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> ,
2235 const std::string& label,
2236 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2241 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
2242 typedef typename KCRS::StaticCrsGraphType graph_t;
2243 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2244 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2245 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2248 typedef LocalOrdinal LO;
2249 typedef GlobalOrdinal GO;
2251 typedef Map<LO, GO, NO> map_type;
2252 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2253 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2254 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2258 bool throwOnInsert =
true;
2259 if (!params.is_null() && params->isType<
bool>(
"MM Throw For Non-Existent Entries"))
2260 throwOnInsert = params->get<
bool>(
"MM Throw For Non-Existent Entries");
2266 RCP<const map_type> Ccolmap = C.getColMap();
2267 size_t m = Aview.origMatrix->getLocalNumRows();
2268 size_t n = Ccolmap->getLocalNumElements();
2271 const KCRS Amat = Aview.origMatrix->getLocalMatrixHost();
2272 const KCRS Bmat = Bview.origMatrix->getLocalMatrixHost();
2273 const KCRS Cmat = C.getLocalMatrixHost();
2275 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2276 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2277 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2278 scalar_view_t Cvals = Cmat.values;
2280 c_lno_view_t Irowptr;
2281 lno_nnz_view_t Icolind;
2282 scalar_view_t Ivals;
2283 if (!Bview.importMatrix.is_null()) {
2284 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2285 Irowptr = lclB.graph.row_map;
2286 Icolind = lclB.graph.entries;
2287 Ivals = lclB.values;
2299 std::vector<size_t> c_status(n, ST_INVALID);
2302 size_t CSR_ip = 0, OLD_ip = 0;
2303 for (
size_t i = 0; i < m; i++) {
2306 OLD_ip = Crowptr[i];
2307 CSR_ip = Crowptr[i + 1];
2308 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2309 c_status[Ccolind[k]] = k;
2315 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
2316 LO Aik = Acolind[k];
2317 const SC Aval = Avals[k];
2318 if (Aval == SC_ZERO)
2321 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2323 size_t Bk =
static_cast<size_t>(targetMapToOrigRow[Aik]);
2325 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
2326 LO Bkj = Bcolind[j];
2327 LO Cij = Bcol2Ccol[Bkj];
2329 const bool badInsert = c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip;
2331 Cvals[c_status[Cij]] += Aval * Bvals[j];
2332 else if (throwOnInsert)
2333 TEUCHOS_TEST_FOR_EXCEPTION(badInsert,
2334 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph "
2335 <<
"(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2340 size_t Ik =
static_cast<size_t>(targetMapToImportRow[Aik]);
2341 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
2342 LO Ikj = Icolind[j];
2343 LO Cij = Icol2Ccol[Ikj];
2345 const bool badInsert = c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip;
2347 Cvals[c_status[Cij]] += Aval * Ivals[j];
2348 else if (throwOnInsert)
2349 TEUCHOS_TEST_FOR_EXCEPTION(badInsert,
2350 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph "
2351 <<
"(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2359 C.fillComplete(C.getDomainMap(), C.getRangeMap());
2365template <
class Scalar,
2367 class GlobalOrdinal,
2369void jacobi_A_B_newmatrix(
2370 typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
2371 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
2372 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2373 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2374 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2375 const std::string& label,
2376 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2377 using Teuchos::Array;
2378 using Teuchos::ArrayRCP;
2379 using Teuchos::ArrayView;
2383 typedef LocalOrdinal LO;
2384 typedef GlobalOrdinal GO;
2387 typedef Import<LO, GO, NO> import_type;
2388 typedef Map<LO, GO, NO> map_type;
2389 typedef typename map_type::local_map_type local_map_type;
2393 typedef typename KCRS::StaticCrsGraphType graph_t;
2394 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2395 typedef typename NO::execution_space execution_space;
2396 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2397 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2401 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2404 RCP<const import_type> Cimport;
2405 RCP<const map_type> Ccolmap;
2406 RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
2407 RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
2408 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2409 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2410 local_map_type Irowmap_local;
2411 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2412 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2413 local_map_type Icolmap_local;
2414 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2421 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements()), Icol2Ccol;
2423 if (Bview.importMatrix.is_null()) {
2426 Ccolmap = Bview.colMap;
2430 Kokkos::RangePolicy<execution_space, LO> range(0,
static_cast<LO
>(Bview.colMap->getLocalNumElements()));
2431 Kokkos::parallel_for(
2432 range, KOKKOS_LAMBDA(
const size_t i) {
2433 Bcol2Ccol(i) =
static_cast<LO
>(i);
2444 if (!Bimport.is_null() && !Iimport.is_null()) {
2445 Cimport = Bimport->setUnion(*Iimport);
2446 Ccolmap = Cimport->getTargetMap();
2448 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2449 Cimport = Bimport->setUnion();
2451 }
else if (Bimport.is_null() && !Iimport.is_null()) {
2452 Cimport = Iimport->setUnion();
2455 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2457 Ccolmap = Cimport->getTargetMap();
2459 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2460 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2467 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
2468 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2469 Kokkos::parallel_for(
2470 range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2471 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2473 Kokkos::parallel_for(
2474 range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2475 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2483 C.replaceColMap(Ccolmap);
2501 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
2502 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
2503 Kokkos::parallel_for(
2504 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
2505 GO aidx = Acolmap_local.getGlobalElement(i);
2506 LO B_LID = Browmap_local.getLocalElement(aidx);
2507 if (B_LID != LO_INVALID) {
2508 targetMapToOrigRow(i) = B_LID;
2509 targetMapToImportRow(i) = LO_INVALID;
2511 LO I_LID = Irowmap_local.getLocalElement(aidx);
2512 targetMapToOrigRow(i) = LO_INVALID;
2513 targetMapToImportRow(i) = I_LID;
2519 KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega, Dinv, Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
2526template <
class Scalar,
2528 class GlobalOrdinal,
2530 class LocalOrdinalViewType>
2531void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(
typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
2532 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
2533 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2534 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2535 const LocalOrdinalViewType& targetMapToOrigRow,
2536 const LocalOrdinalViewType& targetMapToImportRow,
2537 const LocalOrdinalViewType& Bcol2Ccol,
2538 const LocalOrdinalViewType& Icol2Ccol,
2539 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2540 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> Cimport,
2541 const std::string& label,
2542 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2545 using Teuchos::Array;
2546 using Teuchos::ArrayRCP;
2547 using Teuchos::ArrayView;
2552 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
2553 typedef typename KCRS::StaticCrsGraphType graph_t;
2554 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2555 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2556 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2557 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2560 typedef typename scalar_view_t::memory_space scalar_memory_space;
2563 typedef LocalOrdinal LO;
2564 typedef GlobalOrdinal GO;
2567 typedef Map<LO, GO, NO> map_type;
2568 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2569 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2572 RCP<const map_type> Ccolmap = C.getColMap();
2573 size_t m = Aview.origMatrix->getLocalNumRows();
2574 size_t n = Ccolmap->getLocalNumElements();
2575 size_t b_max_nnz_per_row = Bview.origMatrix->getLocalMaxNumRowEntries();
2578 const KCRS Amat = Aview.origMatrix->getLocalMatrixHost();
2579 const KCRS Bmat = Bview.origMatrix->getLocalMatrixHost();
2581 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2582 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2583 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2585 c_lno_view_t Irowptr;
2586 lno_nnz_view_t Icolind;
2587 scalar_view_t Ivals;
2588 if (!Bview.importMatrix.is_null()) {
2589 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2590 Irowptr = lclB.graph.row_map;
2591 Icolind = lclB.graph.entries;
2592 Ivals = lclB.values;
2593 b_max_nnz_per_row = std::max(b_max_nnz_per_row, Bview.importMatrix->getLocalMaxNumRowEntries());
2598 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
2606 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2607 Array<size_t> c_status(n, ST_INVALID);
2616 size_t CSR_alloc = std::max(C_estimate_nnz(*Aview.origMatrix, *Bview.origMatrix), n);
2617 lno_view_t Crowptr(Kokkos::ViewAllocateWithoutInitializing(
"Crowptr"), m + 1);
2618 lno_nnz_view_t Ccolind(Kokkos::ViewAllocateWithoutInitializing(
"Ccolind"), CSR_alloc);
2619 scalar_view_t Cvals(Kokkos::ViewAllocateWithoutInitializing(
"Cvals"), CSR_alloc);
2620 size_t CSR_ip = 0, OLD_ip = 0;
2622 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2636 for (
size_t i = 0; i < m; i++) {
2639 Crowptr[i] = CSR_ip;
2640 SC minusOmegaDval = -omega * Dvals(i, 0);
2643 for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
2644 Scalar Bval = Bvals[j];
2645 if (Bval == SC_ZERO)
2647 LO Bij = Bcolind[j];
2648 LO Cij = Bcol2Ccol[Bij];
2651 c_status[Cij] = CSR_ip;
2652 Ccolind[CSR_ip] = Cij;
2653 Cvals[CSR_ip] = Bvals[j];
2658 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
2659 LO Aik = Acolind[k];
2660 const SC Aval = Avals[k];
2661 if (Aval == SC_ZERO)
2664 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2666 size_t Bk =
static_cast<size_t>(targetMapToOrigRow[Aik]);
2668 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
2669 LO Bkj = Bcolind[j];
2670 LO Cij = Bcol2Ccol[Bkj];
2672 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2674 c_status[Cij] = CSR_ip;
2675 Ccolind[CSR_ip] = Cij;
2676 Cvals[CSR_ip] = minusOmegaDval * Aval * Bvals[j];
2680 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
2686 size_t Ik =
static_cast<size_t>(targetMapToImportRow[Aik]);
2687 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
2688 LO Ikj = Icolind[j];
2689 LO Cij = Icol2Ccol[Ikj];
2691 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2693 c_status[Cij] = CSR_ip;
2694 Ccolind[CSR_ip] = Cij;
2695 Cvals[CSR_ip] = minusOmegaDval * Aval * Ivals[j];
2698 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
2705 if (i + 1 < m && CSR_ip + std::min(n, (Arowptr[i + 2] - Arowptr[i + 1] + 1) * b_max_nnz_per_row) > CSR_alloc) {
2707 Kokkos::resize(Ccolind, CSR_alloc);
2708 Kokkos::resize(Cvals, CSR_alloc);
2712 Crowptr[m] = CSR_ip;
2715 Kokkos::resize(Ccolind, CSR_ip);
2716 Kokkos::resize(Cvals, CSR_ip);
2725 C.replaceColMap(Ccolmap);
2732 if (params.is_null() || params->get(
"sort entries",
true)) {
2734 Import_Util::sortCrsEntries(Crowptr, Ccolind, Cvals);
2736 C.setAllValues(Crowptr, Ccolind, Cvals);
2749 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2750 labelList->set(
"Timer Label", label);
2751 if (!params.is_null()) labelList->set(
"compute global constants", params->get(
"compute global constants",
true));
2752 RCP<const Export<LO, GO, NO>> dummyExport;
2753 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport, dummyExport, labelList);
2759template <
class Scalar,
2761 class GlobalOrdinal,
2763void jacobi_A_B_reuse(
2764 typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
2765 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
2766 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2767 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2768 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2769 const std::string& label,
2770 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2771 using Teuchos::Array;
2772 using Teuchos::ArrayRCP;
2773 using Teuchos::ArrayView;
2777 typedef LocalOrdinal LO;
2778 typedef GlobalOrdinal GO;
2781 typedef Import<LO, GO, NO> import_type;
2782 typedef Map<LO, GO, NO> map_type;
2785 typedef typename map_type::local_map_type local_map_type;
2787 typedef typename KCRS::StaticCrsGraphType graph_t;
2788 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2789 typedef typename NO::execution_space execution_space;
2790 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2791 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2793 RCP<const import_type> Cimport = C.getGraph()->getImporter();
2794 lo_view_t Bcol2Ccol, Icol2Ccol;
2795 lo_view_t targetMapToOrigRow;
2796 lo_view_t targetMapToImportRow;
2800 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2803 RCP<const map_type> Ccolmap = C.getColMap();
2804 local_map_type Acolmap_local = Aview.colMap->getLocalMap();
2805 local_map_type Browmap_local = Bview.origMatrix->getRowMap()->getLocalMap();
2806 local_map_type Irowmap_local;
2807 if (!Bview.importMatrix.is_null()) Irowmap_local = Bview.importMatrix->getRowMap()->getLocalMap();
2808 local_map_type Bcolmap_local = Bview.origMatrix->getColMap()->getLocalMap();
2809 local_map_type Icolmap_local;
2810 if (!Bview.importMatrix.is_null()) Icolmap_local = Bview.importMatrix->getColMap()->getLocalMap();
2811 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2814 Bcol2Ccol = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"), Bview.colMap->getLocalNumElements());
2818 Kokkos::parallel_for(
2819 range_type(0, Bview.origMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2820 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2823 if (!Bview.importMatrix.is_null()) {
2824 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()),
2825 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
2827 Kokkos::resize(Icol2Ccol, Bview.importMatrix->getColMap()->getLocalNumElements());
2828 Kokkos::parallel_for(
2829 range_type(0, Bview.importMatrix->getColMap()->getLocalNumElements()), KOKKOS_LAMBDA(
const LO i) {
2830 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2836 targetMapToOrigRow = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"), Aview.colMap->getLocalNumElements());
2837 targetMapToImportRow = lo_view_t(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"), Aview.colMap->getLocalNumElements());
2838 Kokkos::parallel_for(
2839 range_type(Aview.colMap->getMinLocalIndex(), Aview.colMap->getMaxLocalIndex() + 1), KOKKOS_LAMBDA(
const LO i) {
2840 GO aidx = Acolmap_local.getGlobalElement(i);
2841 LO B_LID = Browmap_local.getLocalElement(aidx);
2842 if (B_LID != LO_INVALID) {
2843 targetMapToOrigRow(i) = B_LID;
2844 targetMapToImportRow(i) = LO_INVALID;
2846 LO I_LID = Irowmap_local.getLocalElement(aidx);
2847 targetMapToOrigRow(i) = LO_INVALID;
2848 targetMapToImportRow(i) = I_LID;
2855 KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega, Dinv, Aview, Bview, targetMapToOrigRow, targetMapToImportRow, Bcol2Ccol, Icol2Ccol, C, Cimport, label, params);
2859template <
class Scalar,
2861 class GlobalOrdinal,
2863 class LocalOrdinalViewType>
2864void KernelWrappers2<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(
typename Teuchos::ScalarTraits<Scalar>::magnitudeType omega,
2865 const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Dinv,
2866 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
2867 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
2868 const LocalOrdinalViewType& targetMapToOrigRow,
2869 const LocalOrdinalViewType& targetMapToImportRow,
2870 const LocalOrdinalViewType& Bcol2Ccol,
2871 const LocalOrdinalViewType& Icol2Ccol,
2872 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
2873 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> ,
2874 const std::string& label,
2875 const Teuchos::RCP<Teuchos::ParameterList>& ) {
2883 typedef typename Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type KCRS;
2884 typedef typename KCRS::StaticCrsGraphType graph_t;
2885 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2886 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2887 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2888 typedef typename scalar_view_t::memory_space scalar_memory_space;
2891 typedef LocalOrdinal LO;
2892 typedef GlobalOrdinal GO;
2894 typedef Map<LO, GO, NO> map_type;
2895 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2896 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2897 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2900 RCP<const map_type> Ccolmap = C.getColMap();
2901 size_t m = Aview.origMatrix->getLocalNumRows();
2902 size_t n = Ccolmap->getLocalNumElements();
2905 const KCRS Amat = Aview.origMatrix->getLocalMatrixHost();
2906 const KCRS Bmat = Bview.origMatrix->getLocalMatrixHost();
2907 const KCRS Cmat = C.getLocalMatrixHost();
2909 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2910 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2911 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2912 scalar_view_t Cvals = Cmat.values;
2914 c_lno_view_t Irowptr;
2915 lno_nnz_view_t Icolind;
2916 scalar_view_t Ivals;
2917 if (!Bview.importMatrix.is_null()) {
2918 auto lclB = Bview.importMatrix->getLocalMatrixHost();
2919 Irowptr = lclB.graph.row_map;
2920 Icolind = lclB.graph.entries;
2921 Ivals = lclB.values;
2926 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
2933 std::vector<size_t> c_status(n, ST_INVALID);
2936 size_t CSR_ip = 0, OLD_ip = 0;
2937 for (
size_t i = 0; i < m; i++) {
2940 OLD_ip = Crowptr[i];
2941 CSR_ip = Crowptr[i + 1];
2942 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2943 c_status[Ccolind[k]] = k;
2949 SC minusOmegaDval = -omega * Dvals(i, 0);
2952 for (
size_t j = Browptr[i]; j < Browptr[i + 1]; j++) {
2953 Scalar Bval = Bvals[j];
2954 if (Bval == SC_ZERO)
2956 LO Bij = Bcolind[j];
2957 LO Cij = Bcol2Ccol[Bij];
2959 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2960 std::runtime_error,
"Trying to insert a new entry into a static graph");
2962 Cvals[c_status[Cij]] = Bvals[j];
2966 for (
size_t k = Arowptr[i]; k < Arowptr[i + 1]; k++) {
2967 LO Aik = Acolind[k];
2968 const SC Aval = Avals[k];
2969 if (Aval == SC_ZERO)
2972 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2974 size_t Bk =
static_cast<size_t>(targetMapToOrigRow[Aik]);
2976 for (
size_t j = Browptr[Bk]; j < Browptr[Bk + 1]; ++j) {
2977 LO Bkj = Bcolind[j];
2978 LO Cij = Bcol2Ccol[Bkj];
2980 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2981 std::runtime_error,
"Trying to insert a new entry into a static graph");
2983 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
2988 size_t Ik =
static_cast<size_t>(targetMapToImportRow[Aik]);
2989 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik + 1]; ++j) {
2990 LO Ikj = Icolind[j];
2991 LO Cij = Icol2Ccol[Ikj];
2993 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2994 std::runtime_error,
"Trying to insert a new entry into a static graph");
2996 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
3004 C.fillComplete(C.getDomainMap(), C.getRangeMap());
3009template <
class Scalar,
3011 class GlobalOrdinal,
3013void import_and_extract_views(
3014 const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
3015 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node>> targetMap,
3016 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3017 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> prototypeImporter,
3018 bool userAssertsThereAreNoRemotes,
3019 const std::string& label,
3020 const Teuchos::RCP<Teuchos::ParameterList>& params) {
3021 using Teuchos::Array;
3022 using Teuchos::ArrayView;
3023 using Teuchos::null;
3028 typedef LocalOrdinal LO;
3029 typedef GlobalOrdinal GO;
3032 typedef Map<LO, GO, NO> map_type;
3033 typedef Import<LO, GO, NO> import_type;
3034 typedef CrsMatrix<SC, LO, GO, NO> crs_matrix_type;
3044 Aview.deleteContents();
3046 Aview.origMatrix = rcp(&A,
false);
3048 Aview.origMatrix->getApplyHelper();
3049 Aview.origRowMap = A.getRowMap();
3050 Aview.rowMap = targetMap;
3051 Aview.colMap = A.getColMap();
3052 Aview.domainMap = A.getDomainMap();
3053 Aview.importColMap = null;
3054 RCP<const map_type> rowMap = A.getRowMap();
3055 const int numProcs = rowMap->getComm()->getSize();
3058 if (userAssertsThereAreNoRemotes || numProcs < 2)
3061 RCP<const import_type> importer;
3062 if (params != null && params->isParameter(
"importer")) {
3063 importer = params->get<RCP<const import_type>>(
"importer");
3071 RCP<const map_type> remoteRowMap;
3072 size_t numRemote = 0;
3074 if (!prototypeImporter.is_null() &&
3075 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3076 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3080 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3081 numRemote = prototypeImporter->getNumRemoteIDs();
3083 Array<GO> remoteRows(numRemote);
3084 for (
size_t i = 0; i < numRemote; i++)
3085 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3087 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3088 rowMap->getIndexBase(), rowMap->getComm(), params));
3091 }
else if (prototypeImporter.is_null()) {
3095 ArrayView<const GO> rows = targetMap->getLocalElementList();
3096 size_t numRows = targetMap->getLocalNumElements();
3098 Array<GO> remoteRows(numRows);
3099 for (
size_t i = 0; i < numRows; ++i) {
3100 const LO mlid = rowMap->getLocalElement(rows[i]);
3102 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3103 remoteRows[numRemote++] = rows[i];
3105 remoteRows.resize(numRemote);
3106 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3107 rowMap->getIndexBase(), rowMap->getComm(), params));
3116 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3117 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3125 if (!remoteRowMap.is_null() && (remoteRowMap->getGlobalNumElements() > 0)) {
3131 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3133 importer = rcp(
new import_type(rowMap, remoteRowMap));
3135 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3139 params->set(
"importer", importer);
3142 if (importer != null) {
3147 Teuchos::ParameterList labelList;
3148 labelList.set(
"Timer Label", label);
3149 auto& labelList_subList = labelList.sublist(
"matrixmatrix: kernel params",
false);
3152 bool overrideAllreduce =
false;
3155 Teuchos::ParameterList params_sublist;
3156 if (!params.is_null()) {
3157 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3158 params_sublist = params->sublist(
"matrixmatrix: kernel params",
false);
3159 mm_optimization_core_count = params_sublist.get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
3160 int mm_optimization_core_count2 = params->get(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
3161 if (mm_optimization_core_count2 < mm_optimization_core_count) mm_optimization_core_count = mm_optimization_core_count2;
3162 isMM = params_sublist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
3163 overrideAllreduce = params_sublist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
3165 labelList_subList.set(
"isMatrixMatrix_TransferAndFillComplete", isMM);
3166 labelList_subList.set(
"MM_TAFC_OptimizationCoreCount", mm_optimization_core_count);
3167 labelList_subList.set(
"MM_TAFC_OverrideAllreduceCheck", overrideAllreduce);
3170 A.getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3172 Aview.importMatrix->getApplyHelper();
3178 sprintf(str,
"import_matrix.%d.dat",count);
3183#ifdef HAVE_TPETRA_MMM_STATISTICS
3184 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3191 Aview.importColMap = Aview.importMatrix->getColMap();
3197template <
class Scalar,
3199 class GlobalOrdinal,
3201void import_and_extract_views(
3202 const BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& M,
3203 Teuchos::RCP<
const Map<LocalOrdinal, GlobalOrdinal, Node>> targetMap,
3204 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Mview,
3205 Teuchos::RCP<
const Import<LocalOrdinal, GlobalOrdinal, Node>> prototypeImporter,
3206 bool userAssertsThereAreNoRemotes) {
3207 using Teuchos::Array;
3208 using Teuchos::ArrayView;
3209 using Teuchos::null;
3214 typedef LocalOrdinal LO;
3215 typedef GlobalOrdinal GO;
3218 typedef Map<LO, GO, NO> map_type;
3219 typedef Import<LO, GO, NO> import_type;
3220 typedef BlockCrsMatrix<SC, LO, GO, NO> blockcrs_matrix_type;
3228 Mview.deleteContents();
3232 Mview.origMatrix->getApplyHelper();
3233 Mview.origRowMap = M.getRowMap();
3234 Mview.rowMap = targetMap;
3235 Mview.colMap = M.getColMap();
3236 Mview.importColMap = null;
3237 RCP<const map_type> rowMap = M.getRowMap();
3238 const int numProcs = rowMap->getComm()->getSize();
3241 if (userAssertsThereAreNoRemotes || numProcs < 2)
return;
3245 RCP<const map_type> remoteRowMap;
3246 size_t numRemote = 0;
3248 if (!prototypeImporter.is_null() &&
3249 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3250 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3252 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3253 numRemote = prototypeImporter->getNumRemoteIDs();
3255 Array<GO> remoteRows(numRemote);
3256 for (
size_t i = 0; i < numRemote; i++)
3257 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3259 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3260 rowMap->getIndexBase(), rowMap->getComm()));
3263 }
else if (prototypeImporter.is_null()) {
3265 ArrayView<const GO> rows = targetMap->getLocalElementList();
3266 size_t numRows = targetMap->getLocalNumElements();
3268 Array<GO> remoteRows(numRows);
3269 for (
size_t i = 0; i < numRows; ++i) {
3270 const LO mlid = rowMap->getLocalElement(rows[i]);
3272 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3273 remoteRows[numRemote++] = rows[i];
3275 remoteRows.resize(numRemote);
3276 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3277 rowMap->getIndexBase(), rowMap->getComm()));
3286 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3287 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3294 RCP<const import_type> importer;
3296 if (!remoteRowMap.is_null() && (remoteRowMap->getGlobalNumElements() > 0)) {
3299 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3301 importer = rcp(
new import_type(rowMap, remoteRowMap));
3303 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match M.getRowMap()!");
3306 if (importer != null) {
3311 Mview.importMatrix->getApplyHelper();
3314 Mview.importColMap = Mview.importMatrix->getColMap();
3320template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3322merge_matrices(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3323 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3324 const LocalOrdinalViewType& Acol2Brow,
3325 const LocalOrdinalViewType& Acol2Irow,
3326 const LocalOrdinalViewType& Bcol2Ccol,
3327 const LocalOrdinalViewType& Icol2Ccol,
3328 const size_t mergedNodeNumCols) {
3331 typedef typename KCRS::StaticCrsGraphType graph_t;
3332 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3333 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3334 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3336 const KCRS Ak = Aview.
origMatrix->getLocalMatrixDevice();
3337 const KCRS Bk = Bview.origMatrix->getLocalMatrixDevice();
3340 if (!Bview.importMatrix.is_null() || (Bview.importMatrix.is_null() && (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3346 if (!Bview.importMatrix.is_null()) Iks = Bview.importMatrix->getLocalMatrixDevice();
3348 size_t merge_numrows = Ak.numCols();
3351 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3353 const LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3356 typedef typename Node::execution_space execution_space;
3357 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3358 Kokkos::parallel_scan(
3359 "Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type(0, merge_numrows),
3360 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3361 if (
final) Mrowptr(i) = update;
3364 if (Acol2Brow(i) != LO_INVALID)
3365 ct = Bk.graph.row_map(Acol2Brow(i) + 1) - Bk.graph.row_map(Acol2Brow(i));
3367 ct = Iks.graph.row_map(Acol2Irow(i) + 1) - Iks.graph.row_map(Acol2Irow(i));
3370 if (
final && i + 1 == merge_numrows)
3371 Mrowptr(i + 1) = update;
3375 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr, merge_numrows);
3376 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"), merge_nnz);
3377 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"), merge_nnz);
3380 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3381 Kokkos::parallel_for(
3382 "Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type(0, merge_numrows), KOKKOS_LAMBDA(
const size_t i) {
3383 if (Acol2Brow(i) != LO_INVALID) {
3384 size_t row = Acol2Brow(i);
3385 size_t start = Bk.graph.row_map(row);
3386 for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
3387 Mvalues(j) = Bk.values(j - Mrowptr(i) + start);
3388 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j - Mrowptr(i) + start));
3391 size_t row = Acol2Irow(i);
3392 size_t start = Iks.graph.row_map(row);
3393 for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
3394 Mvalues(j) = Iks.values(j - Mrowptr(i) + start);
3395 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j - Mrowptr(i) + start));
3400 KCRS newmat(
"CrsMatrix", merge_numrows, mergedNodeNumCols, merge_nnz, Mvalues, Mrowptr, Mcolind);
3410template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3411const typename Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
3412merge_matrices(BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
3413 BlockCrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
3414 const LocalOrdinalViewType& Acol2Brow,
3415 const LocalOrdinalViewType& Acol2Irow,
3416 const LocalOrdinalViewType& Bcol2Ccol,
3417 const LocalOrdinalViewType& Icol2Ccol,
3418 const size_t mergedNodeNumCols) {
3420 typedef typename Tpetra::BlockCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type KBCRS;
3421 typedef typename KBCRS::StaticCrsGraphType graph_t;
3422 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3423 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3424 typedef typename KBCRS::values_type::non_const_type scalar_view_t;
3427 const KBCRS Ak = Aview.
origMatrix->getLocalMatrixDevice();
3428 const KBCRS Bk = Bview.origMatrix->getLocalMatrixDevice();
3431 if (!Bview.importMatrix.is_null() ||
3432 (Bview.importMatrix.is_null() &&
3433 (&*Aview.origMatrix->getGraph()->getColMap() != &*Bview.origMatrix->getGraph()->getRowMap()))) {
3438 if (!Bview.importMatrix.is_null()) Iks = Bview.importMatrix->getLocalMatrixDevice();
3439 size_t merge_numrows = Ak.numCols();
3442 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3444 const LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3447 typedef typename Node::execution_space execution_space;
3448 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3449 Kokkos::parallel_scan(
3450 "Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type(0, merge_numrows),
3451 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3452 if (
final) Mrowptr(i) = update;
3455 if (Acol2Brow(i) != LO_INVALID)
3456 ct = Bk.graph.row_map(Acol2Brow(i) + 1) - Bk.graph.row_map(Acol2Brow(i));
3458 ct = Iks.graph.row_map(Acol2Irow(i) + 1) - Iks.graph.row_map(Acol2Irow(i));
3461 if (
final && i + 1 == merge_numrows)
3462 Mrowptr(i + 1) = update;
3466 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr, merge_numrows);
3467 const int blocksize = Ak.blockDim();
3468 lno_nnz_view_t Mcolind(Kokkos::ViewAllocateWithoutInitializing(
"Mcolind"), merge_nnz);
3469 scalar_view_t Mvalues(Kokkos::ViewAllocateWithoutInitializing(
"Mvals"), merge_nnz * blocksize * blocksize);
3472 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3473 Kokkos::parallel_for(
3474 "Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type(0, merge_numrows), KOKKOS_LAMBDA(
const size_t i) {
3475 if (Acol2Brow(i) != LO_INVALID) {
3476 size_t row = Acol2Brow(i);
3477 size_t start = Bk.graph.row_map(row);
3478 for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
3479 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j - Mrowptr(i) + start));
3481 for (
int b = 0; b < blocksize * blocksize; ++b) {
3482 const int val_indx = j * blocksize * blocksize + b;
3483 const int b_val_indx = (j - Mrowptr(i) +
start) * blocksize * blocksize + b;
3484 Mvalues(val_indx) = Bk.values(b_val_indx);
3488 size_t row = Acol2Irow(i);
3489 size_t start = Iks.graph.row_map(row);
3490 for (
size_t j = Mrowptr(i); j < Mrowptr(i + 1); j++) {
3491 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j - Mrowptr(i) + start));
3493 for (
int b = 0; b < blocksize * blocksize; ++b) {
3494 const int val_indx = j * blocksize * blocksize + b;
3495 const int b_val_indx = (j - Mrowptr(i) +
start) * blocksize * blocksize + b;
3496 Mvalues(val_indx) = Iks.values(b_val_indx);
3503 KBCRS newmat(
"CrsMatrix", merge_numrows, mergedNodeNumCols, merge_nnz, Mvalues, Mrowptr, Mcolind, blocksize);
3512template <
typename SC,
typename LO,
typename GO,
typename NO>
3513void AddKernels<SC, LO, GO, NO>::
3515 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3516 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3517 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3518 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3519 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3520 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3521 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3522 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3524 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3525 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3526 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds) {
3528 using Teuchos::TimeMonitor;
3529 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3530 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3531 auto nrows = Arowptrs.extent(0) - 1;
3532 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3533 typename AddKern::KKH handle;
3534 handle.create_spadd_handle(
true);
3535 auto addHandle = handle.get_spadd_handle();
3539 KokkosSparse::spadd_symbolic(&handle,
3540 nrows, numGlobalCols,
3541 Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3543 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3544 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3548 KokkosSparse::spadd_numeric(&handle,
3549 nrows, numGlobalCols,
3550 Arowptrs, Acolinds, Avals, scalarA,
3551 Browptrs, Bcolinds, Bvals, scalarB,
3552 Crowptrs, Ccolinds, Cvals);
3555template <
typename SC,
typename LO,
typename GO,
typename NO>
3556void AddKernels<SC, LO, GO, NO>::
3558 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
3559 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
3560 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
3561 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3562 const typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
3563 const typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
3564 const typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
3565 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3567 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3568 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3569 typename MMdetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds) {
3571 using Teuchos::TimeMonitor;
3572 using AddKern = MMdetails::AddKernels<SC, LO, GO, NO>;
3573 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
3574 auto nrows = Arowptrs.extent(0) - 1;
3575 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3576 typedef MMdetails::AddKernels<SC, LO, GO, NO> AddKern;
3577 typename AddKern::KKH handle;
3578 handle.create_spadd_handle(
false);
3579 auto addHandle = handle.get_spadd_handle();
3582 KokkosSparse::spadd_symbolic(&handle,
3583 nrows, numGlobalCols,
3584 Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
3586 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3587 Ccolinds = col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3590 KokkosSparse::spadd_numeric(&handle,
3591 nrows, numGlobalCols,
3592 Arowptrs, Acolinds, Avals, scalarA,
3593 Browptrs, Bcolinds, Bvals, scalarB,
3594 Crowptrs, Ccolinds, Cvals);
3597template <
typename GO,
3598 typename LocalIndicesType,
3599 typename GlobalIndicesType,
3600 typename ColMapType>
3601struct ConvertLocalToGlobalFunctor {
3602 ConvertLocalToGlobalFunctor(
3603 const LocalIndicesType& colindsOrig_,
3604 const GlobalIndicesType& colindsConverted_,
3605 const ColMapType& colmap_)
3606 : colindsOrig(colindsOrig_)
3607 , colindsConverted(colindsConverted_)
3608 , colmap(colmap_) {}
3609 KOKKOS_INLINE_FUNCTION
void
3610 operator()(
const GO i)
const {
3611 colindsConverted(i) = colmap.getGlobalElement(colindsOrig(i));
3613 LocalIndicesType colindsOrig;
3614 GlobalIndicesType colindsConverted;
3618template <
typename SC,
typename LO,
typename GO,
typename NO>
3619void MMdetails::AddKernels<SC, LO, GO, NO>::
3620 convertToGlobalAndAdd(
3621 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS A,
3622 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
3623 const typename MMdetails::AddKernels<SC, LO, GO, NO>::KCRS B,
3624 const typename MMdetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
3625 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
3626 const typename MMdetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
3627 typename MMdetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
3628 typename MMdetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
3629 typename MMdetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds) {
3631 using Teuchos::TimeMonitor;
3634 using KKH_GO = KokkosKernels::Experimental::KokkosKernelsHandle<size_t, GO, impl_scalar_type,
3635 typename NO::execution_space,
typename NO::memory_space,
typename NO::memory_space>;
3637 const values_array Avals = A.values;
3638 const values_array Bvals = B.values;
3639 const col_inds_array Acolinds = A.graph.entries;
3640 const col_inds_array Bcolinds = B.graph.entries;
3641 auto Arowptrs = A.graph.row_map;
3642 auto Browptrs = B.graph.row_map;
3643 global_col_inds_array AcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"A colinds (converted)"), Acolinds.extent(0));
3644 global_col_inds_array BcolindsConverted(Kokkos::ViewAllocateWithoutInitializing(
"B colinds (converted)"), Bcolinds.extent(0));
3648 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(Acolinds, AcolindsConverted, AcolMap);
3649 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
3650 ConvertLocalToGlobalFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(Bcolinds, BcolindsConverted, BcolMap);
3651 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
3653 handle.create_spadd_handle(
false);
3654 auto addHandle = handle.get_spadd_handle();
3657 auto nrows = Arowptrs.extent(0) - 1;
3658 Crowptrs = row_ptrs_array(Kokkos::ViewAllocateWithoutInitializing(
"C row ptrs"), nrows + 1);
3659 KokkosSparse::spadd_symbolic(&handle,
3661 Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
3662 Cvals = values_array(
"C values", addHandle->get_c_nnz());
3663 Ccolinds = global_col_inds_array(Kokkos::ViewAllocateWithoutInitializing(
"C colinds"), addHandle->get_c_nnz());
3667 KokkosSparse::spadd_numeric(&handle,
3669 Arowptrs, AcolindsConverted, Avals, scalarA,
3670 Browptrs, BcolindsConverted, Bvals, scalarB,
3671 Crowptrs, Ccolinds, Cvals);
3686#define TPETRA_MATRIXMATRIX_INSTANT(SCALAR, LO, GO, NODE) \
3687 template void MatrixMatrix::Multiply( \
3688 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3690 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3692 CrsMatrix<SCALAR, LO, GO, NODE>& C, \
3693 bool call_FillComplete_on_result, \
3694 const std::string& label, \
3695 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3697 template void MatrixMatrix::Multiply( \
3698 const Teuchos::RCP<const BlockCrsMatrix<SCALAR, LO, GO, NODE>>& A, \
3700 const Teuchos::RCP<const BlockCrsMatrix<SCALAR, LO, GO, NODE>>& B, \
3702 Teuchos::RCP<BlockCrsMatrix<SCALAR, LO, GO, NODE>>& C, \
3703 const std::string& label); \
3705 template void MatrixMatrix::Jacobi( \
3706 typename Teuchos::ScalarTraits<SCALAR>::magnitudeType omega, \
3707 const Vector<SCALAR, LO, GO, NODE>& Dinv, \
3708 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3709 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3710 CrsMatrix<SCALAR, LO, GO, NODE>& C, \
3711 bool call_FillComplete_on_result, \
3712 const std::string& label, \
3713 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3715 template void MatrixMatrix::Add( \
3716 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3719 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3722 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>& C); \
3724 template void MatrixMatrix::Add( \
3725 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3728 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3731 const Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>& C); \
3733 template void MatrixMatrix::Add( \
3734 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3737 CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3740 template Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>> \
3741 MatrixMatrix::add<SCALAR, LO, GO, NODE>(const SCALAR& alpha, \
3742 const bool transposeA, \
3743 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3744 const SCALAR& beta, \
3745 const bool transposeB, \
3746 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3747 const Teuchos::RCP<const Map<LO, GO, NODE>>& domainMap, \
3748 const Teuchos::RCP<const Map<LO, GO, NODE>>& rangeMap, \
3749 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3752 MatrixMatrix::add<SCALAR, LO, GO, NODE>(const SCALAR& alpha, \
3753 const bool transposeA, \
3754 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
3755 const SCALAR& beta, \
3756 const bool transposeB, \
3757 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \
3758 CrsMatrix<SCALAR, LO, GO, NODE>& C, \
3759 const Teuchos::RCP<const Map<LO, GO, NODE>>& domainMap, \
3760 const Teuchos::RCP<const Map<LO, GO, NODE>>& rangeMap, \
3761 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3763 template struct MMdetails::AddKernels<SCALAR, LO, GO, NODE>; \
3765 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const CrsMatrix<SCALAR, LO, GO, NODE>& M, \
3766 Teuchos::RCP<const Map<LO, GO, NODE>> targetMap, \
3767 CrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3768 Teuchos::RCP<const Import<LO, GO, NODE>> prototypeImporter, \
3769 bool userAssertsThereAreNoRemotes, \
3770 const std::string& label, \
3771 const Teuchos::RCP<Teuchos::ParameterList>& params); \
3773 template void MMdetails::import_and_extract_views<SCALAR, LO, GO, NODE>(const BlockCrsMatrix<SCALAR, LO, GO, NODE>& M, \
3774 Teuchos::RCP<const Map<LO, GO, NODE>> targetMap, \
3775 BlockCrsMatrixStruct<SCALAR, LO, GO, NODE>& Mview, \
3776 Teuchos::RCP<const Import<LO, GO, NODE>> prototypeImporter, \
3777 bool userAssertsThereAreNoRemotes);
Matrix Market file readers and writers for Tpetra objects.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
Forward declaration of some Tpetra Matrix Matrix objects.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Struct that holds views of the contents of a CrsMatrix.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static void writeSparseFile(const std::string &filename, const sparse_matrix_type &matrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
void start()
Start the deep_copy counter.
void Jacobi(typename Teuchos::ScalarTraits< Scalar >::magnitudeType omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeCrsMatrixZeros(CrsMatrixType &matrix, typename Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitudeType const &threshold=Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::magnitude(Teuchos::ScalarTraits< typename CrsMatrixType::scalar_type >::zero()))
Remove zero entries from a matrix.