11#ifndef AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP 
   12#define AMESOS2_TPETRACRSMATRIX_MATRIXADAPTER_DEF_HPP 
   15#include "Amesos2_TpetraRowMatrix_AbstractMatrixAdapter_def.hpp" 
   16#include "Amesos2_MatrixAdapter_def.hpp" 
   20  template <
typename Scalar,
 
   21            typename LocalOrdinal,
 
   22            typename GlobalOrdinal,
 
   24  ConcreteMatrixAdapter<
 
   25    Tpetra::CrsMatrix<Scalar,
 
   29    >::ConcreteMatrixAdapter(Teuchos::RCP<matrix_t> m)
 
   30      : AbstractConcreteMatrixAdapter<Tpetra::RowMatrix<Scalar,
 
   34                                      Tpetra::CrsMatrix<Scalar,
 
   40  template <
typename Scalar,
 
   41            typename LocalOrdinal,
 
   42            typename GlobalOrdinal,
 
   44  Teuchos::RCP<const MatrixAdapter<Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > >
 
   45  ConcreteMatrixAdapter<
 
   46    Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
 
   47    >::get_impl(
const Teuchos::Ptr<const map_t> map, EDistribution distribution)
 const 
   51      using Teuchos::rcpFromPtr;
 
   52      typedef Tpetra::Import<local_ordinal_t, global_ordinal_t, node_t> import_t;
 
   54      RCP<import_t> importer =
 
   55        rcp (
new import_t (this->getRowMap(), rcpFromPtr (map)));
 
   59      t_mat = Tpetra::importAndFillCompleteCrsMatrix<matrix_t>( (this->mat_), *importer ); 
 
   62      if ( distribution == CONTIGUOUS_AND_ROOTED ) {
 
   64        auto myRank = map->getComm()->getRank();
 
   66        auto local_matrix = t_mat->getLocalMatrixDevice();
 
   67        const size_t global_num_contiguous_entries = t_mat->getGlobalNumRows();
 
   68        const size_t local_num_contiguous_entries = (myRank == 0) ? t_mat->getGlobalNumRows() : 0;
 
   72        RCP<const map_t> contiguousRowMap = rcp( 
new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
 
   73        RCP<const map_t> contiguousColMap = rcp( 
new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
 
   74        RCP<const map_t> contiguousDomainMap = rcp( 
new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
 
   75        RCP<const map_t> contiguousRangeMap  = rcp( 
new map_t(global_num_contiguous_entries, local_num_contiguous_entries, 0, (t_mat->getComm() ) ) );
 
   77        RCP<matrix_t> contiguous_t_mat = rcp( 
new matrix_t(contiguousRowMap, contiguousColMap, local_matrix) );
 
   78        contiguous_t_mat->resumeFill();
 
   79        contiguous_t_mat->expertStaticFillComplete(contiguousDomainMap, contiguousRangeMap);
 
   81        return rcp (
new ConcreteMatrixAdapter<matrix_t> (contiguous_t_mat));
 
   84      return rcp (
new ConcreteMatrixAdapter<matrix_t> (t_mat));
 
   89  template <
typename Scalar,
 
   90            typename LocalOrdinal,
 
   91            typename GlobalOrdinal,
 
   93  Teuchos::RCP<const MatrixAdapter<Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > >
 
   94  ConcreteMatrixAdapter<
 
   95    Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
 
   96    >::reindex_impl(Teuchos::RCP<const map_t> &contigRowMap,
 
   97                    Teuchos::RCP<const map_t> &contigColMap,
 
   98                    const EPhase current_phase)
 const 
  100      typedef Tpetra::Map< local_ordinal_t, global_ordinal_t, node_t> contiguous_map_type;
 
  103#ifdef HAVE_AMESOS2_TIMERS 
  104      auto reindexTimer = Teuchos::TimeMonitor::getNewTimer(
"Time to re-index matrix gids");
 
  105      Teuchos::TimeMonitor ReindexTimer(*reindexTimer);
 
  108      auto rowMap = this->mat_->getRowMap();
 
  109      auto colMap = this->mat_->getColMap();
 
  110      auto rowComm = rowMap->getComm();
 
  111      auto colComm = colMap->getComm();
 
  113      GlobalOrdinal indexBase = rowMap->getIndexBase();
 
  114      GlobalOrdinal numDoFs = this->mat_->getGlobalNumRows();
 
  115      LocalOrdinal nRows = this->mat_->getLocalNumRows();
 
  116      LocalOrdinal nCols = colMap->getLocalNumElements();
 
  118      RCP<matrix_t> contiguous_t_mat;
 
  120      if(current_phase == PREORDERING || current_phase == SYMBFACT) {
 
  121        auto tmpMap = rcp (
new contiguous_map_type (numDoFs, nRows, indexBase, rowComm));
 
  122        global_ordinal_t frow = tmpMap->getMinGlobalIndex();
 
  125        Kokkos::View<global_ordinal_t*, HostExecSpaceType> rowIndexList (
"rowIndexList", nRows);
 
  126        for (local_ordinal_t k = 0; k < nRows; k++) {
 
  127          rowIndexList(k) = frow+k; 
 
  130        Kokkos::View<global_ordinal_t*, HostExecSpaceType> colIndexList (
"colIndexList", nCols);
 
  133        for (local_ordinal_t k = 0; k < nCols; k++) {
 
  134          colIndexList(k) = numDoFs+indexBase;
 
  136        typedef Tpetra::MultiVector<global_ordinal_t,
 
  140        Teuchos::ArrayView<const global_ordinal_t> rowIndexArray(rowIndexList.data(), nRows);
 
  141        Teuchos::ArrayView<const global_ordinal_t> colIndexArray(colIndexList.data(), nCols);
 
  142        gid_mv_t row_mv (rowMap, rowIndexArray, nRows, 1);
 
  143        gid_mv_t col_mv (colMap, colIndexArray, nCols, 1);
 
  144        typedef Tpetra::Import<local_ordinal_t, global_ordinal_t, node_t> import_t;
 
  145        RCP<import_t> importer = rcp (
new import_t (rowMap, colMap));
 
  146        col_mv.doImport (row_mv, *importer, Tpetra::INSERT);
 
  149          auto col_view = col_mv.getLocalViewHost(Tpetra::Access::ReadOnly);
 
  150          for(
int i=0; i<nCols; i++) colIndexList(i) = col_view(i,0);
 
  153        contigRowMap = rcp (
new contiguous_map_type (numDoFs, rowIndexList.data(), nRows, indexBase, rowComm));
 
  154        contigColMap = rcp (
new contiguous_map_type (numDoFs, colIndexList.data(), nCols, indexBase, colComm));
 
  157        auto lclMatrix = this->mat_->getLocalMatrixDevice();
 
  158        contiguous_t_mat = rcp( 
new matrix_t(contigRowMap, contigColMap, lclMatrix));
 
  161        auto lclMatrix = this->mat_->getLocalMatrixDevice();
 
  162        auto importer  = this->mat_->getCrsGraph()->getImporter();
 
  163        auto exporter  = this->mat_->getCrsGraph()->getExporter();
 
  164        contiguous_t_mat = rcp( 
new matrix_t(lclMatrix, contigRowMap, contigColMap, contigRowMap, contigColMap, importer,exporter));
 
  168      return rcp (
new ConcreteMatrixAdapter<matrix_t> (contiguous_t_mat));
 
  172  template <
typename Scalar,
 
  173            typename LocalOrdinal,
 
  174            typename GlobalOrdinal,
 
  176  template<
typename KV_S, 
typename KV_GO, 
typename KV_GS, 
typename host_ordinal_type_array, 
typename host_scalar_type_array>
 
  178  ConcreteMatrixAdapter<
 
  179    Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
 
  180    >::gather_impl(KV_S& nzvals, KV_GO& indices, KV_GS& pointers,
 
  181                   host_ordinal_type_array &perm_g2l,
 
  182                   host_ordinal_type_array &recvCountRows, host_ordinal_type_array &recvDisplRows,
 
  183                   host_ordinal_type_array &recvCounts, host_ordinal_type_array &recvDispls,
 
  184                   host_ordinal_type_array &transpose_map, host_scalar_type_array &nzvals_t,
 
  185                   bool column_major, EPhase current_phase)
 const 
  187      LocalOrdinal ret = -1;
 
  189#ifdef HAVE_AMESOS2_TIMERS 
  190        Teuchos::RCP< Teuchos::Time > gatherTime = Teuchos::TimeMonitor::getNewCounter (
"Amesos2::gather");
 
  191        Teuchos::TimeMonitor GatherTimer(*gatherTime);
 
  193        auto rowMap = this->mat_->getRowMap();
 
  194        auto colMap = this->mat_->getColMap();
 
  195        auto comm = rowMap->getComm();
 
  196        auto nRanks = comm->getSize();
 
  197        auto myRank = comm->getRank();
 
  199        global_ordinal_t nRows = this->mat_->getGlobalNumRows();
 
  200        auto lclMatrix = this->mat_->getLocalMatrixDevice();
 
  203        if(current_phase == PREORDERING || current_phase == SYMBFACT) {
 
  205          auto lclRowptr_d = lclMatrix.graph.row_map;
 
  206          auto lclColind_d = lclMatrix.graph.entries;
 
  207          auto lclRowptr = Kokkos::create_mirror_view(lclRowptr_d);
 
  208          auto lclColind = Kokkos::create_mirror_view(lclColind_d);
 
  209          Kokkos::deep_copy(lclRowptr, lclRowptr_d);
 
  210          Kokkos::deep_copy(lclColind, lclColind_d);
 
  213          global_ordinal_t rowIndexBase = rowMap->getIndexBase();
 
  214          global_ordinal_t colIndexBase = colMap->getIndexBase();
 
  216          host_ordinal_type_array  perm_l2g; 
 
  219          bool swap_cols = 
false; 
 
  224          LocalOrdinal myNRows = this->mat_->getLocalNumRows();
 
  225          Kokkos::resize(recvCounts, nRanks);
 
  226          Kokkos::resize(recvDispls, nRanks+1);
 
  227          bool need_to_perm = 
false;
 
  229#ifdef HAVE_AMESOS2_TIMERS 
  230            Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter (
"Amesos2::gather(rowptr)");
 
  231            Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
 
  233            Teuchos::gather<int, LocalOrdinal> (&myNRows, 1, recvCounts.data(), 1, 0, *comm);
 
  235              Kokkos::resize(recvDispls, nRanks+1);
 
  237              for (
int p = 1; p <= nRanks; p++) {
 
  238                recvDispls(p) = recvDispls(p-1) + recvCounts(p-1);
 
  240              if (recvDispls(nRanks) != nRows) {
 
  241                TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, 
"Amesos2_TpetraCrsMatrix_MatrixAdapter::gather_impl : mismatch between gathered(local nrows) and global nrows.");
 
  244              for (
int p = 0; p < nRanks; p++) {
 
  248              recvDispls(nRanks) = 0;
 
  252              host_ordinal_type_array lclMap;
 
  253              Kokkos::resize(lclMap, myNRows);
 
  255                Kokkos::resize(perm_g2l, nRows);
 
  256                Kokkos::resize(perm_l2g, nRows);
 
  258                Kokkos::resize(perm_g2l, 1);
 
  260              for (
int i=0; i < myNRows; i++) {
 
  261                lclMap(i) = rowMap->getGlobalElement(i);
 
  263              Teuchos::gatherv<int, LocalOrdinal> (lclMap.data(), myNRows, perm_g2l.data(),
 
  264                                                   recvCounts.data(), recvDispls.data(),
 
  267                for (
int i=0; i < nRows; i++) {
 
  268                  perm_g2l(i) -= rowIndexBase; 
 
  269                  perm_l2g(perm_g2l(i)) = i;   
 
  270                  if (i != perm_g2l(i)) need_to_perm = 
true;
 
  276            KV_GS lclRowptr_ (
"localRowptr_", lclRowptr.extent(0));
 
  277            for (
int i = 0; i < int(lclRowptr.extent(0)); i++) lclRowptr_(i) = lclRowptr(i);
 
  278            if (myRank == 0 && (column_major || need_to_perm)) {
 
  279              Kokkos::resize(pointers_t, nRows+1);
 
  280            } 
else if (myRank != 0) {
 
  281              Kokkos::resize(pointers_t, 2);
 
  283            LocalOrdinal sendIdx = (myNRows > 0 ? 1 : 0); 
 
  284            LocalOrdinal *pointers_ = ((myRank != 0 || (column_major || need_to_perm)) ? pointers_t.data() : pointers.data());
 
  285            Teuchos::gatherv<int, LocalOrdinal> (&lclRowptr_(sendIdx), myNRows, &pointers_[1],
 
  286                                                 recvCounts.data(), recvDispls.data(),
 
  290            Kokkos::resize(recvCountRows, nRanks);
 
  291            Kokkos::resize(recvDisplRows, nRanks+1);
 
  292            Kokkos::deep_copy(recvCountRows, recvCounts);
 
  293            Kokkos::deep_copy(recvDisplRows, recvDispls);
 
  297              recvCounts(0) = pointers_[recvDispls(1)];
 
  298              LocalOrdinal displs = recvCounts(0);
 
  299              for (
int p = 1; p < nRanks; p++) {
 
  302                if (recvDispls(p+1) > recvDispls(p)) {
 
  304                  recvCounts(p) = pointers_[recvDispls(p+1)];
 
  306                  for (
int i = 1+recvDispls(p); i <= recvDispls(p+1); i++) {
 
  307                    pointers_[i] += displs;
 
  309                  displs += recvCounts(p);
 
  312              ret = pointers_[nRows];
 
  316#ifdef HAVE_AMESOS2_TIMERS 
  317            Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter (
"Amesos2::gather(colind)");
 
  318            Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
 
  323              for (
int p = 0; p < nRanks; p++) {
 
  324                recvDispls(p+1) = recvDispls(p) + recvCounts(p);
 
  328            KV_GO lclColind_ (
"localColind_", lclColind.extent(0));
 
  329            for (
int i = 0; i < int(lclColind.extent(0)); i++) {
 
  330              lclColind_(i) = (colMap->getGlobalElement((lclColind(i))) - colIndexBase);
 
  332            if (column_major || need_to_perm) {
 
  333              Kokkos::resize(indices_t, indices.extent(0));
 
  334              Teuchos::gatherv<int, LocalOrdinal> (lclColind_.data(), lclColind_.extent(0), indices_t.data(),
 
  335                                                   recvCounts.data(), recvDispls.data(),
 
  338              Teuchos::gatherv<int, LocalOrdinal> (lclColind_.data(), lclColind_.extent(0), indices.data(),
 
  339                                                   recvCounts.data(), recvDispls.data(),
 
  344#ifdef HAVE_AMESOS2_TIMERS 
  345            Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter (
"Amesos2::gather(transpose index)");
 
  346            Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
 
  348            if (swap_cols && need_to_perm) {
 
  350              for (
int i=0; i<ret; i++) {
 
  351                if (column_major || need_to_perm) {
 
  352                  indices_t(i) = perm_l2g(indices_t(i));
 
  354                  indices(i) = perm_l2g(indices(i));
 
  361              Kokkos::resize(transpose_map, ret);
 
  363              for (
int i=0; i<=nRows; i++) {
 
  366              for (
int k=0; k<ret; k++) {
 
  367                int col = indices_t(k);
 
  372              for (
int i=1; i < nRows; i++) {
 
  373                pointers(i+1) += pointers(i);
 
  376              for (
int row=0; row<nRows; row++) {
 
  378                int i = (swap_cols ? row : perm_l2g(row));
 
  379                for (
int k=pointers_t(i); k<pointers_t(i+1); k++) {
 
  380                  int col = indices_t(k);
 
  382                    transpose_map(k) = pointers(1+col);
 
  383                    indices(pointers(1+col)) = row;
 
  387                    transpose_map(k) = -1;
 
  391            } 
else if (need_to_perm) {
 
  393              Kokkos::resize(transpose_map, ret);
 
  394              for (
int i=0; i<nRows; i++) {
 
  395                int row = perm_g2l(i);
 
  396                pointers(row+2) = pointers_t(i+1)-pointers_t(i);
 
  398              for (
int i=1; i < nRows; i++) {
 
  399                pointers(i+1) += pointers(i);
 
  401              for (
int i=0; i<nRows; i++) {
 
  402                int row = perm_g2l(i);
 
  403                for (
int k=pointers_t(i); k<pointers_t(i+1); k++) {
 
  404                  int col = indices_t(k);
 
  406                    transpose_map(k) = pointers(1+row);
 
  407                    indices(pointers(1+row)) = col;
 
  410                    transpose_map(k) = -1;
 
  415              Kokkos::resize(transpose_map, 0);
 
  418          if (!need_to_perm || swap_cols) {
 
  420            Kokkos::resize(perm_g2l, 0);
 
  426#ifdef HAVE_AMESOS2_TIMERS 
  427            Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter (
"Amesos2::gather(nzvals)");
 
  428            Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
 
  431            auto lclNzvals_d = lclMatrix.values;
 
  432            auto lclNzvals = Kokkos::create_mirror_view(lclNzvals_d);;
 
  433            Kokkos::deep_copy(lclNzvals, lclNzvals_d);
 
  436            if (transpose_map.extent(0) > 0) {
 
  437              Kokkos::resize(nzvals_t, nzvals.extent(0));
 
  438              Teuchos::gatherv<int, Scalar> (
reinterpret_cast<Scalar*
> (lclNzvals.data()), lclNzvals.extent(0),
 
  439                                             reinterpret_cast<Scalar*
> (nzvals_t.data()), recvCounts.data(), recvDispls.data(),
 
  442              Teuchos::gatherv<int, Scalar> (
reinterpret_cast<Scalar*
> (lclNzvals.data()), lclNzvals.extent(0),
 
  443                                             reinterpret_cast<Scalar*
> (nzvals.data()), recvCounts.data(), recvDispls.data(),
 
  449            ret = pointers(nRows);
 
  450#ifdef HAVE_AMESOS2_TIMERS 
  451            Teuchos::RCP< Teuchos::Time > gatherTime_ = Teuchos::TimeMonitor::getNewCounter (
"Amesos2::gather(transpose values)");
 
  452            Teuchos::TimeMonitor GatherTimer_(*gatherTime_);
 
  454            if (transpose_map.extent(0) > 0) {
 
  460              Kokkos::parallel_for(
"Amesos2::TpetraCrsMatrixAdapter::gather", Kokkos::RangePolicy<HostExecSpaceType>(0, ret),
 
  461                KOKKOS_LAMBDA(
const int k) { 
if (transpose_map(k) >= 0) nzvals(transpose_map(k)) = nzvals_t(k); });
 
  466        Teuchos::broadcast<int, LocalOrdinal>(*comm, 0, 1, &ret);
 
  472  template <
typename Scalar,
 
  473            typename LocalOrdinal,
 
  474            typename GlobalOrdinal,
 
  477  ConcreteMatrixAdapter<
 
  478    Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>
 
  479    >::describe (Teuchos::FancyOStream& os,
 
  480                 const Teuchos::EVerbosityLevel verbLevel)
 const 
  482      this->mat_->describe(os, verbLevel);
 
 
Specialization of the ConcreteMatrixAdapter for Tpetra::CrsMatrix. Inherits all its functionality fro...