49#include <Epetra_Export.h> 
   50#include <Epetra_Import.h> 
   51#include <Epetra_Util.h> 
   52#include <Epetra_Map.h> 
   53#include <Epetra_Comm.h> 
   54#include <Epetra_CrsMatrix.h> 
   55#include <Epetra_Vector.h> 
   56#include <Epetra_Directory.h> 
   57#include <Epetra_HashTable.h> 
   58#include <Epetra_Distributor.h> 
   59#include <Epetra_IntSerialDenseVector.h> 
   66#include <Epetra_MpiDistributor.h> 
   70#include <Teuchos_TimeMonitor.hpp> 
   82  int nnzperrow=(int)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
 
   85  return (
int)(A.
NumMyRows()*nnzperrow*0.75 + 100);
 
 
   93static inline int auto_resize(std::vector<int> &x,
int num_new){
 
   94  int newsize=x.size() + EPETRA_MAX((
int)x.size(),num_new);
 
  103template<
typename int_type>
 
  105                                        std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols)
 
  109#ifdef ENABLE_MMM_TIMINGS 
  110  using Teuchos::TimeMonitor;
 
  111  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 1")));
 
  117  int Bstart=0, Istart=0, Cstart=0,Pstart=0;
 
  124  int_type * Bgids = 0;
 
  125  BColMap.MyGlobalElementsPtr(Bgids);
 
  127  int_type * Igids    = 0;
 
  131  if((
int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb);
 
  132  if((
int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni);
 
  140    if(!Distor) EPETRA_CHK_ERR(-2);
 
  147  std::vector<int> Bpids(Nb), Ipids(Ni);
 
  149  else Bpids.assign(Nb,-1);
 
  163  std::vector<int_type> Cgids(Csize);
 
  164  Cremotepids.resize(Psize);
 
  166#ifdef ENABLE_MMM_TIMINGS 
  167  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 2")));
 
  179    for(i=0; i<DomainMap.
NumMyElements(); i++) Bcols2Ccols[i] = i;
 
  184    for(i = 0; i < NumDomainElements; i++) {
 
  185      int_type GID = (int_type) DomainMap.GID64(i);
 
  186      int LID = BColMap.
LID(GID);
 
  189        Bcols2Ccols[LID]=Cstart;
 
  196        LID = IColMap.
LID(GID);
 
  198          Icols2Ccols[LID]=Cstart;
 
  207  for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) {
 
  208    while(Cgids[j]!=Igids[i]) j++;
 
  214#ifdef ENABLE_MMM_TIMINGS 
  215  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 3")));
 
  223  int initial_temp_length = 0;
 
  224  const int * lengths_from=0;
 
  227    for(i=0; i < Distor->
NumReceives(); i++) initial_temp_length += lengths_from[i];
 
  229  else initial_temp_length=100;
 
  231  std::vector<int_type> Btemp(initial_temp_length),  Itemp(initial_temp_length);
 
  232  std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length);
 
  235  while (Bstart < Nb || Istart < Ni) {
 
  236    int Bproc=NumProc+1, Iproc=NumProc+1, Cproc;
 
  239    if(Bstart < Nb) Bproc=Bpids[Bstart];
 
  240    if(Istart < Ni) Iproc=Ipids[Istart];
 
  242    Cproc = (Bproc < Iproc)?Bproc:Iproc;
 
  244    if(Bproc == Cproc && Iproc != Cproc) {
 
  247      for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
 
  251      int tCsize = Bnext-Bstart;
 
  252      if(Btemp.size() < (
size_t)tCsize) {Btemp2.resize(tCsize);}
 
  254      for(i=Bstart; i<Bnext; i++) {
 
  255        Cremotepids[i-Bstart+Pstart] = Cproc;
 
  256        Cgids[i-Bstart+Cstart]       = Bgids[i];
 
  257        Btemp2[i-Bstart]             = i;
 
  261      int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
 
  262      util.
Sort(
true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0);
 
  264      for(i=0, j=Cstart; i<tCsize; i++){
 
  265        while(Cgids[j] != Bgids[Btemp2[i]]) j++;
 
  266        Bcols2Ccols[Btemp2[i]] =  j;
 
  272    else if(Bproc != Cproc && Iproc == Cproc) {
 
  275      for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
 
  279      int tCsize = Inext-Istart;
 
  280      if(Itemp.size() < (
size_t)tCsize) {Itemp2.resize(tCsize);}
 
  282      for(i=Istart; i<Inext; i++) {
 
  283        Cremotepids[i-Istart+Pstart] = Cproc;
 
  284        Cgids[i-Istart+Cstart]       = Igids[i];
 
  285        Itemp2[i-Istart]             = i;
 
  289      int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
 
  290      util.
Sort(
true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0);
 
  292      for(i=0, j=Cstart; i<tCsize; i++){
 
  293        while(Cgids[j] != Igids[Itemp2[i]]) j++;
 
  294        Icols2Ccols[Itemp2[i]] =  j;
 
  304      for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
 
  308      for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
 
  312      int tBsize = Bnext-Bstart;
 
  313      int tIsize = Inext-Istart;
 
  315      if(Btemp.size() < (
size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);}
 
  316      if(Itemp.size() < (
size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);}
 
  318      for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;}
 
  319      for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;}
 
  322      int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0; 
int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
 
  323      util.
Sort(
true, tBsize, Btemp.size() ? &Btemp[0] : 0, 0, 0, 1, &Bptr2, 0, 0);
 
  324      util.
Sort(
true, tIsize, Itemp.size() ? &Itemp[0] : 0, 0, 0, 1, &Iptr2, 0, 0);
 
  325      typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart;
 
  326      typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart);
 
  328      for(i=0, j=Cstart; i<tBsize; i++){
 
  329        while(Cgids[j] != Bgids[Btemp2[i]]) j++;
 
  330        Bcols2Ccols[Btemp2[i]] =  j;
 
  333      for(i=0, j=Cstart; i<tIsize; i++){
 
  334        while(Cgids[j] != Igids[Itemp2[i]]) j++;
 
  335        Icols2Ccols[Itemp2[i]] =  j;
 
  338      for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc;
 
  339      Cstart = (last_el - mycstart) + Cstart;
 
  340      Pstart = (last_el - mycstart) + Pstart;
 
  346#ifdef ENABLE_MMM_TIMINGS 
  347  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 4")));
 
  351  Cremotepids.resize(Pstart);
 
  357  unionmap=
new Epetra_Map((int_type) -1,Cstart,Cgids.size() ? &Cgids[0] : 0, (int_type) B.
ColMap().IndexBase64(),
 
 
  371    double *tmp = 
new double[nnew];
 
  372    for(
int i=0; i<nold; i++)
 
 
  383template<
typename int_type>
 
  387                        std::vector<int> & Bcol2Ccol,
 
  388                        std::vector<int> & Bimportcol2Ccol,
 
  389                        std::vector<int>& Cremotepids,
 
  391      bool keep_all_hard_zeros
 
  393#ifdef ENABLE_MMM_TIMINGS 
  394  using Teuchos::TimeMonitor;
 
  395  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix SerialCore")));
 
  404  int NumMyDiagonals=0; 
 
  411  int *Arowptr, *Acolind;
 
  416  int *Browptr, *Bcolind;
 
  420  int *Irowptr=0, *Icolind=0;
 
  436  std::vector<int> c_status(n, -1);
 
  440  if(CSR_alloc < n) CSR_alloc = n;
 
  441  int CSR_ip=0,OLD_ip=0;
 
  447  CSR_colind.
Resize(CSR_alloc);
 
  451  std::vector<int> NumEntriesPerRow(m);
 
  455    bool found_diagonal=
false;
 
  456    CSR_rowptr[i]=CSR_ip;
 
  458    for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
 
  460      double Aval = Avals[k];
 
  461      if(!keep_all_hard_zeros && Aval==0) 
continue;
 
  466        for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
 
  467          int Cj=Bcol2Ccol[Bcolind[j]];
 
  469          if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
 
  471          if(c_status[Cj]<OLD_ip){
 
  474            CSR_colind[CSR_ip]=Cj;
 
  475            CSR_vals[CSR_ip]=Aval*Bvals[j];
 
  479            CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
 
  485        for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
 
  486          int Cj=Bimportcol2Ccol[Icolind[j]];
 
  488          if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
 
  490          if(c_status[Cj]<OLD_ip){
 
  493            CSR_colind[CSR_ip]=Cj;
 
  494            CSR_vals[CSR_ip]=Aval*Ivals[j];
 
  498            CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
 
  502    NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
 
  505    if(CSR_ip + n > CSR_alloc){
 
  508      CSR_colind.
Resize(CSR_alloc);
 
  513  CSR_rowptr[m]=CSR_ip;
 
  515#ifdef ENABLE_MMM_TIMINGS 
  516  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Final Sort")));
 
  522#ifdef ENABLE_MMM_TIMINGS 
  523  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Fast IE")));
 
  528  int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
 
  530  int *ExportLIDs=0, *ExportPIDs=0;
 
  550#ifdef ENABLE_MMM_TIMINGS 
  551  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix ESFC")));
 
 
  566template<
typename int_type>
 
  570                   std::vector<int> & Bcol2Ccol,
 
  571                   std::vector<int> & Bimportcol2Ccol,
 
  573       bool keep_all_hard_zeros){
 
  575#ifdef ENABLE_MMM_TIMINGS 
  576  using Teuchos::TimeMonitor;
 
  577  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse SerialCore")));
 
  590  int *Arowptr, *Acolind;
 
  595  int *Browptr, *Bcolind;
 
  599  int *Irowptr=0, *Icolind=0;
 
  608  int *CSR_rowptr, *CSR_colind;
 
  617  std::vector<int> c_status(n, -1);
 
  620  int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
 
  621  int CSR_ip=0,OLD_ip=0;
 
  625    for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
 
  627      double Aval = Avals[k];
 
  628      if(!keep_all_hard_zeros && Aval==0) 
continue;
 
  633        for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
 
  634          int Cj=Bcol2Ccol[Bcolind[j]];
 
  636          if(c_status[Cj]<OLD_ip){
 
  638            if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
 
  640            CSR_colind[CSR_ip]=Cj;
 
  641            CSR_vals[CSR_ip]=Aval*Bvals[j];
 
  645            CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
 
  651        for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
 
  652          int Cj=Bimportcol2Ccol[Icolind[j]];
 
  654          if(c_status[Cj]<OLD_ip){
 
  656            if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
 
  658            CSR_colind[CSR_ip]=Cj;
 
  659            CSR_vals[CSR_ip]=Aval*Ivals[j];
 
  663            CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
 
  670#ifdef ENABLE_MMM_TIMINGS 
  671  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse Final Sort")));
 
 
  686template<
typename int_type>
 
  692                       bool call_FillComplete_on_result,
 
  693           bool keep_all_hard_zeros)
 
  698  int C_firstCol_import = 0;
 
  699  int C_lastCol_import = -1;
 
  702  Bview.
colMap->MyGlobalElementsPtr(bcols);
 
  703  int_type* bcols_import = NULL;
 
  710  int C_numCols = C_lastCol - C_firstCol + 1;
 
  711  int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
 
  713  if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
 
  716  double* dwork = 
new double[C_numCols];
 
  717  int_type* iwork = 
new int_type[C_numCols];
 
  718  int_type *c_cols=iwork;
 
  719  double *c_vals=dwork;
 
  720  int *c_index=
new int[C_numCols];
 
  726  int *Arowptr, *Acolind;
 
  738  for(k=0;k<C_numCols;k++) c_index[k]=-1;
 
  743    int_type global_row = (int_type) Aview.
rowMap->GID64(i);
 
  791    for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
 
  793      double Aval = Avals[k];
 
  795      if(Bview.
remote[Ak] || (!keep_all_hard_zeros && Aval==0)) 
continue;
 
  797      int* Bcol_inds = Bview.
indices[Ak];
 
  798      double* Bvals_k = Bview.
values[Ak];
 
  801        int col=Bcol_inds[j];
 
  808          c_cols[c_current]=col;
 
  809          c_vals[c_current]=Aval*Bvals_k[j];
 
  810          c_index[col]=c_current;
 
  815          c_vals[c_index[col]]+=Aval*Bvals_k[j];
 
  820    for(k=0; k<c_current; k++){
 
  821      c_index[c_cols[k]]=-1;
 
  822      c_cols[k]=bcols[c_cols[k]]; 
 
  852    for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
 
  854      double Aval = Avals[k];
 
  856      if(!Bview.
remote[Ak] || Aval==0) 
continue;
 
  858      int* Bcol_inds = Bview.
indices[Ak];
 
  859      double* Bvals_k = Bview.
values[Ak];
 
  862        int col=Bcol_inds[j];
 
  864          c_cols[c_current]=col;
 
  865          c_vals[c_current]=Aval*Bvals_k[j];
 
  866          c_index[col]=c_current;
 
  870          c_vals[c_index[col]]+=Aval*Bvals_k[j];
 
  875    for(k=0; k<c_current; k++){
 
  876      c_index[c_cols[k]]=-1;
 
  877      c_cols[k]=bcols_import[c_cols[k]];
 
  897  if(call_FillComplete_on_result)
 
 
  913template<
typename int_type>
 
  915                           CrsMatrixStruct & Aview,
 
  917                           CrsMatrixStruct& Bview,
 
  919         bool call_FillComplete_on_result,
 
  920         bool keep_all_hard_zeros){
 
  927  std::vector<int> Cremotepids;
 
  929  std::vector<int> Bimportcol2Ccol;
 
  930  if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
 
  932#ifdef ENABLE_MMM_TIMINGS 
  933  using Teuchos::TimeMonitor;
 
  934  Teuchos::RCP<Teuchos::TimeMonitor> MM;
 
  938  if(!call_FillComplete_on_result) {
 
  939#ifdef ENABLE_MMM_TIMINGS 
  940    MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM General Multiply")));
 
  942    rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,
false,keep_all_hard_zeros);
 
  950  int *C_rowptr, *C_colind;
 
  953  bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
 
  956  if(!NewFlag && ExtractFailFlag){
 
  957#ifdef ENABLE_MMM_TIMINGS 
  958    MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM General Multiply")));
 
  960    rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result,keep_all_hard_zeros);
 
  965#ifdef ENABLE_MMM_TIMINGS 
  966  if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix CMap")));
 
  967  else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse CMap")));
 
  972    if(Bview.importMatrix) {
 
  973      EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
 
  986#ifdef ENABLE_MMM_TIMINGS 
  987  if(NewFlag)  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Lookups")));
 
  988  else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse Lookups")));
 
  997    if(colmap_B->
SameAs(*colmap_C)){
 
 1005        Bcol2Ccol[i]=colmap_C->
LID((int_type) colmap_B->GID64(i));
 
 1006        if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
 
 1010    if(Bview.importMatrix){
 
 1011      Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
 
 1012      for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
 
 1013      Bimportcol2Ccol[i]=colmap_C->
LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
 
 1014      if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
 
 1019#ifdef ENABLE_MMM_TIMINGS 
 1025    EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C,keep_all_hard_zeros));
 
 1029    EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C,keep_all_hard_zeros));
 
 1042                           CrsMatrixStruct & Aview,
 
 1044                           CrsMatrixStruct& Bview,
 
 1046                           bool call_FillComplete_on_result,\
 
 1047         bool keep_all_hard_zeros){
 
 1049#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 
 1051    return Tmult_A_B<int>(A, Aview, B, Bview, C, call_FillComplete_on_result, keep_all_hard_zeros);
 
 1055#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 
 1057    return Tmult_A_B<long long>(A, Aview, B, Bview, C, call_FillComplete_on_result, keep_all_hard_zeros);
 
 1061    throw "EpetraExt::MatrixMatrix::mult_A_B: GlobalIndices type unknown";
 
 1068template<
typename int_type>
 
 1074                     std::vector<int> & Bcol2Ccol,
 
 1075                     std::vector<int> & Bimportcol2Ccol,
 
 1078#ifdef ENABLE_MMM_TIMINGS 
 1079  using Teuchos::TimeMonitor;
 
 1080  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse SerialCore")));
 
 1093  int *Arowptr, *Acolind;
 
 1098  int *Browptr, *Bcolind;
 
 1102  int *Irowptr=0, *Icolind=0;
 
 1111  const double *Dvals = Dinv.
Values();
 
 1114  int *CSR_rowptr, *CSR_colind;
 
 1123  std::vector<int> c_status(n, -1);
 
 1126  int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
 
 1127  int CSR_ip=0,OLD_ip=0;
 
 1131    double Dval = Dvals[i];
 
 1134    for(k=Browptr[i]; k<Browptr[i+1]; k++){
 
 1136      double Bval = Bvals[k];
 
 1137      if(Bval==0) 
continue;
 
 1138      int Ck=Bcol2Ccol[Bcolind[k]];
 
 1141      c_status[Ck]=CSR_ip;
 
 1142      CSR_colind[CSR_ip]=Ck;
 
 1143      CSR_vals[CSR_ip]= Bvals[k];
 
 1148    for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
 
 1150      double Aval = Avals[k];
 
 1151      if(Aval==0) 
continue;
 
 1156        for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
 
 1157          int Cj=Bcol2Ccol[Bcolind[j]];
 
 1159          if(c_status[Cj]<OLD_ip){
 
 1161            if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
 
 1162            c_status[Cj]=CSR_ip;
 
 1163            CSR_colind[CSR_ip]=Cj;
 
 1164            CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j];
 
 1168            CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
 
 1174        for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
 
 1175          int Cj=Bimportcol2Ccol[Icolind[j]];
 
 1177          if(c_status[Cj]<OLD_ip){
 
 1179            if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
 
 1180            c_status[Cj]=CSR_ip;
 
 1181            CSR_colind[CSR_ip]=Cj;
 
 1182            CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
 
 1186            CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j];
 
 1193#ifdef ENABLE_MMM_TIMINGS 
 1194  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse Final Sort")));
 
 
 1206template<
typename int_type>
 
 1212                         std::vector<int> & Bcol2Ccol,
 
 1213                         std::vector<int> & Bimportcol2Ccol,
 
 1214                         std::vector<int>& Cremotepids,
 
 1217#ifdef ENABLE_MMM_TIMINGS 
 1218  using Teuchos::TimeMonitor;
 
 1219  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix SerialCore")));
 
 1226  int NumMyDiagonals=0; 
 
 1233  int *Arowptr, *Acolind;
 
 1238  int *Browptr, *Bcolind;
 
 1243  const double *Dvals = Dinv.
Values();
 
 1245  int *Irowptr=0, *Icolind=0;
 
 1261  std::vector<int> c_status(n, -1);
 
 1266  int CSR_ip=0,OLD_ip=0;
 
 1272  CSR_colind.
Resize(CSR_alloc);
 
 1276  std::vector<int> NumEntriesPerRow(m);
 
 1280    bool found_diagonal=
false;
 
 1281    CSR_rowptr[i]=CSR_ip;
 
 1282    double Dval = Dvals[i];
 
 1285    for(k=Browptr[i]; k<Browptr[i+1]; k++){
 
 1287      double Bval = Bvals[k];
 
 1288      if(Bval==0) 
continue;
 
 1289      int Ck=Bcol2Ccol[Bcolind[k]];
 
 1292      c_status[Ck]=CSR_ip;
 
 1293      CSR_colind[CSR_ip]=Ck;
 
 1294      CSR_vals[CSR_ip]= Bvals[k];
 
 1299    for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
 
 1300      int Ak      = Acolind[k];
 
 1301      double Aval = Avals[k];
 
 1302      if(Aval==0) 
continue;
 
 1307        for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
 
 1308          int Cj=Bcol2Ccol[Bcolind[j]];
 
 1310          if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
 
 1312          if(c_status[Cj]<OLD_ip){
 
 1314            c_status[Cj]=CSR_ip;
 
 1315            CSR_colind[CSR_ip]=Cj;
 
 1316            CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j];
 
 1320            CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
 
 1326        for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
 
 1327          int Cj=Bimportcol2Ccol[Icolind[j]];
 
 1329          if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
 
 1331          if(c_status[Cj]<OLD_ip){
 
 1333            c_status[Cj]=CSR_ip;
 
 1334            CSR_colind[CSR_ip]=Cj;
 
 1335            CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
 
 1339            CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j];
 
 1343    NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
 
 1346    if(CSR_ip + n > CSR_alloc){
 
 1349      CSR_colind.
Resize(CSR_alloc);
 
 1354  CSR_rowptr[m]=CSR_ip;
 
 1356#ifdef ENABLE_MMM_TIMINGS 
 1357  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix Final Sort")));
 
 1363#ifdef ENABLE_MMM_TIMINGS 
 1364  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix Fast IE")));
 
 1369  int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
 
 1371  int *ExportLIDs=0, *ExportPIDs=0;
 
 1387#ifdef ENABLE_MMM_TIMINGS 
 1388  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix ESFC")));
 
 
 1402template<
typename int_type>
 
 1403int MatrixMatrix::Tjacobi_A_B(
double omega,
 
 1408                             CrsMatrixStruct& Bview,
 
 1410                              bool call_FillComplete_on_result){
 
 1416  std::vector<int> Cremotepids;
 
 1418  std::vector<int> Bimportcol2Ccol;
 
 1419  if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
 
 1421#ifdef ENABLE_MMM_TIMINGS 
 1422  using Teuchos::TimeMonitor;
 
 1423  Teuchos::RCP<Teuchos::TimeMonitor> MM;
 
 1427  if(!call_FillComplete_on_result) {
 
 1428#ifdef ENABLE_MMM_TIMINGS 
 1429    MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi General Multiply")));
 
 1431    throw std::runtime_error(
"jacobi_A_B_general not implemented");
 
 1440  int *C_rowptr, *C_colind;
 
 1443  bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
 
 1446  if(!NewFlag && ExtractFailFlag){
 
 1447#ifdef ENABLE_MMM_TIMINGS 
 1448    MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi General Multiply")));
 
 1450    throw std::runtime_error(
"jacobi_A_B_general not implemented");
 
 1455#ifdef ENABLE_MMM_TIMINGS 
 1456  if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Newmatrix CMap")));
 
 1457  else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse CMap")));
 
 1462    if(Bview.importMatrix) {
 
 1463      EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
 
 1476#ifdef ENABLE_MMM_TIMINGS 
 1477  if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Newmatrix Lookups")));
 
 1478  else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse Lookups")));
 
 1487    if(colmap_B->
SameAs(*colmap_C)){
 
 1495        Bcol2Ccol[i]=colmap_C->
LID((int_type) colmap_B->GID64(i));
 
 1496        if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
 
 1500    if(Bview.importMatrix){
 
 1501      Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
 
 1502      for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
 
 1503      Bimportcol2Ccol[i]=colmap_C->
LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
 
 1504      if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
 
 1513    EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
 
 1517    EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
 
 1529int MatrixMatrix::jacobi_A_B(
double omega,
 
 1532                             CrsMatrixStruct & Aview,
 
 1534                             CrsMatrixStruct& Bview,
 
 1536                             bool call_FillComplete_on_result)
 
 1538#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 
 1540    return Tjacobi_A_B<int>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
 
 1544#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 
 1546      return Tjacobi_A_B<long long>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
 
 1550    throw "EpetraExt::MatrixMatrix::jacobi_A_B: GlobalIndices type unknown";
 
 1558template<
typename int_type>
 
 1559int MatrixMatrix::Tmult_AT_B_newmatrix(
const CrsMatrixStruct & Atransview, 
const CrsMatrixStruct & Bview, 
Epetra_CrsMatrix & C,
bool keep_all_hard_zeros) {
 
 1563#ifdef ENABLE_MMM_TIMINGS 
 1564  using Teuchos::TimeMonitor;
 
 1565  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T Transpose")));
 
 1572#ifdef ENABLE_MMM_TIMINGS 
 1573  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T AB-core")));
 
 1575  RCP<Epetra_CrsMatrix> Ctemp;
 
 1578  bool needs_final_export = Atransview.origMatrix->Exporter() != 0;
 
 1579  if(needs_final_export)
 
 1580    Ctemp = rcp(
new Epetra_CrsMatrix(Copy,Atransview.origMatrix->RowMap(),Bview.origMatrix->ColMap(),0));
 
 1582    EPETRA_CHK_ERR( C.
ReplaceColMap(Bview.origMatrix->ColMap()) );
 
 1583    Ctemp = rcp(&C,
false);
 
 1587  std::vector<int> Bcol2Ccol(Bview.origMatrix->NumMyCols());
 
 1588  for(
int i=0; i<Bview.origMatrix->NumMyCols(); i++)
 
 1590  std::vector<int> Bimportcol2Ccol,Cremotepids;
 
 1591  if(Bview.origMatrix->Importer())
 
 1594  EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(*Atransview.origMatrix,*Bview.origMatrix,Bview,
 
 1595                                              Bcol2Ccol,Bimportcol2Ccol,Cremotepids,
 
 1596                                              *Ctemp,keep_all_hard_zeros));
 
 1601#ifdef ENABLE_MMM_TIMINGS 
 1602  MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T ESFC")));
 
 1605  if(needs_final_export)
 
 1606    C.FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.origMatrix->DomainMap(),&Atransview.origMatrix->RangeMap(),
false);
 
 1616int MatrixMatrix::mult_AT_B_newmatrix(
const CrsMatrixStruct & Atransview, 
const CrsMatrixStruct & Bview, 
Epetra_CrsMatrix & C, 
bool keep_all_hard_zeros) {
 
 1618#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 
 1619  if(Atransview.origMatrix->RowMap().GlobalIndicesInt() && Bview.origMatrix->RowMap().GlobalIndicesInt()) {
 
 1620    return Tmult_AT_B_newmatrix<int>(Atransview,Bview,C,keep_all_hard_zeros);
 
 1624#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 
 1625  if(Atransview.origMatrix->RowMap().GlobalIndicesLongLong() && Bview.origMatrix->RowMap().GlobalIndicesLongLong()) {
 
 1626    return Tmult_AT_B_newmatrix<long long>(Atransview,Bview,C,keep_all_hard_zeros);
 
 1630    throw "EpetraExt::MatrixMatrix::mult_AT_B_newmatrix: GlobalIndices type unknown";
 
const Epetra_Map * rowMap
 
std::vector< int > targetMapToImportRow
 
const Epetra_Map * colMap
 
LightweightCrsMatrix * importMatrix
 
std::vector< int > targetMapToOrigRow
 
std::vector< double > vals_
 
std::vector< int > ExportPIDs_
 
std::vector< int > ColMapOwningPIDs_
 
std::vector< int > colind_
 
std::vector< int > ExportLIDs_
 
std::vector< int > rowptr_
 
void MyGlobalElementsPtr(int *&MyGlobalElementList) const
 
int NumMyElements() const
 
int MyGlobalElements(int *MyGlobalElementList) const
 
bool DistributedGlobal() const
 
bool SameAs(const Epetra_BlockMap &Map) const
 
bool GlobalIndicesInt() const
 
int NumMyElements() const
 
bool GlobalIndicesLongLong() const
 
virtual int NumProc() const=0
 
virtual int MyPID() const=0
 
virtual int SumIntoGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
 
const Epetra_Map & RowMap() const
 
virtual int InsertGlobalValues(int GlobalRow, int NumEntries, const double *Values, const int *Indices)
 
int FillComplete(bool OptimizeDataStorage=true)
 
int ExpertStaticFillComplete(const Epetra_Map &DomainMap, const Epetra_Map &RangeMap, const Epetra_Import *Importer=0, const Epetra_Export *Exporter=0, int NumMyDiagonals=-1)
 
int NumMyNonzeros() const
 
int ReplaceColMap(const Epetra_BlockMap &newmap)
 
const Epetra_Comm & Comm() const
 
const Epetra_Map & RangeMap() const
 
int ExtractCrsDataPointers(int *&IndexOffset, int *&Indices, double *&Values_in) const
 
const Epetra_Map & ColMap() const
 
Epetra_IntSerialDenseVector & ExpertExtractIndexOffset()
 
bool IndicesAreLocal() const
 
int ExpertMakeUniqueCrsGraphData()
 
const Epetra_Map & DomainMap() const
 
const Epetra_Import * Importer() const
 
Epetra_IntSerialDenseVector & ExpertExtractIndices()
 
bool IndicesAreGlobal() const
 
double *& ExpertExtractValues()
 
int Resize(int Length_in)
 
const int * LengthsFrom() const
 
static int SortCrsEntries(int NumRows, const int *CRS_rowptr, int *CRS_colind, double *CRS_vals)
 
static int GetRemotePIDs(const Epetra_Import &Importer, std::vector< int > &RemotePIDs)
 
static void EPETRA_LIB_DLL_EXPORT Sort(bool SortAscending, int NumKeys, T *Keys, int NumDoubleCompanions, double **DoubleCompanions, int NumIntCompanions, int **IntCompanions, int NumLongLongCompanions, long long **LongLongCompanions)
 
static int GetPids(const Epetra_Import &Importer, std::vector< int > &pids, bool use_minus_one_for_local)
 
EpetraExt::BlockCrsMatrix: A class for constructing a distributed block matrix.
 
void resize_doubles(int nold, int nnew, double *&d)
 
int jacobi_A_B_newmatrix(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C)
 
int mult_A_B_general(const Epetra_CrsMatrix &A, CrsMatrixStruct &Aview, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, Epetra_CrsMatrix &C, bool call_FillComplete_on_result, bool keep_all_hard_zeros)
 
int mult_A_B_newmatrix(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, const CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C, bool keep_all_hard_zeros)
 
static int C_estimate_nnz(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B)
 
int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map *&unionmap, std::vector< int > &Cremotepids, std::vector< int > &Bcols2Ccols, std::vector< int > &Icols2Ccols)
 
int jacobi_A_B_reuse(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C)
 
int mult_A_B_reuse(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C, bool keep_all_hard_zeros)