EpetraExt Development
EpetraExt_MatrixMatrix_mult_A_B.cpp
1//@HEADER
2// ***********************************************************************
3//
4// EpetraExt: Epetra Extended - Linear Algebra Services Package
5// Copyright (2011) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41
44#include <EpetraExt_MatrixMatrix.h>
45#include <EpetraExt_MMHelpers.h>
46
48
49#include <Epetra_Export.h>
50#include <Epetra_Import.h>
51#include <Epetra_Util.h>
52#include <Epetra_Map.h>
53#include <Epetra_Comm.h>
54#include <Epetra_CrsMatrix.h>
55#include <Epetra_Vector.h>
56#include <Epetra_Directory.h>
57#include <Epetra_HashTable.h>
58#include <Epetra_Distributor.h>
59#include <Epetra_IntSerialDenseVector.h>
60
61#ifdef HAVE_VECTOR
62#include <vector>
63#endif
64
65#ifdef HAVE_MPI
66#include <Epetra_MpiDistributor.h>
67#endif
68
69
70#include <Teuchos_TimeMonitor.hpp>
71
72namespace EpetraExt {
73
74/*****************************************************************************/
75/*****************************************************************************/
76/*****************************************************************************/
77static inline int C_estimate_nnz(const Epetra_CrsMatrix & A, const Epetra_CrsMatrix &B){
78 // Follows the NZ estimate in ML's ml_matmatmult.c
79 int Aest=(A.NumMyRows()>0)? A.NumMyNonzeros()/A.NumMyRows():100;
80 int Best=(B.NumMyRows()>0)? B.NumMyNonzeros()/B.NumMyRows():100;
81
82 int nnzperrow=(int)(sqrt((double)Aest) + sqrt((double)Best) - 1);
83 nnzperrow*=nnzperrow;
84
85 return (int)(A.NumMyRows()*nnzperrow*0.75 + 100);
86}
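// Worked example of the estimate above (illustration only): if A and B both
// average 9 nonzeros per local row, then Aest = Best = 9 and
// nnzperrow = (sqrt(9) + sqrt(9) - 1)^2 = 5^2 = 25, so the returned estimate
// is 0.75*25*A.NumMyRows() + 100, i.e. about 19 entries per row of C plus
// slack. The callers below treat this only as a starting allocation and
// double it whenever a row might overflow.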
87
88// Commented out unused, file-local function.
89#if 0
90/*****************************************************************************/
91/*****************************************************************************/
92/*****************************************************************************/
93static inline int auto_resize(std::vector<int> &x,int num_new){
94 int newsize=x.size() + EPETRA_MAX((int)x.size(),num_new);
95 x.resize(newsize);
96 return newsize;
97}
98#endif // 0
99
100/*****************************************************************************/
101/*****************************************************************************/
102/*****************************************************************************/
103template<typename int_type>
104int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map*& unionmap, std::vector<int>& Cremotepids,
105 std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols)
106{
107#ifdef HAVE_MPI
108
109#ifdef ENABLE_MMM_TIMINGS
110 using Teuchos::TimeMonitor;
111 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 1")));
112#endif
113
114 // We need to merge the ColMap of B and the TargetMap of Bimport in an AztecOO/Ifpack/ML-compatible order.
115 Epetra_Util util;
116 int i,j,MyPID = B.Comm().MyPID(), NumProc = B.Comm().NumProc();
117 int Bstart=0, Istart=0, Cstart=0,Pstart=0;
118
119 const Epetra_Map & BColMap = B.ColMap();
120 const Epetra_Map & DomainMap = B.DomainMap();
121 const LightweightMap & IColMap = Bimport.ColMap_;
122
123 int Nb = BColMap.NumMyElements();
124 int_type * Bgids = 0;
125 BColMap.MyGlobalElementsPtr(Bgids);
126 int Ni = IColMap.NumMyElements();
127 int_type * Igids = 0;
128 if(Ni>0)
129 IColMap.MyGlobalElementsPtr(Igids);
130
131 if((int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb);
132 if((int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni);
133
134 // Since we're getting called, we know we're using an MPI implementation of Epetra,
135 // which means we should have an MpiDistributor for both B and Bimport,
136 // unless all of B's columns are owned by the calling proc (e.g. MueLu for A*Ptent w/ uncoupled aggregation).
137 Epetra_MpiDistributor *Distor=0;
138 if(B.Importer()) {
139 Distor=dynamic_cast<Epetra_MpiDistributor*>(&B.Importer()->Distributor());
140 if(!Distor) EPETRA_CHK_ERR(-2);
141 }
142
143 // **********************
144 // Stage 1: Get the owning PIDs
145 // **********************
146 // Note: if B doesn't have an importer, the calling proc owns all its colids...
147 std::vector<int> Bpids(Nb), Ipids(Ni);
148 if(B.Importer()) {EPETRA_CHK_ERR(util.GetPids(*B.Importer(),Bpids,true));}
149 else Bpids.assign(Nb,-1);
150
151 if(Ni != (int) Bimport.ColMapOwningPIDs_.size()) {
152 EPETRA_CHK_ERR(-21);
153 }
154 for(i=0;i<Ni;i++){
155 Ipids[i] = (Bimport.ColMapOwningPIDs_[i]==MyPID)?(-1):(Bimport.ColMapOwningPIDs_[i]);
156 }
157
158 // **********************
159 // Stage 2: Allocate memory (make things too big)
160 // **********************
161 int Csize=Nb+Ni;
162 int Psize=Nb+Ni;
163 std::vector<int_type> Cgids(Csize);
164 Cremotepids.resize(Psize);
165
166#ifdef ENABLE_MMM_TIMINGS
167 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 2")));
168#endif
169
170 // **********************
171 // Stage 3: Local Unknowns
172 // **********************
173 if(!B.Importer() || (B.Importer()->NumSameIDs() == DomainMap.NumMyElements())) {
174 // B's colmap has all of the domain elements. We can just copy those into the start of the array.
175 DomainMap.MyGlobalElements(Cgids.size() ? &Cgids[0] : 0);
176 Cstart=DomainMap.NumMyElements();
177 Bstart=DomainMap.NumMyElements();
178
179 for(i=0; i<DomainMap.NumMyElements(); i++) Bcols2Ccols[i] = i;
180 }
181 else {
182 // There are more entries in the DomainMap than B's ColMap. So we stream through both B and Bimport for the copy.
183 int NumDomainElements = DomainMap.NumMyElements();
184 for(i = 0; i < NumDomainElements; i++) {
185 int_type GID = (int_type) DomainMap.GID64(i);
186 int LID = BColMap.LID(GID);
187 // B has this guy
188 if(LID!=-1) {
189 Bcols2Ccols[LID]=Cstart;
190 Cgids[Cstart] = GID;
191 Cstart++;
192 Bstart++;
193 }
194 else {
195 // B import has this guy
196 LID = IColMap.LID(GID);
197 if(LID!=-1) {
198 Icols2Ccols[LID]=Cstart;
199 Cgids[Cstart] = GID;
200 Cstart++;
201 }
202 }
203 }
204 }
205
206 // Record C columns for the locally owned unknowns in Bimport, then advance Istart past them
207 for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) {
208 while(Cgids[j]!=Igids[i]) j++;
209 Icols2Ccols[i]=j;
210 }
211 Istart=i;
212
213
214#ifdef ENABLE_MMM_TIMINGS
215 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 3")));
216#endif
217
218
219 // **********************
220 // Stage 4: Processor-by-processor set_union
221 // **********************
222 // NOTE: Initial sizes for Btemp/Itemp from B's distributor. This should be exact for Btemp and a decent guess for Itemp
223 int initial_temp_length = 0;
224 const int * lengths_from=0;
225 if(Distor) {
226 lengths_from= Distor->LengthsFrom();
227 for(i=0; i < Distor->NumReceives(); i++) initial_temp_length += lengths_from[i];
228 }
229 else initial_temp_length=100;
230
231 std::vector<int_type> Btemp(initial_temp_length), Itemp(initial_temp_length);
232 std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length);
233
234
235 while (Bstart < Nb || Istart < Ni) {
236 int Bproc=NumProc+1, Iproc=NumProc+1, Cproc;
237
238 // Find the next active processor ID
239 if(Bstart < Nb) Bproc=Bpids[Bstart];
240 if(Istart < Ni) Iproc=Ipids[Istart];
241
242 Cproc = (Bproc < Iproc)?Bproc:Iproc;
243
244 if(Bproc == Cproc && Iproc != Cproc) {
245 // Only B has this processor. Copy the data.
246 // B: Find the beginning of the next processor
247 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
248 int Bnext=i;
249
250 // Copy data to C
251 int tCsize = Bnext-Bstart;
252 if(Btemp2.size() < (size_t)tCsize) {Btemp2.resize(tCsize);}
253
254 for(i=Bstart; i<Bnext; i++) {
255 Cremotepids[i-Bstart+Pstart] = Cproc;
256 Cgids[i-Bstart+Cstart] = Bgids[i];
257 Btemp2[i-Bstart] = i;
258 }
259
260 // Sort & record reindexing
261 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
262 util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0);
263
264 for(i=0, j=Cstart; i<tCsize; i++){
265 while(Cgids[j] != Bgids[Btemp2[i]]) j++;
266 Bcols2Ccols[Btemp2[i]] = j;
267 }
268 Cstart+=tCsize;
269 Pstart+=tCsize;
270 Bstart=Bnext;
271 }
272 else if(Bproc != Cproc && Iproc == Cproc) {
273 // Only I has this processor. Copy the data.
274 // I: Find the beginning of the next processor
275 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
276 int Inext=i;
277
278 // Copy data to C
279 int tCsize = Inext-Istart;
280 if(Itemp2.size() < (size_t)tCsize) {Itemp2.resize(tCsize);}
281
282 for(i=Istart; i<Inext; i++) {
283 Cremotepids[i-Istart+Pstart] = Cproc;
284 Cgids[i-Istart+Cstart] = Igids[i];
285 Itemp2[i-Istart] = i;
286 }
287
288 // Sort & record reindexing
289 int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
290 util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0);
291
292 for(i=0, j=Cstart; i<tCsize; i++){
293 while(Cgids[j] != Igids[Itemp2[i]]) j++;
294 Icols2Ccols[Itemp2[i]] = j;
295 }
296 Cstart+=tCsize;
297 Pstart+=tCsize;
298 Istart=Inext;
299 }
300 else {
301 // Both B and I have this processor, so we need to sort both and take a set_union.
302 int Bnext, Inext;
303 // B: Find the beginning of the next processor
304 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
305 Bnext=i;
306
307 // I: Find the beginning of the next processor
308 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
309 Inext=i;
310
311 // Copy data to temp
312 int tBsize = Bnext-Bstart;
313 int tIsize = Inext-Istart;
314 // int tCsize = tBsize+tIsize;
315 if(Btemp.size() < (size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);}
316 if(Itemp.size() < (size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);}
317
318 for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;}
319 for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;}
320
321 // Sort & set_union
322 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0; int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
323 util.Sort(true, tBsize, Btemp.size() ? &Btemp[0] : 0, 0, 0, 1, &Bptr2, 0, 0);
324 util.Sort(true, tIsize, Itemp.size() ? &Itemp[0] : 0, 0, 0, 1, &Iptr2, 0, 0);
325 typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart;
326 typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart);
327
328 for(i=0, j=Cstart; i<tBsize; i++){
329 while(Cgids[j] != Bgids[Btemp2[i]]) j++;
330 Bcols2Ccols[Btemp2[i]] = j;
331 }
332
333 for(i=0, j=Cstart; i<tIsize; i++){
334 while(Cgids[j] != Igids[Itemp2[i]]) j++;
335 Icols2Ccols[Itemp2[i]] = j;
336 }
337
338 for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc;
339 Cstart = (last_el - mycstart) + Cstart;
340 Pstart = (last_el - mycstart) + Pstart;
341 Bstart=Bnext;
342 Istart=Inext;
343 }
344 } // end while
345
346#ifdef ENABLE_MMM_TIMINGS
347 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 4")));
348#endif
349
350 // Resize the RemotePIDs down
351 Cremotepids.resize(Pstart);
352
353 // **********************
354 // Stage 5: Call constructor
355 // **********************
356 // Make the map
357 unionmap=new Epetra_Map((int_type) -1,Cstart,Cgids.size() ? &Cgids[0] : 0, (int_type) B.ColMap().IndexBase64(),
358 B.Comm(),B.ColMap().DistributedGlobal(),(int_type) B.ColMap().MinAllGID64(),(int_type) B.ColMap().MaxAllGID64());
359 return 0;
360#else
361 return -1;
362#endif
363 }
364
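// Contract of aztecoo_and_ml_compatible_map_union, as implemented above
// (summary): on success, Cgids[0..Cstart) lists the locally owned (domain)
// column GIDs first, followed by remote GIDs grouped by owning PID in
// ascending PID order and sorted by GID within each PID block; this is the
// column ordering AztecOO/Ifpack/ML expect. Bcols2Ccols / Icols2Ccols
// translate each local column id of B / Bimport into its local column id in
// the union map, and Cremotepids[k] is the owning PID of the k-th remote
// column of C.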
365/*****************************************************************************/
366/*****************************************************************************/
367/*****************************************************************************/
368// Provide a "resize" operation for double*'s.
369inline void resize_doubles(int nold,int nnew,double*& d){
370 if(nnew > nold){
371 double *tmp = new double[nnew];
372 for(int i=0; i<nold; i++)
373 tmp[i]=d[i];
374 delete [] d;
375 d=tmp;
376 }
377}
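// Minimal usage sketch for resize_doubles (illustration only, kept out of
// the build): grow a raw buffer from 3 to 6 entries. The first nold values
// are preserved, the new tail is uninitialized, and nnew <= nold is a no-op.
#if 0
double *d = new double[3];
d[0] = 1.0; d[1] = 2.0; d[2] = 3.0;
resize_doubles(3, 6, d); // d now holds 6 doubles; d[0..2] unchanged
delete [] d;
#endif // 0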
378
379
380/*****************************************************************************/
381/*****************************************************************************/
382/*****************************************************************************/
383template<typename int_type>
384int mult_A_B_newmatrix(const Epetra_CrsMatrix & A,
385 const Epetra_CrsMatrix & B,
386 const CrsMatrixStruct& Bview,
387 std::vector<int> & Bcol2Ccol,
388 std::vector<int> & Bimportcol2Ccol,
389 std::vector<int>& Cremotepids,
390 Epetra_CrsMatrix& C,
391 bool keep_all_hard_zeros
392){
393#ifdef ENABLE_MMM_TIMINGS
394 using Teuchos::TimeMonitor;
395 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix SerialCore")));
396#endif
397
398 // *****************************
399 // Improved Parallel Gustavson in Local IDs
400 // *****************************
401 const Epetra_Map * colmap_C = &(C.ColMap());
402
403
404 int NumMyDiagonals=0; // Counter to speed up ESFC
405
406 int m=A.NumMyRows();
407 int n=colmap_C->NumMyElements();
408 int i,j,k;
409
410 // DataPointers for A
411 int *Arowptr, *Acolind;
412 double *Avals;
413 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
414
415 // DataPointers for B, Bimport
416 int *Browptr, *Bcolind;
417 double *Bvals;
418 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
419
420 int *Irowptr=0, *Icolind=0;
421 double *Ivals=0;
422 if(Bview.importMatrix){
423 Irowptr = &Bview.importMatrix->rowptr_[0];
424 Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
425 Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
426 }
427
428 // MemorySetup: If somebody else is sharing this C's graphdata, make a new one.
429 // This is needed because I'm about to walk all over the CrsGraphData...
430 EPETRA_CHK_ERR(C.ExpertMakeUniqueCrsGraphData());
431
432 // The status array will contain the index into colind where this entry was last deposited.
433 // c_status[i] < CSR_ip - not in the row yet.
434 // c_status[i] >= CSR_ip, this is the entry where you can find the data
435 // We start with this filled with -1's indicating that there are no entries yet.
436 std::vector<int> c_status(n, -1);
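// How the c_status test works (illustration only): at the start of row i,
// OLD_ip == CSR_ip. Every deposit made during row i-1 used an index smaller
// than that value, so for a column Cj touched in row i-1 we have
// c_status[Cj] < OLD_ip and it correctly reads as "not in row i yet". This
// is why c_status never needs a per-row reset, unlike the c_index array in
// mult_A_B_general below.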
437
438 // Classic csr assembly (low memory edition)
439 int CSR_alloc=C_estimate_nnz(A,B);
440 if(CSR_alloc < n) CSR_alloc = n;
441 int CSR_ip=0,OLD_ip=0;
442 Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset();
443 Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices();
444 double *& CSR_vals = C.ExpertExtractValues();
445
446 CSR_rowptr.Resize(m+1);
447 CSR_colind.Resize(CSR_alloc);
448 resize_doubles(0,CSR_alloc,CSR_vals);
449
450 // Static Profile stuff
451 std::vector<int> NumEntriesPerRow(m);
452
453 // For each row of A/C
454 for(i=0; i<m; i++){
455 bool found_diagonal=false;
456 CSR_rowptr[i]=CSR_ip;
457
458 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
459 int Ak = Acolind[k];
460 double Aval = Avals[k];
461 if(!keep_all_hard_zeros && Aval==0) continue;
462
463 if(Bview.targetMapToOrigRow[Ak] != -1){
464 // Local matrix
465 int Bk = Bview.targetMapToOrigRow[Ak];
466 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
467 int Cj=Bcol2Ccol[Bcolind[j]];
468
469 if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
470
471 if(c_status[Cj]<OLD_ip){
472 // New entry
473 c_status[Cj]=CSR_ip;
474 CSR_colind[CSR_ip]=Cj;
475 CSR_vals[CSR_ip]=Aval*Bvals[j];
476 CSR_ip++;
477 }
478 else
479 CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
480 }
481 }
482 else{
483 // Remote matrix
484 int Ik = Bview.targetMapToImportRow[Ak];
485 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
486 int Cj=Bimportcol2Ccol[Icolind[j]];
487
488 if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
489
490 if(c_status[Cj]<OLD_ip){
491 // New entry
492 c_status[Cj]=CSR_ip;
493 CSR_colind[CSR_ip]=Cj;
494 CSR_vals[CSR_ip]=Aval*Ivals[j];
495 CSR_ip++;
496 }
497 else
498 CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
499 }
500 }
501 }
502 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
503
504 // Resize for next pass if needed
505 if(CSR_ip + n > CSR_alloc){
506 resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals);
507 CSR_alloc*=2;
508 CSR_colind.Resize(CSR_alloc);
509 }
510 OLD_ip=CSR_ip;
511 }
512
513 CSR_rowptr[m]=CSR_ip;
514
515#ifdef ENABLE_MMM_TIMINGS
516 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Final Sort")));
517#endif
518
519 // Sort the entries
520 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
521
522#ifdef ENABLE_MMM_TIMINGS
523 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Fast IE")));
524#endif
525
526 // Do a fast build of C's importer
527 Epetra_Import * Cimport=0;
528 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
529 int NumExports=0;
530 int *ExportLIDs=0, *ExportPIDs=0;
531 if(Bview.importMatrix) {
532 NumExports = Bview.importMatrix->ExportLIDs_.size();
533 ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
534 ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
535 }
536 else if(B.Importer()) {
537 // Grab the exports from B proper
538 NumExports = B.Importer()->NumExportIDs();
539 ExportLIDs = B.Importer()->ExportLIDs();
540 ExportPIDs = B.Importer()->ExportPIDs();
541 }
542
543
544 if(B.Importer() && C.ColMap().SameAs(B.ColMap()))
545 Cimport = new Epetra_Import(*B.Importer()); // Because the domain maps are the same
546 else if(!C.ColMap().SameAs(B.DomainMap()))
547 Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
548
549
550#ifdef ENABLE_MMM_TIMINGS
551 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix ESFC")));
552#endif
553
554 // Update the CrsGraphData
555 C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
556
557 return 0;
558}
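// How this kernel is reached in practice (sketch, assuming the public
// wrapper declared in EpetraExt_MatrixMatrix.h): users call
// EpetraExt::MatrixMatrix::Multiply(), which imports the remote rows of B,
// builds Bview, and dispatches here when C is a fresh matrix.
#if 0
Epetra_CrsMatrix C(Copy, A.RowMap(), 0);
int err = EpetraExt::MatrixMatrix::Multiply(A, false, B, false, C);
// On success C is fill-completed with domain map B.DomainMap() and range
// map A.RangeMap().
#endif // 0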
559
560
561
562
563/*****************************************************************************/
564/*****************************************************************************/
565/*****************************************************************************/
566template<typename int_type>
567int mult_A_B_reuse(const Epetra_CrsMatrix & A,
568 const Epetra_CrsMatrix & B,
569 CrsMatrixStruct& Bview,
570 std::vector<int> & Bcol2Ccol,
571 std::vector<int> & Bimportcol2Ccol,
572 Epetra_CrsMatrix & C,
573 bool keep_all_hard_zeros){
574
575#ifdef ENABLE_MMM_TIMINGS
576 using Teuchos::TimeMonitor;
577 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse SerialCore")));
578#endif
579
580 // *****************************
581 // Improved Parallel Gustavson in Local IDs
582 // *****************************
583 const Epetra_Map * colmap_C = &(C.ColMap());
584
585 int m=A.NumMyRows();
586 int n=colmap_C->NumMyElements();
587 int i,j,k;
588
589 // DataPointers for A
590 int *Arowptr, *Acolind;
591 double *Avals;
592 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
593
594 // DataPointers for B, Bimport
595 int *Browptr, *Bcolind;
596 double *Bvals;
597 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
598
599 int *Irowptr=0, *Icolind=0;
600 double *Ivals=0;
601 if(Bview.importMatrix){
602 Irowptr = &Bview.importMatrix->rowptr_[0];
603 Icolind = &Bview.importMatrix->colind_[0];
604 Ivals = &Bview.importMatrix->vals_[0];
605 }
606
607 // DataPointers for C
608 int *CSR_rowptr, *CSR_colind;
609 double *CSR_vals;
610 EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
611
612
613 // The status array will contain the index into colind where this entry was last deposited.
614 // c_status[i] < CSR_ip - not in the row yet.
615 // c_status[i] >= CSR_ip, this is the entry where you can find the data
616 // We start with this filled with -1's indicating that there are no entries yet.
617 std::vector<int> c_status(n, -1);
618
619 // Classic csr assembly
620 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
621 int CSR_ip=0,OLD_ip=0;
622
623 // For each row of A/C
624 for(i=0; i<m; i++){
625 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
626 int Ak=Acolind[k];
627 double Aval = Avals[k];
628 if(!keep_all_hard_zeros && Aval==0) continue;
629
630 if(Bview.targetMapToOrigRow[Ak] != -1){
631 // Local matrix
632 int Bk = Bview.targetMapToOrigRow[Ak];
633 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
634 int Cj=Bcol2Ccol[Bcolind[j]];
635
636 if(c_status[Cj]<OLD_ip){
637 // New entry
638 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
639 c_status[Cj]=CSR_ip;
640 CSR_colind[CSR_ip]=Cj;
641 CSR_vals[CSR_ip]=Aval*Bvals[j];
642 CSR_ip++;
643 }
644 else
645 CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
646 }
647 }
648 else{
649 // Remote matrix
650 int Ik = Bview.targetMapToImportRow[Ak];
651 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
652 int Cj=Bimportcol2Ccol[Icolind[j]];
653
654 if(c_status[Cj]<OLD_ip){
655 // New entry
656 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
657 c_status[Cj]=CSR_ip;
658 CSR_colind[CSR_ip]=Cj;
659 CSR_vals[CSR_ip]=Aval*Ivals[j];
660 CSR_ip++;
661 }
662 else
663 CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
664 }
665 }
666 }
667 OLD_ip=CSR_ip;
668 }
669
670#ifdef ENABLE_MMM_TIMINGS
671 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Final Sort")));
672#endif
673
674 // Sort the entries
675 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
676
677 return 0;
678}
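// Note on reuse (summary of the checks above): mult_A_B_reuse rewrites C's
// existing storage in place, so C must already be fill-completed with
// enough allocated entries to hold the product. If the product needs more
// entries than C has allocated, the kernel bails out with -13 (local part
// of B) or -14 (imported part of B) instead of resizing.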
679
680
681
682/*****************************************************************************/
683/*****************************************************************************/
684/*****************************************************************************/
685//kernel method for computing the local portion of C = A*B
686template<typename int_type>
687int mult_A_B_general(const Epetra_CrsMatrix & A,
688 CrsMatrixStruct & Aview,
689 const Epetra_CrsMatrix & B,
690 CrsMatrixStruct& Bview,
691 Epetra_CrsMatrix & C,
692 bool call_FillComplete_on_result,
693 bool keep_all_hard_zeros)
694{
695 int C_firstCol = Bview.colMap->MinLID();
696 int C_lastCol = Bview.colMap->MaxLID();
697
698 int C_firstCol_import = 0;
699 int C_lastCol_import = -1;
700
701 int_type* bcols = 0;
702 Bview.colMap->MyGlobalElementsPtr(bcols);
703 int_type* bcols_import = NULL;
704 if (Bview.importMatrix != NULL) {
705 C_firstCol_import = Bview.importMatrix->ColMap_.MinLID();
706 C_lastCol_import = Bview.importMatrix->ColMap_.MaxLID();
707 Bview.importMatrix->ColMap_.MyGlobalElementsPtr(bcols_import);
708 }
709
710 int C_numCols = C_lastCol - C_firstCol + 1;
711 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
712
713 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
714
715 // Allocate workspace memory
716 double* dwork = new double[C_numCols];
717 int_type* iwork = new int_type[C_numCols];
718 int_type *c_cols=iwork;
719 double *c_vals=dwork;
720 int *c_index=new int[C_numCols];
721
722 int i, j, k;
723 bool C_filled=C.Filled();
724
725 // DataPointers for A
726 int *Arowptr, *Acolind;
727 double *Avals;
728 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
729
730 //To form C = A*B we're going to execute this expression:
731 //
732 // C(i,j) = sum_k( A(i,k)*B(k,j) )
733 //
734 //Our goal, of course, is to navigate the data in A and B once, without
735 //performing searches for column-indices, etc.
736
737 // Mark indices as empty w/ -1
738 for(k=0;k<C_numCols;k++) c_index[k]=-1;
739
740 //loop over the rows of A.
741 for(i=0; i<A.NumMyRows(); ++i) {
742
743 int_type global_row = (int_type) Aview.rowMap->GID64(i);
744
745 //loop across the i-th row of A and for each corresponding row
746 //in B, loop across columns and accumulate product
747 //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
748 //as we stride across B(k,:) we're calculating updates for row i of the
749 //result matrix C.
750
751 /* Outline of the revised, ML-inspired algorithm
752
753 C_{i,j} = \sum_k A_{i,k} B_{k,j}
754
755 This algorithm uses a "middle product" formulation, with the loop ordering of
756 i, k, j. This means we compute a row of C at a time, but compute partial sums of
757 each entry in row i until we finish the k loop.
758
759 This algorithm also has a few twists worth documenting.
760
761 1) The first major twist involves the c_index, c_cols and c_vals arrays. The arrays c_cols
762 and c_vals store the *local* column index and values accumulator respectively. These
763 arrays are allocated to a size equal to the max number of local columns in C, namely C_numCols.
764 The value c_current tells us how many non-zeros we currently have in this row.
765
766 So how do we take an LCID and find the right accumulator? This is where the c_index array
767 comes in. At the start (and end) of the i loop, c_index is filled with -1's. Now
768 whenever we find an LCID in the k loop, we first look at c_index[lcid]. If this value is
769 -1, we haven't seen this entry yet, in which case we add the appropriate entries to c_cols
770 and c_vals and then set c_index[lcid] to the location of the accumulator (c_current before
771 we increment it). If the value is NOT -1, it tells us the location in the c_vals/c_cols
772 arrays (namely c_index[lcid]) where our accumulator lives.
773
774 This trick works because we're working with local ids. We can then loop from 0 to c_current
775 and reset c_index to -1's when we're done, only touching the arrays that have changed.
776 While we're at it, we can switch to the global ids so we can call [Insert|SumInto]GlobalValues.
777 Thus, the effect of this trick is to avoid passes over the index array.
778
779 2) The second major twist involves handling the remote and local components of B separately.
780 (ML doesn't need to do this, because its local ordering scheme is consistent between the local
781 and imported components of B.) Since they have different column maps, they have inconsistent
782 local column ids. This means the "first twist" won't work as stated on both matrices at the
783 same time. While this could be handled any number of ways, I have chosen to do the two parts
784 of B separately to make the code easier to read (and reduce the memory footprint of the MMM).
785 */
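 /* Micro-trace of twist 1 (illustration only): suppose row i of A touches
    B rows whose concatenated local column ids are {4, 7, 4}. The first hit
    on 4 appends c_cols[0]=4, c_vals[0]=A(i,k)*B(k,4) and sets c_index[4]=0;
    the hit on 7 appends at slot 1; the second hit on 4 finds c_index[4]==0
    and accumulates into c_vals[0]. The cleanup loop below then resets only
    c_index[4] and c_index[7] to -1 while converting c_cols to global ids. */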
786
787 // Local matrix: Zero Current counts for matrix
788 int c_current=0;
789
790 // Local matrix: Do the "middle product"
791 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
792 int Ak = Acolind[k];
793 double Aval = Avals[k];
794 // We're skipping remote entries on this pass.
795 if(Bview.remote[Ak] || (!keep_all_hard_zeros && Aval==0)) continue;
796
797 int* Bcol_inds = Bview.indices[Ak];
798 double* Bvals_k = Bview.values[Ak];
799
800 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
801 int col=Bcol_inds[j];
802 if(c_index[col]<0){
803 // We haven't seen this entry before; add it. (In ML, on
804 // the first pass, you haven't seen any of the entries
805 // before, so they are added without the check. Not sure
806 // how much more efficient that would be; depends on branch
807 // prediction. We've favored code readability here.)
808 c_cols[c_current]=col;
809 c_vals[c_current]=Aval*Bvals_k[j];
810 c_index[col]=c_current;
811 c_current++;
812 }
813 else{
814 // We've already seen this entry; accumulate it.
815 c_vals[c_index[col]]+=Aval*Bvals_k[j];
816 }
817 }
818 }
819 // Local matrix: Reset c_index and switch c_cols to GIDs
820 for(k=0; k<c_current; k++){
821 c_index[c_cols[k]]=-1;
822 c_cols[k]=bcols[c_cols[k]]; // Switch from local to global IDs.
823 }
824 // Local matrix: Insert.
825 //
826 // We should check global error results after the algorithm is
827 // through. It's probably safer just to let the algorithm run all
828 // the way through before doing this, since otherwise we have to
829 // remember to free all allocations carefully.
830 //
831 // FIXME (mfh 27 Mar 2015) This code collects error codes, but
832 // doesn't do anything with them. This results in build warnings
833 // (set but unused variable). Thus, I'm commenting out error code
834 // collection for now.
835#if 0
836 int err = C_filled ?
837 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
838 :
839 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
840#else
841 if (C_filled) {
842 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols);
843 } else {
844 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
845 }
846#endif // 0
847
848 // Remote matrix: Zero current counts again for matrix
849 c_current=0;
850
851 // Remote matrix: Do the "middle product"
852 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
853 int Ak = Acolind[k];
854 double Aval = Avals[k];
855 // We're skipping local entries on this pass.
856 if(!Bview.remote[Ak] || (!keep_all_hard_zeros && Aval==0)) continue;
857
858 int* Bcol_inds = Bview.indices[Ak];
859 double* Bvals_k = Bview.values[Ak];
860
861 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
862 int col=Bcol_inds[j];
863 if(c_index[col]<0){
864 c_cols[c_current]=col;
865 c_vals[c_current]=Aval*Bvals_k[j];
866 c_index[col]=c_current;
867 c_current++;
868 }
869 else{
870 c_vals[c_index[col]]+=Aval*Bvals_k[j];
871 }
872 }
873 }
874 // Remote matrix: Reset c_index and switch c_cols to GIDs
875 for(k=0; k<c_current; k++){
876 c_index[c_cols[k]]=-1;
877 c_cols[k]=bcols_import[c_cols[k]];
878 }
879 // Remote matrix: Insert
880 //
881 // See above (on error handling).
882#if 0
883 err = C_filled ?
884 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
885 :
886 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
887#else
888 if (C_filled) {
889 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols);
890 } else {
891 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
892 }
893#endif // 0
894 }
895
896 // Since Multiply won't do this
897 if(call_FillComplete_on_result)
898 C.FillComplete(B.DomainMap(),A.RangeMap());
899
899
900 delete [] dwork;
901 delete [] iwork;
902 delete [] c_index;
903 return(0);
904}
905
906
907
908
909
910/*****************************************************************************/
911/*****************************************************************************/
912/*****************************************************************************/
913template<typename int_type>
914int MatrixMatrix::Tmult_A_B(const Epetra_CrsMatrix & A,
915 CrsMatrixStruct & Aview,
916 const Epetra_CrsMatrix & B,
917 CrsMatrixStruct& Bview,
918 Epetra_CrsMatrix & C,
919 bool call_FillComplete_on_result,
920 bool keep_all_hard_zeros){
921
922 int i,rv;
923 Epetra_Map* mapunion = 0;
924 const Epetra_Map * colmap_B = &(B.ColMap());
925 const Epetra_Map * colmap_C = &(C.ColMap());
926
927 std::vector<int> Cremotepids;
928 std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
929 std::vector<int> Bimportcol2Ccol;
930 if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
931
932#ifdef ENABLE_MMM_TIMINGS
933 using Teuchos::TimeMonitor;
934 Teuchos::RCP<Teuchos::TimeMonitor> MM;
935#endif
936
937 // If the user doesn't want us to call FillComplete, use the general routine
938 if(!call_FillComplete_on_result) {
939#ifdef ENABLE_MMM_TIMINGS
940 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply")));
941#endif
942 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false,keep_all_hard_zeros);
943 return rv;
944 }
945
946 // Is this a "clean" matrix?
947 bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
948
949 // Does ExtractCrsDataPointers work?
950 int *C_rowptr, *C_colind;
951 double * C_vals;
952 C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
953 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
954
955 // It's an existing matrix whose data pointers can't be extracted (e.g. it hasn't been fill-completed), so use the general routine
956 if(!NewFlag && ExtractFailFlag){
957#ifdef ENABLE_MMM_TIMINGS
958 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply")));
959#endif
960 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result,keep_all_hard_zeros);
961 return rv;
962 }
963
964
965#ifdef ENABLE_MMM_TIMINGS
966 if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix CMap")));
967 else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse CMap")));
968#endif
969
970 // If new, build & clobber a colmap for C
971 if(NewFlag){
972 if(Bview.importMatrix) {
973 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
974 EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
975 }
976 else {
977 EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
978 for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
979
980 // Copy B's remote list (if any)
981 if(B.Importer())
982 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
983 }
984 }
985
986#ifdef ENABLE_MMM_TIMINGS
987 if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Lookups")));
988 else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Lookups")));
989#endif
990
991 // ********************************************
992 // Setup Bcol2Ccol / Bimportcol2Ccol lookups
993 // ********************************************
994 // Note: If we ran the map_union, we have this information already
995
996 if(!NewFlag) {
997 if(colmap_B->SameAs(*colmap_C)){
998 // Maps are the same: Use local IDs as the hash
999 for(i=0;i<colmap_B->NumMyElements();i++)
1000 Bcol2Ccol[i]=i;
1001 }
1002 else {
1003 // Maps are not the same: Use the map's hash
1004 for(i=0;i<colmap_B->NumMyElements();i++){
1005 Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1006 if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1007 }
1008 }
1009
1010 if(Bview.importMatrix){
1011 Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1012 for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1013 Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1014 if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1015 }
1016
1017 }
1018 }
1019#ifdef ENABLE_MMM_TIMINGS
1020 MM=Teuchos::null;
1021#endif
1022
1023 // Call the appropriate core routine
1024 if(NewFlag) {
1025 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C,keep_all_hard_zeros));
1026 }
1027 else {
1028 // This always has a real map
1029 EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C,keep_all_hard_zeros));
1030 }
1031
1032 // Cleanup
1033 delete mapunion;
1034 return 0;
1035}
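// Dispatch summary for Tmult_A_B (recap of the logic above): (1) if the
// caller doesn't want FillComplete, or C is an existing matrix whose CRS
// pointers can't be extracted, fall back to mult_A_B_general; (2) if C is
// brand new (indices neither local nor global), build an AztecOO/ML
// compatible column map and run mult_A_B_newmatrix; (3) otherwise reuse C's
// existing structure via mult_A_B_reuse.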
1036
1037
1038/*****************************************************************************/
1039/*****************************************************************************/
1040/*****************************************************************************/
1041int MatrixMatrix::mult_A_B(const Epetra_CrsMatrix & A,
1042 CrsMatrixStruct & Aview,
1043 const Epetra_CrsMatrix & B,
1044 CrsMatrixStruct& Bview,
1045 Epetra_CrsMatrix & C,
1046 bool call_FillComplete_on_result,
1047 bool keep_all_hard_zeros){
1048
1049#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1050 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1051 return Tmult_A_B<int>(A, Aview, B, Bview, C, call_FillComplete_on_result, keep_all_hard_zeros);
1052 }
1053 else
1054#endif
1055#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1056 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1057 return Tmult_A_B<long long>(A, Aview, B, Bview, C, call_FillComplete_on_result, keep_all_hard_zeros);
1058 }
1059 else
1060#endif
1061 throw "EpetraExt::MatrixMatrix::mult_A_B: GlobalIndices type unknown";
1062}
1063
1064
1065/*****************************************************************************/
1066/*****************************************************************************/
1067/*****************************************************************************/
1068template<typename int_type>
1069int jacobi_A_B_reuse(double omega,
1070 const Epetra_Vector & Dinv,
1071 const Epetra_CrsMatrix & A,
1072 const Epetra_CrsMatrix & B,
1073 CrsMatrixStruct& Bview,
1074 std::vector<int> & Bcol2Ccol,
1075 std::vector<int> & Bimportcol2Ccol,
1076 Epetra_CrsMatrix& C){
1077
1078#ifdef ENABLE_MMM_TIMINGS
1079 using Teuchos::TimeMonitor;
1080 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse SerialCore")));
1081#endif
1082
1083 // *****************************
1084 // Improved Parallel Gustavson in Local IDs
1085 // *****************************
1086 const Epetra_Map * colmap_C = &(C.ColMap());
1087
1088 int m=A.NumMyRows();
1089 int n=colmap_C->NumMyElements();
1090 int i,j,k;
1091
1092 // DataPointers for A
1093 int *Arowptr, *Acolind;
1094 double *Avals;
1095 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
1096
1097 // DataPointers for B, Bimport
1098 int *Browptr, *Bcolind;
1099 double *Bvals;
1100 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
1101
1102 int *Irowptr=0, *Icolind=0;
1103 double *Ivals=0;
1104 if(Bview.importMatrix){
1105 Irowptr = &Bview.importMatrix->rowptr_[0];
1106 Icolind = &Bview.importMatrix->colind_[0];
1107 Ivals = &Bview.importMatrix->vals_[0];
1108 }
1109
1110 // Data pointer for Dinv
1111 const double *Dvals = Dinv.Values();
1112
1113 // DataPointers for C
1114 int *CSR_rowptr, *CSR_colind;
1115 double *CSR_vals;
1116 EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
1117
1118
1119 // The status array will contain the index into colind where this entry was last deposited.
1120 // c_status[i] < CSR_ip - not in the row yet.
1121 // c_status[i] >= CSR_ip, this is the entry where you can find the data
1122 // We start with this filled with -1's indicating that there are no entries yet.
1123 std::vector<int> c_status(n, -1);
1124
1125 // Classic csr assembly
1126 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
1127 int CSR_ip=0,OLD_ip=0;
1128
1129 // For each row of C
1130 for(i=0; i<m; i++){
1131 double Dval = Dvals[i];
1132
1133 // Entries of B
1134 for(k=Browptr[i]; k<Browptr[i+1]; k++){
1135 // int Bk = Bcolind[k];
1136 double Bval = Bvals[k];
1137 if(Bval==0) continue;
1138 int Ck=Bcol2Ccol[Bcolind[k]];
1139
1140 // Assume no repeated entries in B
1141 c_status[Ck]=CSR_ip;
1142 CSR_colind[CSR_ip]=Ck;
1143 CSR_vals[CSR_ip]= Bvals[k];
1144 CSR_ip++;
1145 }
1146
1147 // Entries of -omega * Dinv * A * B
1148 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1149 int Ak=Acolind[k];
1150 double Aval = Avals[k];
1151 if(Aval==0) continue;
1152
1153 if(Bview.targetMapToOrigRow[Ak] != -1){
1154 // Local matrix
1155 int Bk = Bview.targetMapToOrigRow[Ak];
1156 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1157 int Cj=Bcol2Ccol[Bcolind[j]];
1158
1159 if(c_status[Cj]<OLD_ip){
1160 // New entry
1161 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
1162 c_status[Cj]=CSR_ip;
1163 CSR_colind[CSR_ip]=Cj;
1164 CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j];
1165 CSR_ip++;
1166 }
1167 else
1168 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1169 }
1170 }
1171 else{
1172 // Remote matrix
1173 int Ik = Bview.targetMapToImportRow[Ak];
1174 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1175 int Cj=Bimportcol2Ccol[Icolind[j]];
1176
1177 if(c_status[Cj]<OLD_ip){
1178 // New entry
1179 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
1180 c_status[Cj]=CSR_ip;
1181 CSR_colind[CSR_ip]=Cj;
1182 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1183 CSR_ip++;
1184 }
1185 else
1186 CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j];
1187 }
1188 }
1189 }
1190 OLD_ip=CSR_ip;
1191 }
1192
1193#ifdef ENABLE_MMM_TIMINGS
1194 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Final Sort")));
1195#endif
1196 // Sort the entries
1197 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
1198
1199 return 0;
1200}
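// What the two Jacobi kernels compute (summary): jacobi_A_B_reuse above and
// jacobi_A_B_newmatrix below both assemble
//
//   C = B - omega * Dinv * A * B = (I - omega * D^{-1} A) * B,
//
// i.e. one damped-Jacobi sweep applied to the columns of B, as used, for
// example, to smooth a tentative prolongator in smoothed-aggregation AMG.
// Both kernels assume B has no repeated column indices within a row, since
// the B pass deposits entries without consulting c_status first.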
1201
1202
1203/*****************************************************************************/
1204/*****************************************************************************/
1205/*****************************************************************************/
1206template<typename int_type>
1207int jacobi_A_B_newmatrix(double omega,
1208 const Epetra_Vector & Dinv,
1209 const Epetra_CrsMatrix & A,
1210 const Epetra_CrsMatrix & B,
1211 CrsMatrixStruct& Bview,
1212 std::vector<int> & Bcol2Ccol,
1213 std::vector<int> & Bimportcol2Ccol,
1214 std::vector<int>& Cremotepids,
1215 Epetra_CrsMatrix& C)
1216{
1217#ifdef ENABLE_MMM_TIMINGS
1218 using Teuchos::TimeMonitor;
1219 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix SerialCore")));
1220#endif
1221
1222 // *****************************
1223 // Improved Parallel Gustavson in Local IDs
1224 // *****************************
1225 const Epetra_Map * colmap_C = &(C.ColMap());
1226 int NumMyDiagonals=0; // Counter to speed up ESFC
1227
1228 int m=A.NumMyRows();
1229 int n=colmap_C->NumMyElements();
1230 int i,j,k;
1231
1232 // DataPointers for A
1233 int *Arowptr, *Acolind;
1234 double *Avals;
1235 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
1236
1237 // DataPointers for B, Bimport
1238 int *Browptr, *Bcolind;
1239 double *Bvals;
1240 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
1241
1242 // Data pointer for Dinv
1243 const double *Dvals = Dinv.Values();
1244
1245 int *Irowptr=0, *Icolind=0;
1246 double *Ivals=0;
1247 if(Bview.importMatrix){
1248 Irowptr = &Bview.importMatrix->rowptr_[0];
1249 Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
1250 Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
1251 }
1252
1253 // MemorySetup: If somebody else is sharing this C's graphdata, make a new one.
1254 // This is needed because I'm about to walk all over the CrsGraphData...
1255 EPETRA_CHK_ERR(C.ExpertMakeUniqueCrsGraphData());
1256
1257 // The status array will contain the index into colind where this entry was last deposited.
1258 // c_status[i] < CSR_ip - not in the row yet.
1259 // c_status[i] >= CSR_ip, this is the entry where you can find the data
1260 // We start with this filled with -1's indicating that there are no entries yet.
1261 std::vector<int> c_status(n, -1);
1262
1263 // Classic csr assembly (low memory edition)
1264 int CSR_alloc=C_estimate_nnz(A,B);
1265 if(CSR_alloc < B.NumMyNonzeros()) CSR_alloc = B.NumMyNonzeros(); // update for Jacobi
1266 int CSR_ip=0,OLD_ip=0;
1267 Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset();
1268 Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices();
1269 double *& CSR_vals = C.ExpertExtractValues();
1270
1271 CSR_rowptr.Resize(m+1);
1272 CSR_colind.Resize(CSR_alloc);
1273 resize_doubles(0,CSR_alloc,CSR_vals);
1274
1275 // Static Profile stuff
1276 std::vector<int> NumEntriesPerRow(m);
1277
1278 // For each row of C
1279 for(i=0; i<m; i++){
1280 bool found_diagonal=false;
1281 CSR_rowptr[i]=CSR_ip;
1282 double Dval = Dvals[i];
1283
1284 // Entries of B
1285 for(k=Browptr[i]; k<Browptr[i+1]; k++){
1286 //int Bk = Bcolind[k];
1287 double Bval = Bvals[k];
1288 if(Bval==0) continue;
1289 int Ck=Bcol2Ccol[Bcolind[k]];
1290
1291 // Assume no repeated entries in B
1292 c_status[Ck]=CSR_ip;
1293 CSR_colind[CSR_ip]=Ck;
1294 CSR_vals[CSR_ip]= Bvals[k];
1295 CSR_ip++;
1296 }
1297
1298 // Entries of -omega * Dinv * A * B
1299 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1300 int Ak = Acolind[k];
1301 double Aval = Avals[k];
1302 if(Aval==0) continue;
1303
1304 if(Bview.targetMapToOrigRow[Ak] != -1){
1305 // Local matrix
1306 int Bk = Bview.targetMapToOrigRow[Ak];
1307 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1308 int Cj=Bcol2Ccol[Bcolind[j]];
1309
1310 if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
1311
1312 if(c_status[Cj]<OLD_ip){
1313 // New entry
1314 c_status[Cj]=CSR_ip;
1315 CSR_colind[CSR_ip]=Cj;
1316 CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j];
1317 CSR_ip++;
1318 }
1319 else
1320 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1321 }
1322 }
1323 else{
1324 // Remote matrix
1325 int Ik = Bview.targetMapToImportRow[Ak];
1326 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1327 int Cj=Bimportcol2Ccol[Icolind[j]];
1328
1329 if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
1330
1331 if(c_status[Cj]<OLD_ip){
1332 // New entry
1333 c_status[Cj]=CSR_ip;
1334 CSR_colind[CSR_ip]=Cj;
1335 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1336 CSR_ip++;
1337 }
1338 else
1339 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j];
1340 }
1341 }
1342 }
1343 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
1344
1345 // Resize for next pass if needed
1346 if(CSR_ip + n > CSR_alloc){
1347 resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals);
1348 CSR_alloc*=2;
1349 CSR_colind.Resize(CSR_alloc);
1350 }
1351 OLD_ip=CSR_ip;
1352 }
1353
1354 CSR_rowptr[m]=CSR_ip;
1355
1356#ifdef ENABLE_MMM_TIMINGS
1357 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Final Sort")));
1358#endif
1359
1360 // Sort the entries
1361 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
1362
1363#ifdef ENABLE_MMM_TIMINGS
1364 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Fast IE")));
1365#endif
1366
1367 // Do a fast build of C's importer
1368 Epetra_Import * Cimport=0;
1369 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
1370 int NumExports=0;
1371 int *ExportLIDs=0, *ExportPIDs=0;
1372 if(Bview.importMatrix) {
1373 NumExports = Bview.importMatrix->ExportLIDs_.size();
1374 ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
1375 ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
1376 }
1377 else if(B.Importer()) {
1378 // Grab the exports from B proper
1379 NumExports = B.Importer()->NumExportIDs();
1380 ExportLIDs = B.Importer()->ExportLIDs();
1381 ExportPIDs = B.Importer()->ExportPIDs();
1382 }
1383
1384 if(!C.ColMap().SameAs(B.DomainMap()))
1385 Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
1386
1387#ifdef ENABLE_MMM_TIMINGS
1388 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix ESFC")));
1389#endif
1390
1391 // Update the CrsGraphData
1392 C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
1393
1394 return 0;
1395}
1396
1397
1398
1399/*****************************************************************************/
1400/*****************************************************************************/
1401/*****************************************************************************/
1402template<typename int_type>
1403int MatrixMatrix::Tjacobi_A_B(double omega,
1404 const Epetra_Vector & Dinv,
1405 const Epetra_CrsMatrix & A,
1406 CrsMatrixStruct & /* Aview */,
1407 const Epetra_CrsMatrix & B,
1408 CrsMatrixStruct& Bview,
1409 Epetra_CrsMatrix& C,
1410 bool call_FillComplete_on_result){
1411 int i,rv;
1412 Epetra_Map* mapunion = 0;
1413 const Epetra_Map * colmap_B = &(B.ColMap());
1414 const Epetra_Map * colmap_C = &(C.ColMap());
1415
1416 std::vector<int> Cremotepids;
1417 std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
1418 std::vector<int> Bimportcol2Ccol;
1419 if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1420
1421#ifdef ENABLE_MMM_TIMINGS
1422 using Teuchos::TimeMonitor;
1423 Teuchos::RCP<Teuchos::TimeMonitor> MM;
1424#endif
1425
1426 // If the user doesn't want us to call FillComplete, use the general routine
1427 if(!call_FillComplete_on_result) {
1428#ifdef ENABLE_MMM_TIMINGS
1429 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply")));
1430#endif
1431 throw std::runtime_error("jacobi_A_B_general not implemented");
1432 // rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false);
1433 return rv;
1434 }
1435
1436 // Is this a "clean" matrix?
1437 bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
1438
1439 // Does ExtractCrsDataPointers work?
1440 int *C_rowptr, *C_colind;
1441 double * C_vals;
1442 C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
1443 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
1444
1445 // It's an existing matrix whose data pointers can't be extracted (e.g. it hasn't been fill-completed), so use the general routine
1446 if(!NewFlag && ExtractFailFlag){
1447#ifdef ENABLE_MMM_TIMINGS
1448 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply")));
1449#endif
1450 throw std::runtime_error("jacobi_A_B_general not implemented");
1451 // rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result);
1452 return rv;
1453 }
1454
1455#ifdef ENABLE_MMM_TIMINGS
1456 if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix CMap")));
1457 else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse CMap")));
1458#endif
1459
1460 // If new, build & clobber a colmap for C
1461 if(NewFlag){
1462 if(Bview.importMatrix) {
1463 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
1464 EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
1465 }
1466 else {
1467 EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
1468 for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
1469
1470 // Copy B's remote list (if any)
1471 if(B.Importer())
1472 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
1473 }
1474 }
1475
1476#ifdef ENABLE_MMM_TIMINGS
1477 if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix Lookups")));
1478 else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Lookups")));
1479#endif
1480
1481 // ********************************************
1482 // Setup Bcol2Ccol / Bimportcol2Ccol lookups
1483 // ********************************************
1484 // Note: If we ran the map_union, we have this information already
1485
1486 if(!NewFlag) {
1487 if(colmap_B->SameAs(*colmap_C)){
1488 // Maps are the same: Use local IDs as the hash
1489 for(i=0;i<colmap_B->NumMyElements();i++)
1490 Bcol2Ccol[i]=i;
1491 }
1492 else {
1493 // Maps are not the same: Use the map's hash
1494 for(i=0;i<colmap_B->NumMyElements();i++){
1495 Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1496 if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1497 }
1498 }
1499
1500 if(Bview.importMatrix){
1501 Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1502 for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1503 Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1504 if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1505 }
1506
1507 }
1508 }
1509
1510
1511 // Call the appropriate core routine
1512 if(NewFlag) {
1513 EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
1514 }
1515 else {
1516 // This always has a real map
1517 EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
1518 }
1519
1520 // Cleanup
1521 delete mapunion;
1522 return 0;
1523}
1524
1525
1526/*****************************************************************************/
1527/*****************************************************************************/
1528/*****************************************************************************/
1529int MatrixMatrix::jacobi_A_B(double omega,
1530 const Epetra_Vector & Dinv,
1531 const Epetra_CrsMatrix & A,
1532 CrsMatrixStruct & Aview,
1533 const Epetra_CrsMatrix & B,
1534 CrsMatrixStruct& Bview,
1535 Epetra_CrsMatrix& C,
1536 bool call_FillComplete_on_result)
1537{
1538#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1539 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1540 return Tjacobi_A_B<int>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1541 }
1542 else
1543#endif
1544#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1545 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1546 return Tjacobi_A_B<long long>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1547 }
1548 else
1549#endif
1550 throw "EpetraExt::MatrixMatrix::jacobi_A_B: GlobalIndices type unknown";
1551}
1552
1553
1554
1555/*****************************************************************************/
1556/*****************************************************************************/
1557/*****************************************************************************/
1558template<typename int_type>
1559int MatrixMatrix::Tmult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C,bool keep_all_hard_zeros) {
1560 using Teuchos::RCP;
1561 using Teuchos::rcp;
1562
1563#ifdef ENABLE_MMM_TIMINGS
1564 using Teuchos::TimeMonitor;
1565 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T Transpose")));
1566#endif
1567
1568
1569 /*************************************************************/
1570 /* 2/3) Call mult_A_B_newmatrix w/ fillComplete */
1571 /*************************************************************/
1572#ifdef ENABLE_MMM_TIMINGS
1573 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T AB-core")));
1574#endif
1575 RCP<Epetra_CrsMatrix> Ctemp;
1576
1577 // If Atrans has no Exporter, we can use C instead of having to create a temp matrix
1578 bool needs_final_export = Atransview.origMatrix->Exporter() != 0;
1579 if(needs_final_export)
1580 Ctemp = rcp(new Epetra_CrsMatrix(Copy,Atransview.origMatrix->RowMap(),Bview.origMatrix->ColMap(),0));
1581 else {
1582 EPETRA_CHK_ERR( C.ReplaceColMap(Bview.origMatrix->ColMap()) );
1583 Ctemp = rcp(&C,false);// don't allow deallocation
1584 }
1585
1586 // Multiply
1587 std::vector<int> Bcol2Ccol(Bview.origMatrix->NumMyCols());
1588 for(int i=0; i<Bview.origMatrix->NumMyCols(); i++)
1589 Bcol2Ccol[i]=i;
1590 std::vector<int> Bimportcol2Ccol,Cremotepids;
1591 if(Bview.origMatrix->Importer())
1592 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*Bview.origMatrix->Importer(),Cremotepids));
1593
1594 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(*Atransview.origMatrix,*Bview.origMatrix,Bview,
1595 Bcol2Ccol,Bimportcol2Ccol,Cremotepids,
1596 *Ctemp,keep_all_hard_zeros));
1597
1598 /*************************************************************/
1599 /* 4) ExportAndFillComplete matrix (if needed) */
1600 /*************************************************************/
1601#ifdef ENABLE_MMM_TIMINGS
1602 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T ESFC")));
1603#endif
1604
1605 if(needs_final_export)
1606 C.FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.origMatrix->DomainMap(),&Atransview.origMatrix->RangeMap(),false);
1607
1608 return 0;
1609}
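// Note on the transpose path (summary): when the transposed A carries an
// Exporter (its row map is not the final row map of C), the product is
// formed in the temporary matrix Ctemp and then moved into C via
// FusedExport; otherwise the kernel writes directly into C and only swaps
// in B's column map.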
1610
1611
1612
1613/*****************************************************************************/
1614/*****************************************************************************/
1615/*****************************************************************************/
1616int MatrixMatrix::mult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C, bool keep_all_hard_zeros) {
1617
1618#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1619 if(Atransview.origMatrix->RowMap().GlobalIndicesInt() && Bview.origMatrix->RowMap().GlobalIndicesInt()) {
1620 return Tmult_AT_B_newmatrix<int>(Atransview,Bview,C,keep_all_hard_zeros);
1621 }
1622 else
1623#endif
1624#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1625 if(Atransview.origMatrix->RowMap().GlobalIndicesLongLong() && Bview.origMatrix->RowMap().GlobalIndicesLongLong()) {
1626 return Tmult_AT_B_newmatrix<long long>(Atransview,Bview,C,keep_all_hard_zeros);
1627 }
1628 else
1629#endif
1630 throw "EpetraExt::MatrixMatrix::mult_AT_B_newmatrix: GlobalIndices type unknown";
1631}
1632
1633
1634
1635}//namespace EpetraExt