Amesos Package Browser (Single Doxygen Collection) Development
Loading...
Searching...
No Matches
Superludist2_OO.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Amesos: Direct Sparse Solver Package
5// Copyright (2004) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
25//
26// ***********************************************************************
27// @HEADER
28
29#define KLOPTER
30#include "Superludist2_OO.h"
31#ifdef EPETRA_MPI
32#include "Epetra_MpiComm.h"
33#else
34 This code cannot be compiled without mpi.h.
35#include "Epetra_Comm.h"
36#endif
37#include "Epetra_Map.h"
38#include "Epetra_LocalMap.h"
39#include "Epetra_Vector.h"
40#include "Epetra_RowMatrix.h"
41#include "Epetra_Operator.h"
42#include "Epetra_Import.h"
43#include "Epetra_Export.h"
44#include "Epetra_CrsMatrix.h"
45#include "CrsMatrixTranspose.h"
46#include <vector>
47
48#ifdef DEBUG
49#include "Comm_assert_equal.h"
50 // #include "CrsMatricesAreIdentical.h"
51#endif
52#ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
53#include "ExtractCrsFromRowMatrix.h"
54#endif
55/*
56 Returns the largest number of rows that allows NumProcs to be used within a
57 rectangular grid with no more rows than columns.
58
59 i.e. max i such that there exists j > i such that i*j = NumProcs
60*/
61int SLU_NumProcRows( int NumProcs ) {
62#ifdef TFLOP
63 // Else, parameter 6 of DTRSV CTNLU is incorrect
64 return 1;
65#else
66 int i;
67 int NumProcRows ;
68 for ( i = 1; i*i <= NumProcs; i++ )
69 ;
70 bool done = false ;
71 for ( NumProcRows = i-1 ; done == false ; ) {
72 int NumCols = NumProcs / NumProcRows ;
73 if ( NumProcRows * NumCols == NumProcs )
74 done = true;
75 else
76 NumProcRows-- ;
77 }
78 return NumProcRows;
79#endif
80}
81
82//=============================================================================
83Superludist2_OO::Superludist2_OO(const Epetra_LinearProblem &prob ) {
84 // AllocAzArrays();
85
86 Problem_ = &prob ;
87 Transpose_ = false;
88 A_and_LU_built = false ;
89 Factored_ = false ;
90 FirstCallToSolve_ = true ;
91 //
92 // The following are initialized just on general principle
93 //
94 numprocs = -13 ;
95 nprow = -13 ;
96 npcol = -13 ;
97 numrows = -13 ;
98
99}
100
101//=============================================================================
103 // DeleteMemory();
104 // DeleteAzArrays();
105
106 if ( A_and_LU_built ) {
107 // Destroy_CompRowLoc_Matrix_dist(&A);
108 SUPERLU_FREE( A.Store );
109 ScalePermstructFree(&ScalePermstruct);
110 Destroy_LU(numrows, &grid, &LUstruct);
111 LUstructFree(&LUstruct);
112 if ( options.SolveInitialized ) {
113 dSolveFinalize(&options, &SOLVEstruct ) ;
114 }
115 superlu_gridexit(&grid);
116 }
117
118}
119
120//=============================================================================
121
122//
123// Solve() uses several intermediate matrices to convert the input matrix
124// to one that we can pass to the Sparse Direct Solver
125//
126// Epetra_RowMatrix *RowMatrixA - The input matrix
127// Epetra_CrsMatrix *CastCrsMatrixA - The input matrix casted to a crs matrix
128// Epetra_CrsMatrix ExtractCrsMatrixA - Converted to a Crs matrix
129// (Unused if RowMatrix is an Epetra_CrsMatrix)
130// Epetra_CrsMatrix *Phase2Mat - Guaranteed to be a CrsMatrix
131// Epetra_CrsMatrix SerialCrsMatrixA - Phase2Mat coalesced to one process
132// Epetra_CrsMatrix *Phase3Mat - A pseudo-distributed CrsMatrix
133// (i.e. stored exclusively on one process)
134// Epetra_CrsMatrix Phase3MatTrans - The transpose of Phase3Mat
135// Epetra_CrsMatrix *Phase4Mat - A pseudo-serial CrsMatrix with the
136// proper transposition
137// Epetra_CrsMatrix Phase5Mat - A replicated CrsMatrix with the
138// proper transposition
139//
140// This is what the code does:
141// Step 1) Convert the matrix to an Epetra_CrsMatrix
142// Step 2) Coalesce the matrix onto process 0
143// Step 3) Transpose the matrix
144// Step 4) Replicate the matrix
145// Step 5) Convert vector b to a replicated vector
146// Step 6) Convert the matrix to Ap, Ai, Aval
147// Step 7) Call SuperLUdist
148// Step 8) Convert vector x back to a distributed vector
149//
150// Remaining tasks:
151// 1) I still need to make it possible for Superludist2_OO to accept a
152// replicated matrix without using any additional memory.
153// I believe that all that I need is to move the definition
154// of ExtractCrsMatrixA, SerialCrsMatrixA and Phase3MatTrans up
155// to the top of the code and add a conditional which tests to
156// see if RowMatrixA is actually in the exact format that we need,
157// and if so, skip all the matrix transformations. Also, Phase5Mat
158// needs to be changed to Phase4Replicated with an added pointer, named
159// *Phase5Mat.
160// 2) Can we handle a transposed matrix any cleaner? We build Ap,
161// Ai and Avalues - can we build that as a transpose more efficiently
162// than doing a full CrsMatrix to CrsMatrix transpose?
163//
164// Memory usage:
165// ExtractCrsMatrixA - 1 if RowMAtrixA is not a CrsMatrix
166// SerialCrsMatrixA - 1 if RowMatrixA is not a serial matrix
167// Phase3MatTrans - 1 if RowMatrixA unless a transpose solve is requested
168// Phase5Mat - 1
169// If we need SerialCrsMAttrixA, then ExtractCrsMatrixA will not be a
170// serial matrix, hence three extra serial copies is the maximum.
171//
172
173#undef EPETRA_CHK_ERR
174#define EPETRA_CHK_ERR(xxx) assert( (xxx) == 0 )
175
176int Superludist2_OO::Solve(bool factor) {
177 //
178 // I am going to put these here until I determine that I need them in
179 // Superludist2_OO.h
180 //
181
182
183 bool CheckExtraction = false; // Set to true to force extraction for unit test
184
185 assert( GetTrans() == false ) ;
186
187 Epetra_RowMatrix *RowMatrixA =
188 dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
189
190 EPETRA_CHK_ERR( RowMatrixA == 0 ) ;
191
192 Epetra_CrsMatrix *CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA) ;
193#ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
194 Epetra_CrsMatrix *ExtractCrsMatrixA = 0;
195#endif
196 Epetra_CrsMatrix *Phase2Mat = 0 ;
197 const Epetra_Comm &Comm = RowMatrixA->Comm();
198
199 int iam = Comm.MyPID() ;
200#if 0
201 //
202 // The following lines allow us time to attach the debugger
203 //
204 int hatever;
205 if ( iam == 0 ) cin >> hatever ;
206 Comm.Barrier();
207#endif
208
209
210 //
211 // Step 1) Convert the matrix to an Epetra_CrsMatrix
212 //
213 // If RowMatrixA is not a CrsMatrix, i.e. the dynamic cast fails,
214 // extract a CrsMatrix from the RowMatrix.
215 //
216 if ( CastCrsMatrixA != 0 && ! CheckExtraction ) {
217 Phase2Mat = CastCrsMatrixA ;
218 } else {
219#ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
220 ExtractCrsMatrixA = new Epetra_CrsMatrix( *RowMatrixA ) ;
221
222 Phase2Mat = ExtractCrsMatrixA ;
223#ifdef DEBUG
224 if ( CheckExtraction )
225 assert( CrsMatricesAreIdentical( CastCrsMatrixA, ExtractCrsMatrixA ) ) ;
226#endif
227#else
228 assert( false ) ;
229#endif
230 }
231 assert( Phase2Mat != NULL ) ;
232 const Epetra_Map &Phase2Matmap = Phase2Mat->RowMap() ;
233
234 //
235 // Old Glossary:
236 // numrows, numcols = m,n, i.e. the size of the full, global, matrix
237 // m_loc = the number of rows owned by this process
238 // nnz_loc = the number of non zeros in the rows owned by this process
239 // Ap, Aval, Ai, = rowptr, nzval, colind, = sparse row storage
240 // MyRowPtr = a redundant computation of Ap (rowptr)
241
242 //
243 // Compute nnz_loc, m_loc, and MyRowPtr from the map
244 //
245
246#if 0
247 cout << " A Comm.NumProc() = " << Comm.NumProc() << endl ;
248
249 cout << " true = " << true << " LInMap = " << Phase2Matmap.LinearMap() << endl ; // Map must be contiguously divided
250 cout << " Superludist2_OO.cpp:: traceback mode = " << Epetra_Object::GetTracebackMode() << endl ;
251 cerr << " Send this to cerr cerr cerr traceback mode = " << Epetra_Object::GetTracebackMode() << endl ;
252#endif
253
254 numrows = Phase2Matmap.NumGlobalElements() ;
255 assert( numrows == Phase2Mat->NumGlobalRows() ) ;
256 int numcols = Phase2Mat->NumGlobalCols() ;
257 assert( numrows == numcols ) ;
258
259 //
260 // Here I compute what a uniform distribution should look like
261 // so that I can compare what I think MyFirstElement should be
262 // against what it really is.
263 //
264 int m_per_p = numrows / Comm.NumProc() ;
265 int remainder = numrows - ( m_per_p * Comm.NumProc() );
266 int MyFirstElement = iam * m_per_p + EPETRA_MIN( iam, remainder );
267 int MyFirstNonElement = (iam+1) * m_per_p + EPETRA_MIN( iam+1, remainder );
268 int NumExpectedElements = MyFirstNonElement - MyFirstElement ;
269
270
271 int IsLocal = ( Phase2Matmap.NumMyElements() ==
272 Phase2Matmap.NumGlobalElements() )?1:0;
273 Comm.Broadcast( &IsLocal, 1, 0 ) ;
274 //
275 // Step ?) Convert to a distributed matrix, if appropriate
276 //
277 Epetra_CrsMatrix *Phase3Mat = 0 ;
278 Epetra_Map DistMap( Phase2Matmap.NumGlobalElements(), NumExpectedElements, 0, Comm );
279 Epetra_CrsMatrix DistCrsMatrixA(Copy, DistMap, 0);
280
281 bool redistribute = true ;
282 if ( redistribute ) {
283
284 Epetra_Export export_to_dist( Phase2Matmap, DistMap);
285
286 DistCrsMatrixA.Export( *Phase2Mat, export_to_dist, Add );
287
288 DistCrsMatrixA.FillComplete() ;
289 Phase3Mat = &DistCrsMatrixA ;
290
291
292 } else {
293 {
294 EPETRA_CHK_ERR( ! ( Phase2Matmap.LinearMap()) ) ; // Map must be contiguously divided
295 //
296 // This is another way to check that the distribution is as pdgssvx expects it
297 // (i.e. a linear map)
298 //
299 int Phase2NumElements = Phase2Matmap.NumMyElements() ;
300 vector <int> MyRows( Phase2NumElements ) ;
301 Phase2Matmap.MyGlobalElements( &MyRows[0] ) ;
302 for (int row = 0 ; row < Phase2NumElements ; row++ ) {
303 EPETRA_CHK_ERR( MyFirstElement+row != MyRows[row] ) ;
304 }
305 }
306 Phase3Mat = Phase2Mat ;
307 }
308
309#if 0
310 assert( MyFirstElement == Phase3Mat->RowMap().MinMyGID() ) ;
311 assert( NumExpectedElements == Phase3Mat->RowMap().NumMyElements() ) ;
312 assert( MyFirstElement+NumExpectedElements-1 == Phase3Mat->RowMap().MaxMyGID() ) ;
313#endif
314 // Comm.Barrier();
315 int MyActualFirstElement = Phase3Mat->RowMap().MinMyGID() ;
316 int NumMyElements = Phase3Mat->NumMyRows() ;
317 vector <int> MyRowPtr( NumMyElements+1 ) ;
318 //
319 // This is actually redundant with the Ap below
320 //
321 MyRowPtr[0] = 0 ;
322 int CurrentRowPtr = 0 ;
323 for ( int i = 0; i < NumMyElements ; i++ ) {
324 CurrentRowPtr += Phase3Mat->NumMyEntries( i ) ;
325 MyRowPtr[i+1] = CurrentRowPtr ;
326 }
327
328 //
329 // Extract nzval(Aval) and colind(Ai) from the CrsMatrix (Phase3Mat)
330 //
331
332 int nnz_loc = Phase3Mat->NumMyNonzeros() ;
333 if ( factor ) {
334 //
335 // Step 6) Convert the matrix to Ap, Ai, Aval
336 //
337 Ap.resize( NumMyElements+1 );
338 Ai.resize( EPETRA_MAX( NumMyElements, nnz_loc) ) ;
339 Aval.resize( EPETRA_MAX( NumMyElements, nnz_loc) ) ;
340
341 int NzThisRow ;
342 double *RowValues;
343 int *ColIndices;
344 int Ai_index = 0 ;
345 int MyRow;
346 int num_my_cols = Phase3Mat->NumMyCols() ;
347 vector <int>Global_Columns( num_my_cols ) ;
348 for ( int i = 0 ; i < num_my_cols ; i ++ ) {
349 Global_Columns[i] = Phase3Mat->GCID( i ) ;
350 }
351
352 for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) {
353 int status = Phase3Mat->ExtractMyRowView( MyRow, NzThisRow, RowValues, ColIndices ) ;
354 assert( status == 0 ) ;
355 Ap[MyRow] = Ai_index ;
356 assert( Ap[MyRow] == MyRowPtr[MyRow] ) ;
357 for ( int j = 0; j < NzThisRow; j++ ) {
358 Ai[Ai_index] = Global_Columns[ColIndices[j]] ;
359 Aval[Ai_index] = RowValues[j] ;
360 Ai_index++;
361 }
362 }
363 assert( NumMyElements == MyRow );
364 Ap[ NumMyElements ] = Ai_index ;
365 }
366
367 //
368 // Pull B out of the Epetra_vector
369 //
370
371 Epetra_MultiVector *vecX = Problem_->GetLHS() ;
372 Epetra_MultiVector *vecB = Problem_->GetRHS() ;
373
374 int nrhs;
375 if ( vecX == 0 ) {
376 nrhs = 0 ;
377 EPETRA_CHK_ERR( vecB != 0 ) ;
378 } else {
379 nrhs = vecX->NumVectors() ;
380 EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ;
381 }
382
383 Epetra_MultiVector vecXdistributed( DistMap, nrhs ) ;
384 Epetra_MultiVector vecBdistributed( DistMap, nrhs ) ;
385
386
387 double *bValues ;
388 double *xValues ;
389 int ldb, ldx ;
390
391 Epetra_MultiVector* vecXptr;
392 Epetra_MultiVector* vecBptr;
393
394 if ( redistribute ) {
395 Epetra_Import ImportToDistributed( DistMap, Phase2Matmap);
396
397 vecXdistributed.Import( *vecX, ImportToDistributed, Insert ) ;
398 vecBdistributed.Import( *vecB, ImportToDistributed, Insert ) ;
399
400 vecXptr = &vecXdistributed ;
401 vecBptr = &vecBdistributed ;
402 } else {
403 vecXptr = vecX ;
404 vecBptr = vecB ;
405 }
406
407
408 EPETRA_CHK_ERR( vecBptr->ExtractView( &bValues, &ldb ) ) ;
409 EPETRA_CHK_ERR( vecXptr->ExtractView( &xValues, &ldx ) ) ;
410 EPETRA_CHK_ERR( ! ( ldx == ldb ) ) ;
411 EPETRA_CHK_ERR( ! ( ldx == NumMyElements ) ) ;
412
413 //
414 // Step 7) Call SuperLUdist
415 //
416
417 //
418 // This really belongs somewhere else - perhaps in the constructor
419 //
420 const Epetra_MpiComm & comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm);
421 MPI_Comm MPIC = comm1.Comm() ;
422
423 if ( factor ) {
424 numprocs = Comm.NumProc() ;
426 npcol = numprocs / nprow ;
427 assert ( nprow * npcol == numprocs ) ;
428 superlu_gridinit( MPIC, nprow, npcol, &grid);
429
430
431 } else {
432 assert( numprocs == Comm.NumProc() ) ;
433 }
434
435 /* Bail out if I do not belong in the grid. */
436 if ( iam < nprow * npcol ) {
437
438 if ( factor ) {
439 set_default_options(&options);
440
441 dCreate_CompRowLoc_Matrix_dist( &A, numrows, numcols,
442 nnz_loc, NumMyElements, MyActualFirstElement,
443 &Aval[0], &Ai[0], &Ap[0],
444 SLU_NR_loc, SLU_D, SLU_GE );
445
446 A_and_LU_built = true;
447
448 /* Initialize ScalePermstruct and LUstruct. */
449 ScalePermstructInit(numrows, numcols, &ScalePermstruct);
450 LUstructInit(numrows, numcols, &LUstruct);
451
452 assert( options.Fact == DOFACT );
453 options.Fact = DOFACT ;
454
455 Factored_ = true;
456 } else {
457 assert( Factored_ == true ) ;
458 EPETRA_CHK_ERR( Factored_ == false ) ;
459 options.Fact = FACTORED ;
460 }
461 //
462 // pdgssvx returns x in b, so we copy b into x.
463 //
464 for ( int j = 0 ; j < nrhs; j++ )
465 for ( int i = 0 ; i < NumMyElements; i++ ) xValues[i+j*ldx] = bValues[i+j*ldb];
466
467 PStatInit(&stat); /* Initialize the statistics variables. */
468
469 int info ;
470 vector<double>berr(nrhs);
471 pdgssvx(&options, &A, &ScalePermstruct, &xValues[0], ldx, nrhs, &grid,
472 &LUstruct, &SOLVEstruct, &berr[0], &stat, &info);
473 EPETRA_CHK_ERR( info ) ;
474
475 PStatFree(&stat);
476
477 }
478
479 if ( redistribute ) {
480 Epetra_Import ImportBackToOriginal( Phase2Matmap,DistMap);
481
482 vecX->Import( *vecXptr, ImportBackToOriginal, Insert ) ;
483 }
484
485
486 return(0) ;
487}
int SLU_NumProcRows(int NumProcs)
#define EPETRA_CHK_ERR(xxx)
vector< int > Ai
SuperLUStat_t stat
vector< double > Aval
const Epetra_LinearProblem * Problem_
bool GetTrans() const
Return the transpose flag.
Superludist2_OO(const Epetra_LinearProblem &LinearProblem)
Superludist2_OO Constructor.
int Solve(bool Factor)
All computation is performed during the call to Solve()
SOLVEstruct_t SOLVEstruct
vector< int > Ap
virtual ~Superludist2_OO(void)
Superludist2_OO Destructor.
superlu_options_t options
ScalePermstruct_t ScalePermstruct