Actual source code: ex192.c
1: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\
2: Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";
4: #include <petscmat.h>
6: int main(int argc,char **args)
7: {
8: Mat A,RHS,C,F,X,S;
9: Vec u,x,b;
10: Vec xschur,bschur,uschur;
11: IS is_schur;
13: PetscMPIInt size;
14: PetscInt isolver=0,size_schur,m,n,nfact,nsolve,nrhs;
15: PetscReal norm,tol=PETSC_SQRT_MACHINE_EPSILON;
16: PetscRandom rand;
17: PetscBool data_provided,herm,symm,use_lu,cuda = PETSC_FALSE;
18: PetscReal sratio = 5.1/12.;
19: PetscViewer fd; /* viewer */
20: char solver[256];
21: char file[PETSC_MAX_PATH_LEN]; /* input file name */
23: PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
24: MPI_Comm_size(PETSC_COMM_WORLD, &size);
25: if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This is a uniprocessor test");
26: /* Determine which type of solver we want to test for */
27: herm = PETSC_FALSE;
28: symm = PETSC_FALSE;
29: PetscOptionsGetBool(NULL,NULL,"-symmetric_solve",&symm,NULL);
30: PetscOptionsGetBool(NULL,NULL,"-hermitian_solve",&herm,NULL);
31: if (herm) symm = PETSC_TRUE;
32: PetscOptionsGetBool(NULL,NULL,"-cuda_solve",&cuda,NULL);
33: PetscOptionsGetReal(NULL,NULL,"-tol",&tol,NULL);
35: /* Determine file from which we read the matrix A */
36: PetscOptionsGetString(NULL,NULL,"-f",file,sizeof(file),&data_provided);
37: if (!data_provided) { /* get matrices from PETSc distribution */
38: PetscStrncpy(file,"${PETSC_DIR}/share/petsc/datafiles/matrices/",sizeof(file));
39: if (symm) {
40: #if defined (PETSC_USE_COMPLEX)
41: PetscStrlcat(file,"hpd-complex-",sizeof(file));
42: #else
43: PetscStrlcat(file,"spd-real-",sizeof(file));
44: #endif
45: } else {
46: #if defined (PETSC_USE_COMPLEX)
47: PetscStrlcat(file,"nh-complex-",sizeof(file));
48: #else
49: PetscStrlcat(file,"ns-real-",sizeof(file));
50: #endif
51: }
52: #if defined(PETSC_USE_64BIT_INDICES)
53: PetscStrlcat(file,"int64-",sizeof(file));
54: #else
55: PetscStrlcat(file,"int32-",sizeof(file));
56: #endif
57: #if defined (PETSC_USE_REAL_SINGLE)
58: PetscStrlcat(file,"float32",sizeof(file));
59: #else
60: PetscStrlcat(file,"float64",sizeof(file));
61: #endif
62: }
63: /* Load matrix A */
64: PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);
65: MatCreate(PETSC_COMM_WORLD,&A);
66: MatLoad(A,fd);
67: PetscViewerDestroy(&fd);
68: MatGetSize(A,&m,&n);
69: if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%d, %d)", m, n);
71: /* Create dense matrix C and X; C holds true solution with identical columns */
72: nrhs = 2;
73: PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
74: MatCreate(PETSC_COMM_WORLD,&C);
75: MatSetSizes(C,m,PETSC_DECIDE,PETSC_DECIDE,nrhs);
76: MatSetType(C,MATDENSE);
77: MatSetFromOptions(C);
78: MatSetUp(C);
80: PetscRandomCreate(PETSC_COMM_WORLD,&rand);
81: PetscRandomSetFromOptions(rand);
82: MatSetRandom(C,rand);
83: MatDuplicate(C,MAT_DO_NOT_COPY_VALUES,&X);
85: /* Create vectors */
86: VecCreate(PETSC_COMM_WORLD,&x);
87: VecSetSizes(x,n,PETSC_DECIDE);
88: VecSetFromOptions(x);
89: VecDuplicate(x,&b);
90: VecDuplicate(x,&u); /* save the true solution */
92: PetscOptionsGetInt(NULL,NULL,"-solver",&isolver,NULL);
93: switch (isolver) {
94: #if defined(PETSC_HAVE_MUMPS)
95: case 0:
96: PetscStrcpy(solver,MATSOLVERMUMPS);
97: break;
98: #endif
99: #if defined(PETSC_HAVE_MKL_PARDISO)
100: case 1:
101: PetscStrcpy(solver,MATSOLVERMKL_PARDISO);
102: break;
103: #endif
104: default:
105: PetscStrcpy(solver,MATSOLVERPETSC);
106: break;
107: }
109: #if defined (PETSC_USE_COMPLEX)
110: if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */
111: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.);
112: PetscScalar val = -1.0;
113: val = val + im;
114: MatSetValue(A,1,0,val,INSERT_VALUES);
115: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
116: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
117: }
118: #endif
120: PetscOptionsGetReal(NULL,NULL,"-schur_ratio",&sratio,NULL);
121: if (sratio < 0. || sratio > 1.) {
122: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %f", sratio);
123: }
124: size_schur = (PetscInt)(sratio*m);
126: PetscPrintf(PETSC_COMM_SELF,"Solving with %s: nrhs %D, sym %d, herm %d, size schur %D, size mat %D\n",solver,nrhs,symm,herm,size_schur,m);
128: /* Test LU/Cholesky Factorization */
129: use_lu = PETSC_FALSE;
130: if (!symm) use_lu = PETSC_TRUE;
131: #if defined (PETSC_USE_COMPLEX)
132: if (isolver == 1) use_lu = PETSC_TRUE;
133: #endif
134: if (cuda && symm && !herm) use_lu = PETSC_TRUE;
136: if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
137: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
138: MatConvert(A,MATSEQSBAIJ,MAT_INPLACE_MATRIX,&A);
139: }
141: if (use_lu) {
142: MatGetFactor(A,solver,MAT_FACTOR_LU,&F);
143: } else {
144: if (herm) {
145: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
146: MatSetOption(A,MAT_SPD,PETSC_TRUE);
147: } else {
148: MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
149: MatSetOption(A,MAT_SPD,PETSC_FALSE);
150: }
151: MatGetFactor(A,solver,MAT_FACTOR_CHOLESKY,&F);
152: }
153: ISCreateStride(PETSC_COMM_SELF,size_schur,m-size_schur,1,&is_schur);
154: MatFactorSetSchurIS(F,is_schur);
156: ISDestroy(&is_schur);
157: if (use_lu) {
158: MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
159: } else {
160: MatCholeskyFactorSymbolic(F,A,NULL,NULL);
161: }
163: for (nfact = 0; nfact < 3; nfact++) {
164: Mat AD;
166: if (!nfact) {
167: VecSetRandom(x,rand);
168: if (symm && herm) {
169: VecAbs(x);
170: }
171: MatDiagonalSet(A,x,ADD_VALUES);
172: }
173: if (use_lu) {
174: MatLUFactorNumeric(F,A,NULL);
175: } else {
176: MatCholeskyFactorNumeric(F,A,NULL);
177: }
178: if (cuda) {
179: MatFactorGetSchurComplement(F,&S,NULL);
180: MatSetType(S,MATSEQDENSECUDA);
181: MatCreateVecs(S,&xschur,&bschur);
182: MatFactorRestoreSchurComplement(F,&S,MAT_FACTOR_SCHUR_UNFACTORED);
183: }
184: MatFactorCreateSchurComplement(F,&S,NULL);
185: if (!cuda) {
186: MatCreateVecs(S,&xschur,&bschur);
187: }
188: VecDuplicate(xschur,&uschur);
189: if (nfact == 1 && (!cuda || (herm && symm))) {
190: MatFactorInvertSchurComplement(F);
191: }
192: for (nsolve = 0; nsolve < 2; nsolve++) {
193: VecSetRandom(x,rand);
194: VecCopy(x,u);
196: if (nsolve) {
197: MatMult(A,x,b);
198: MatSolve(F,b,x);
199: } else {
200: MatMultTranspose(A,x,b);
201: MatSolveTranspose(F,b,x);
202: }
203: /* Check the error */
204: VecAXPY(u,-1.0,x); /* u <- (-1.0)x + u */
205: VecNorm(u,NORM_2,&norm);
206: if (norm > tol) {
207: PetscReal resi;
208: if (nsolve) {
209: MatMult(A,x,u); /* u = A*x */
210: } else {
211: MatMultTranspose(A,x,u); /* u = A*x */
212: }
213: VecAXPY(u,-1.0,b); /* u <- (-1.0)b + u */
214: VecNorm(u,NORM_2,&resi);
215: if (nsolve) {
216: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatSolve error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
217: } else {
218: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatSolveTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
219: }
220: }
221: VecSetRandom(xschur,rand);
222: VecCopy(xschur,uschur);
223: if (nsolve) {
224: MatMult(S,xschur,bschur);
225: MatFactorSolveSchurComplement(F,bschur,xschur);
226: } else {
227: MatMultTranspose(S,xschur,bschur);
228: MatFactorSolveSchurComplementTranspose(F,bschur,xschur);
229: }
230: /* Check the error */
231: VecAXPY(uschur,-1.0,xschur); /* u <- (-1.0)x + u */
232: VecNorm(uschur,NORM_2,&norm);
233: if (norm > tol) {
234: PetscReal resi;
235: if (nsolve) {
236: MatMult(S,xschur,uschur); /* u = A*x */
237: } else {
238: MatMultTranspose(S,xschur,uschur); /* u = A*x */
239: }
240: VecAXPY(uschur,-1.0,bschur); /* u <- (-1.0)b + u */
241: VecNorm(uschur,NORM_2,&resi);
242: if (nsolve) {
243: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatFactorSolveSchurComplement error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
244: } else {
245: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n",nfact,nsolve,norm,resi);
246: }
247: }
248: }
249: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&AD);
250: if (!nfact) {
251: MatMatMult(AD,C,MAT_INITIAL_MATRIX,2.0,&RHS);
252: } else {
253: MatMatMult(AD,C,MAT_REUSE_MATRIX,2.0,&RHS);
254: }
255: MatDestroy(&AD);
256: for (nsolve = 0; nsolve < 2; nsolve++) {
257: MatMatSolve(F,RHS,X);
259: /* Check the error */
260: MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);
261: MatNorm(X,NORM_FROBENIUS,&norm);
262: if (norm > tol) {
263: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) MatMatSolve: Norm of error %g\n",nfact,nsolve,norm);
264: }
265: }
266: if (isolver == 0) {
267: Mat spRHS,spRHST,RHST;
269: MatTranspose(RHS,MAT_INITIAL_MATRIX,&RHST);
270: MatConvert(RHST,MATSEQAIJ,MAT_INITIAL_MATRIX,&spRHST);
271: MatCreateTranspose(spRHST,&spRHS);
272: for (nsolve = 0; nsolve < 2; nsolve++) {
273: MatMatSolve(F,spRHS,X);
275: /* Check the error */
276: MatAXPY(X,-1.0,C,SAME_NONZERO_PATTERN);
277: MatNorm(X,NORM_FROBENIUS,&norm);
278: if (norm > tol) {
279: PetscPrintf(PETSC_COMM_SELF,"(f %D, s %D) sparse MatMatSolve: Norm of error %g\n",nfact,nsolve,norm);
280: }
281: }
282: MatDestroy(&spRHST);
283: MatDestroy(&spRHS);
284: MatDestroy(&RHST);
285: }
286: MatDestroy(&S);
287: VecDestroy(&xschur);
288: VecDestroy(&bschur);
289: VecDestroy(&uschur);
290: }
291: /* Free data structures */
292: MatDestroy(&A);
293: MatDestroy(&C);
294: MatDestroy(&F);
295: MatDestroy(&X);
296: MatDestroy(&RHS);
297: PetscRandomDestroy(&rand);
298: VecDestroy(&x);
299: VecDestroy(&b);
300: VecDestroy(&u);
301: PetscFinalize();
302: return ierr;
303: }
305: /*TEST
307: testset:
308: requires: mkl_pardiso double !complex
309: args: -solver 1
311: test:
312: suffix: mkl_pardiso
313: test:
314: requires: cuda
315: suffix: mkl_pardiso_cuda
316: args: -cuda_solve
317: output_file: output/ex192_mkl_pardiso.out
318: test:
319: suffix: mkl_pardiso_1
320: args: -symmetric_solve
321: output_file: output/ex192_mkl_pardiso_1.out
322: test:
323: requires: cuda
324: suffix: mkl_pardiso_cuda_1
325: args: -symmetric_solve -cuda_solve
326: output_file: output/ex192_mkl_pardiso_1.out
327: test:
328: suffix: mkl_pardiso_3
329: args: -symmetric_solve -hermitian_solve
330: output_file: output/ex192_mkl_pardiso_3.out
331: test:
332: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
333: suffix: mkl_pardiso_cuda_3
334: args: -symmetric_solve -hermitian_solve -cuda_solve
335: output_file: output/ex192_mkl_pardiso_3.out
337: testset:
338: requires: mumps double !complex
339: args: -solver 0
341: test:
342: suffix: mumps
343: test:
344: requires: cuda
345: suffix: mumps_cuda
346: args: -cuda_solve
347: output_file: output/ex192_mumps.out
348: test:
349: suffix: mumps_2
350: args: -symmetric_solve
351: output_file: output/ex192_mumps_2.out
352: test:
353: requires: cuda
354: suffix: mumps_cuda_2
355: args: -symmetric_solve -cuda_solve
356: output_file: output/ex192_mumps_2.out
357: test:
358: suffix: mumps_3
359: args: -symmetric_solve -hermitian_solve
360: output_file: output/ex192_mumps_3.out
361: test:
362: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
363: suffix: mumps_cuda_3
364: args: -symmetric_solve -hermitian_solve -cuda_solve
365: output_file: output/ex192_mumps_3.out
367: TEST*/