Actual source code: comb.c


  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc/private/vecimpl.h>

 24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 25: {

 29: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 30:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 31: #else
 32:   MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 33:   *request = MPI_REQUEST_NULL;
 34: #endif
 35:   return(0);
 36: }

 38: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 40: /*
 41:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 42: */
 43: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 44: {

 48:   PetscNew(sr);
 49:   (*sr)->numopsbegin = 0;
 50:   (*sr)->numopsend   = 0;
 51:   (*sr)->state       = STATE_BEGIN;
 52: #define MAXOPS 32
 53:   (*sr)->maxops      = MAXOPS;
 54:   PetscMalloc6(MAXOPS,&(*sr)->lvalues,MAXOPS,&(*sr)->gvalues,MAXOPS,&(*sr)->invecs,MAXOPS,&(*sr)->reducetype,MAXOPS,&(*sr)->lvalues_mix,MAXOPS,&(*sr)->gvalues_mix);
 55: #undef MAXOPS
 56:   (*sr)->comm        = comm;
 57:   (*sr)->request     = MPI_REQUEST_NULL;
 58:   (*sr)->mix         = PETSC_FALSE;
 59:   (*sr)->async       = PETSC_FALSE;
 60: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 61:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
 62: #endif
 63:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 64:   PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
 65:   return(0);
 66: }

 68: /*
 69:        This function is the MPI reduction operation used when there is
 70:    a combination of sums and max in the reduction. The call below to
 71:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 72:    MPI operator PetscSplitReduction_Op.
 73: */
 74: MPI_Op PetscSplitReduction_Op = 0;

 76: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 77: {
 78:   struct PetscScalarInt { PetscScalar v; PetscInt i; };
 79:   struct PetscScalarInt *xin = (struct PetscScalarInt*)in;
 80:   struct PetscScalarInt *xout = (struct PetscScalarInt*)out;
 81:   PetscInt              i,count = (PetscInt)*cnt;

 84:   if (*datatype != MPIU_SCALAR_INT) {
 85:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types");
 86:     PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
 87:   }
 88:   for (i=0; i<count; i++) {
 89:     if      (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
 90:     else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v),PetscRealPart(xin[i].v));
 91:     else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v),PetscRealPart(xin[i].v));
 92:     else {
 93:       (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
 94:       PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
 95:     }
 96:   }
 97:   PetscFunctionReturnVoid();
 98: }

100: /*@
101:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

103:    Collective but not synchronizing

105:    Input Parameter:
106:    comm - communicator on which split reduction has been queued

108:    Level: advanced

110:    Note:
111:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
112:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

114: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
115: @*/
116: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
117: {
118:   PetscErrorCode      ierr;
119:   PetscSplitReduction *sr;

122:   PetscSplitReductionGet(comm,&sr);
123:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
124:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
125:     PetscInt    i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
126:     PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
127:     PetscInt    sum_flg = 0,max_flg = 0, min_flg = 0;
128:     MPI_Comm    comm = sr->comm;
129:     PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

131:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
132:     MPI_Comm_size(sr->comm,&size);
133:     if (size == 1) {
134:       PetscArraycpy(gvalues,lvalues,numops);
135:     } else {
136:       /* determine if all reductions are sum, max, or min */
137:       for (i=0; i<numops; i++) {
138:         if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
139:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
140:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
141:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
142:       }
143:       if (sum_flg + max_flg + min_flg > 1 && sr->mix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
144:       if (sum_flg + max_flg + min_flg > 1) {
145:         sr->mix = PETSC_TRUE;
146:         for (i=0; i<numops; i++) { sr->lvalues_mix[i].v = lvalues[i]; sr->lvalues_mix[i].i = reducetype[i]; }
147:         MPIPetsc_Iallreduce(sr->lvalues_mix,sr->gvalues_mix,numops,MPIU_SCALAR_INT,PetscSplitReduction_Op,comm,&sr->request);
148:       } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
149:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
150:       } else if (min_flg) {
151:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
152:       } else {
153:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
154:       }
155:     }
156:     sr->state     = STATE_PENDING;
157:     sr->numopsend = 0;
158:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
159:   } else {
160:     PetscSplitReductionApply(sr);
161:   }
162:   return(0);
163: }

165: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
166: {

170:   switch (sr->state) {
171:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
172:     PetscSplitReductionApply(sr);
173:     break;
174:   case STATE_PENDING:
175:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
176:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
177:     if (sr->request != MPI_REQUEST_NULL) {
178:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
179:     }
180:     sr->state = STATE_END;
181:     if (sr->mix) {
182:       PetscInt i;
183:       for (i=0; i<sr->numopsbegin; i++) { sr->gvalues[i] = sr->gvalues_mix[i].v; }
184:       sr->mix = PETSC_FALSE;
185:     }
186:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
187:     break;
188:   default: break;            /* everything is already done */
189:   }
190:   return(0);
191: }

193: /*
194:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
195: */
196: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
197: {
199:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
200:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
201:   PetscInt       sum_flg  = 0,max_flg = 0, min_flg = 0;
202:   MPI_Comm       comm     = sr->comm;
203:   PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

206:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
207:   PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
208:   MPI_Comm_size(sr->comm,&size);
209:   if (size == 1) {
210:     PetscArraycpy(gvalues,lvalues,numops);
211:   } else {
212:     /* determine if all reductions are sum, max, or min */
213:     for (i=0; i<numops; i++) {
214:       if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
215:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
216:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
217:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
218:     }
219:     if (sum_flg + max_flg + min_flg > 1) {
220:       if (sr->mix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
221:       for (i=0; i<numops; i++) { sr->lvalues_mix[i].v = lvalues[i]; sr->lvalues_mix[i].i = reducetype[i]; }
222:       MPIU_Allreduce(sr->lvalues_mix,sr->gvalues_mix,numops,MPIU_SCALAR_INT,PetscSplitReduction_Op,comm);
223:       for (i=0; i<numops; i++) { sr->gvalues[i] = sr->gvalues_mix[i].v; }
224:     } else if (max_flg) {     /* Compute max of real and imag parts separately, presumably only the real part is used */
225:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
226:     } else if (min_flg) {
227:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
228:     } else {
229:       MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
230:     }
231:   }
232:   sr->state     = STATE_END;
233:   sr->numopsend = 0;
234:   PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
235:   return(0);
236: }

238: /*
239:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240: */
241: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
242: {
243:   struct PetscScalarInt { PetscScalar v; PetscInt i; };
244:   PetscErrorCode        ierr;
245:   PetscInt              maxops   = sr->maxops,*reducetype = sr->reducetype;
246:   PetscScalar           *lvalues = sr->lvalues,*gvalues = sr->gvalues;
247:   struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt*)sr->lvalues_mix;
248:   struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt*)sr->gvalues_mix;
249:   void                  **invecs = sr->invecs;

252:   sr->maxops = 2*maxops;
253:   PetscMalloc6(2*maxops,&sr->lvalues,2*maxops,&sr->gvalues,2*maxops,&sr->reducetype,2*maxops,&sr->invecs,2*maxops,&sr->lvalues_mix,2*maxops,&sr->gvalues_mix);
254:   PetscArraycpy(sr->lvalues,lvalues,maxops);
255:   PetscArraycpy(sr->gvalues,gvalues,maxops);
256:   PetscArraycpy(sr->reducetype,reducetype,maxops);
257:   PetscArraycpy(sr->invecs,invecs,maxops);
258:   PetscArraycpy(sr->lvalues_mix,lvalues_mix,maxops);
259:   PetscArraycpy(sr->gvalues_mix,gvalues_mix,maxops);
260:   PetscFree6(lvalues,gvalues,reducetype,invecs,lvalues_mix,gvalues_mix);
261:   return(0);
262: }

264: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
265: {

269:   PetscFree6(sr->lvalues,sr->gvalues,sr->reducetype,sr->invecs,sr->lvalues_mix,sr->gvalues_mix);
270:   PetscFree(sr);
271:   return(0);
272: }

274: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

276: /*
277:    Private routine to delete internal storage when a communicator is freed.
278:   This is called by MPI, not by users.

280:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
281:   it was MPI_Comm *comm.
282: */
283: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
284: {

288:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
289:   PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
290:   return(0);
291: }

293: /*
294:      PetscSplitReductionGet - Gets the split reduction object from a
295:         PETSc vector, creates if it does not exit.

297: */
298: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
299: {
301:   PetscMPIInt    flag;

304:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
305:     /*
306:        The calling sequence of the 2nd argument to this function changed
307:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
308:        new standard, if you are using an MPI implementation that uses
309:        the older version you will get a warning message about the next line;
310:        it is only a warning message and should do no harm.
311:     */
312:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,NULL);
313:   }
314:   MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
315:   if (!flag) {  /* doesn't exist yet so create it and put it in */
316:     PetscSplitReductionCreate(comm,sr);
317:     MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
318:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
319:   }
320:   return(0);
321: }

323: /* ----------------------------------------------------------------------------------------------------*/

325: /*@
326:    VecDotBegin - Starts a split phase dot product computation.

328:    Input Parameters:
329: +   x - the first vector
330: .   y - the second vector
331: -   result - where the result will go (can be NULL)

333:    Level: advanced

335:    Notes:
336:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

338: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
339:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
340: @*/
341: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
342: {
343:   PetscErrorCode      ierr;
344:   PetscSplitReduction *sr;
345:   MPI_Comm            comm;

350:   PetscObjectGetComm((PetscObject)x,&comm);
351:   PetscSplitReductionGet(comm,&sr);
352:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
353:   if (sr->numopsbegin >= sr->maxops) {
354:     PetscSplitReductionExtend(sr);
355:   }
356:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
357:   sr->invecs[sr->numopsbegin]     = (void*)x;
358:   if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
359:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
360:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
361:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
362:   return(0);
363: }

365: /*@
366:    VecDotEnd - Ends a split phase dot product computation.

368:    Input Parameters:
369: +  x - the first vector (can be NULL)
370: .  y - the second vector (can be NULL)
371: -  result - where the result will go

373:    Level: advanced

375:    Notes:
376:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

378: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
379:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

381: @*/
382: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
383: {
384:   PetscErrorCode      ierr;
385:   PetscSplitReduction *sr;
386:   MPI_Comm            comm;

389:   PetscObjectGetComm((PetscObject)x,&comm);
390:   PetscSplitReductionGet(comm,&sr);
391:   PetscSplitReductionEnd(sr);

393:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
394:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
395:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
396:   *result = sr->gvalues[sr->numopsend++];

398:   /*
399:      We are finished getting all the results so reset to no outstanding requests
400:   */
401:   if (sr->numopsend == sr->numopsbegin) {
402:     sr->state       = STATE_BEGIN;
403:     sr->numopsend   = 0;
404:     sr->numopsbegin = 0;
405:     sr->mix         = PETSC_FALSE;
406:   }
407:   return(0);
408: }

410: /*@
411:    VecTDotBegin - Starts a split phase transpose dot product computation.

413:    Input Parameters:
414: +  x - the first vector
415: .  y - the second vector
416: -  result - where the result will go (can be NULL)

418:    Level: advanced

420:    Notes:
421:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

423: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
424:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

426: @*/
427: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
428: {
429:   PetscErrorCode      ierr;
430:   PetscSplitReduction *sr;
431:   MPI_Comm            comm;

434:   PetscObjectGetComm((PetscObject)x,&comm);
435:   PetscSplitReductionGet(comm,&sr);
436:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
437:   if (sr->numopsbegin >= sr->maxops) {
438:     PetscSplitReductionExtend(sr);
439:   }
440:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
441:   sr->invecs[sr->numopsbegin]     = (void*)x;
442:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
443:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
444:   (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
445:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
446:   return(0);
447: }

449: /*@
450:    VecTDotEnd - Ends a split phase transpose dot product computation.

452:    Input Parameters:
453: +  x - the first vector (can be NULL)
454: .  y - the second vector (can be NULL)
455: -  result - where the result will go

457:    Level: advanced

459:    Notes:
460:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

462: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
463:          VecDotBegin(), VecDotEnd()
464: @*/
465: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
466: {

470:   /*
471:       TDotEnd() is the same as DotEnd() so reuse the code
472:   */
473:   VecDotEnd(x,y,result);
474:   return(0);
475: }

477: /* -------------------------------------------------------------------------*/

479: /*@
480:    VecNormBegin - Starts a split phase norm computation.

482:    Input Parameters:
483: +  x - the first vector
484: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
485: -  result - where the result will go (can be NULL)

487:    Level: advanced

489:    Notes:
490:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

492: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

494: @*/
495: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
496: {
497:   PetscErrorCode      ierr;
498:   PetscSplitReduction *sr;
499:   PetscReal           lresult[2];
500:   MPI_Comm            comm;

504:   PetscObjectGetComm((PetscObject)x,&comm);
505:   PetscSplitReductionGet(comm,&sr);
506:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
507:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
508:     PetscSplitReductionExtend(sr);
509:   }

511:   sr->invecs[sr->numopsbegin] = (void*)x;
512:   if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
513:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
514:   (*x->ops->norm_local)(x,ntype,lresult);
515:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
516:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
517:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
518:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
519:   else                   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
520:   sr->lvalues[sr->numopsbegin++] = lresult[0];
521:   if (ntype == NORM_1_AND_2) {
522:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
523:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
524:   }
525:   return(0);
526: }

528: /*@
529:    VecNormEnd - Ends a split phase norm computation.

531:    Input Parameters:
532: +  x - the first vector
533: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
534: -  result - where the result will go

536:    Level: advanced

538:    Notes:
539:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

541:    The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value

543: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

545: @*/
546: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
547: {
548:   PetscErrorCode      ierr;
549:   PetscSplitReduction *sr;
550:   MPI_Comm            comm;

554:   PetscObjectGetComm((PetscObject)x,&comm);
555:   PetscSplitReductionGet(comm,&sr);
556:   PetscSplitReductionEnd(sr);

558:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
559:   if ((void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
560:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
561:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

563:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
564:   else if (ntype == NORM_1_AND_2) {
565:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
566:     result[1] = PetscSqrtReal(result[1]);
567:   }
568:   if (ntype!=NORM_1_AND_2) {
569:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
570:   }

572:   if (sr->numopsend == sr->numopsbegin) {
573:     sr->state       = STATE_BEGIN;
574:     sr->numopsend   = 0;
575:     sr->numopsbegin = 0;
576:   }
577:   return(0);
578: }

580: /*
581:    Possibly add

583:      PetscReductionSumBegin/End()
584:      PetscReductionMaxBegin/End()
585:      PetscReductionMinBegin/End()
586:    or have more like MPI with a single function with flag for Op? Like first better
587: */

589: /*@
590:    VecMDotBegin - Starts a split phase multiple dot product computation.

592:    Input Parameters:
593: +   x - the first vector
594: .   nv - number of vectors
595: .   y - array of vectors
596: -   result - where the result will go (can be NULL)

598:    Level: advanced

600:    Notes:
601:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

603: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
604:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
605: @*/
606: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
607: {
608:   PetscErrorCode      ierr;
609:   PetscSplitReduction *sr;
610:   MPI_Comm            comm;
611:   int                 i;

614:   PetscObjectGetComm((PetscObject)x,&comm);
615:   PetscSplitReductionGet(comm,&sr);
616:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
617:   for (i=0; i<nv; i++) {
618:     if (sr->numopsbegin+i >= sr->maxops) {
619:       PetscSplitReductionExtend(sr);
620:     }
621:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
622:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
623:   }
624:   if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
625:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
626:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
627:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
628:   sr->numopsbegin += nv;
629:   return(0);
630: }

632: /*@
633:    VecMDotEnd - Ends a split phase multiple dot product computation.

635:    Input Parameters:
636: +   x - the first vector (can be NULL)
637: .   nv - number of vectors
638: -   y - array of vectors (can be NULL)

640:    Output Parameters:
641: .   result - where the result will go

643:    Level: advanced

645:    Notes:
646:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

648: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
649:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

651: @*/
652: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
653: {
654:   PetscErrorCode      ierr;
655:   PetscSplitReduction *sr;
656:   MPI_Comm            comm;
657:   int                 i;

660:   PetscObjectGetComm((PetscObject)x,&comm);
661:   PetscSplitReductionGet(comm,&sr);
662:   PetscSplitReductionEnd(sr);

664:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
665:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
666:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
667:   for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];

669:   /*
670:      We are finished getting all the results so reset to no outstanding requests
671:   */
672:   if (sr->numopsend == sr->numopsbegin) {
673:     sr->state       = STATE_BEGIN;
674:     sr->numopsend   = 0;
675:     sr->numopsbegin = 0;
676:   }
677:   return(0);
678: }

680: /*@
681:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

683:    Input Parameters:
684: +  x - the first vector
685: .  nv - number of vectors
686: .  y - array of  vectors
687: -  result - where the result will go (can be NULL)

689:    Level: advanced

691:    Notes:
692:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

694: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
695:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

697: @*/
698: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
699: {
700:   PetscErrorCode      ierr;
701:   PetscSplitReduction *sr;
702:   MPI_Comm            comm;
703:   int                 i;

706:   PetscObjectGetComm((PetscObject)x,&comm);
707:   PetscSplitReductionGet(comm,&sr);
708:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
709:   for (i=0; i<nv; i++) {
710:     if (sr->numopsbegin+i >= sr->maxops) {
711:       PetscSplitReductionExtend(sr);
712:     }
713:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
714:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
715:   }
716:   if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
717:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
718:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
719:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
720:   sr->numopsbegin += nv;
721:   return(0);
722: }

724: /*@
725:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

727:    Input Parameters:
728: +  x - the first vector (can be NULL)
729: .  nv - number of vectors
730: -  y - array of  vectors (can be NULL)

732:    Output Parameters:
733: .  result - where the result will go

735:    Level: advanced

737:    Notes:
738:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

740: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
741:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
742: @*/
743: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
744: {

748:   /*
749:       MTDotEnd() is the same as MDotEnd() so reuse the code
750:   */
751:   VecMDotEnd(x,nv,y,result);
752:   return(0);
753: }