Actual source code: vecstash.c

#include <petsc/private/vecimpl.h>

#define DEFAULT_STASH_SIZE   100

/*
  VecStashCreate_Private - Creates a stash, currently used for all the parallel
  vector implementations. The stash is where elements of a vector destined
  to be stored on other processors are kept until vector assembly is done.

  This is a simple-minded stash: it simply adds entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
{
  PetscInt  max,*opt,nopt;
  PetscBool flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);

  nopt = stash->size;
  PetscMalloc1(nopt,&opt);
  PetscOptionsGetIntArray(NULL,NULL,"-vecstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);

  if (bs <= 0) bs = 1;

  stash->bs       = bs;
  stash->nmax     = 0;
  stash->oldnmax  = 0;
  stash->n        = 0;
  stash->reallocs = -1;
  stash->idx      = NULL;
  stash->array    = NULL;

  stash->send_waits   = NULL;
  stash->recv_waits   = NULL;
  stash->send_status  = NULL;
  stash->nsends       = 0;
  stash->nrecvs       = 0;
  stash->svalues      = NULL;
  stash->rvalues      = NULL;
  stash->rmax         = 0;
  stash->nprocs       = NULL;
  stash->nprocessed   = 0;
  stash->donotstash   = PETSC_FALSE;
  stash->ignorenegidx = PETSC_FALSE;
  return(0);
}
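
/*
  A minimal usage sketch (for illustration only; VecStash is a private type
  normally managed inside the parallel Vec implementations, not by user code):

     VecStash stash;
     VecStashCreate_Private(PETSC_COMM_WORLD,1,&stash);
     ...
     VecStashDestroy_Private(&stash);

  The per-process initial size can be preset on the command line with
  -vecstash_initial_size: a single value applies to all ranks, while a
  comma-separated list of 'size' values gives one value per rank.
*/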

/*
   VecStashDestroy_Private - Destroy the stash
*/
PetscErrorCode VecStashDestroy_Private(VecStash *stash)
{
  PetscFree2(stash->array,stash->idx);
  PetscFree(stash->bowners);
  return(0);
}

/*
   VecStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are completed here, and
   all the memory used for message passing is cleaned up. This routine
   also resets the stash and deallocates the memory used for it. It also
   keeps track of the current memory usage so that the same value can be
   used the next time through.
*/
PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
{
  PetscInt   nsends=stash->nsends,oldnmax;
  MPI_Status *send_status;

  /* wait on sends */
  if (nsends) {
    PetscMalloc1(2*nsends,&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used,
     so that less space is wasted the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->rmax       = 0;
  stash->nprocessed = 0;

  PetscFree2(stash->array,stash->idx);
  stash->array = NULL;
  stash->idx   = NULL;
  PetscFree(stash->send_waits);
  PetscFree(stash->recv_waits);
  PetscFree2(stash->svalues,stash->sindices);
  PetscFree2(stash->rvalues,stash->rindices);
  PetscFree(stash->nprocs);
  return(0);
}
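
/*
  A worked example of the sizing heuristic above (illustrative numbers):
  with stash->n = 100 entries and stash->bs = 2,

     oldnmax = ((PetscInt)(100 * 1.1) + 5)*2 = (110 + 5)*2 = 230,

  so the next assembly cycle starts with room for 230 scalar values rather
  than growing again from the default size.
*/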

/*
   VecStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash - the stash

   Output Parameters:
   nstash   - the size of the stash
   reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  if (nstash) *nstash = stash->n*stash->bs;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}
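
/*
  A minimal sketch of how this getter is typically consumed (assumed caller
  context: informational logging during parallel vector assembly):

     PetscInt nstash,reallocs;
     VecStashGetInfo_Private(&vec->stash,&nstash,&reallocs);
     PetscInfo2(vec,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
*/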

/*
   VecStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash - the stash
   max   - the value to use as the maximum size of the stash when allocating
           memory. It counts the number of individual values stored, even
           for the block-stash.
*/
PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}

/* VecStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
{
  PetscInt    *n_idx,newnmax,bs=stash->bs;
  PetscScalar *n_array;

  /* allocate a larger stash. */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs;
    else             newnmax = DEFAULT_STASH_SIZE/bs;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
    else                              newnmax = stash->oldnmax/bs;
  } else newnmax = stash->nmax*2;

  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  PetscMalloc2(bs*newnmax,&n_array,newnmax,&n_idx);
  PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
  PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
  PetscFree2(stash->array,stash->idx);

  stash->array = n_array;
  stash->idx   = n_idx;
  stash->nmax  = newnmax;
  stash->reallocs++;
  return(0);
}
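
/*
  Worked examples of the sizing rules above (illustrative numbers, bs = 1):

     brand new stash, no user hint:           newnmax = DEFAULT_STASH_SIZE = 100
     reused stash, oldnmax = 230, umax = 0:   newnmax = 230
     growing stash, nmax = 100:               newnmax = 200 (doubling)

  In every case, if newnmax still falls short of nmax + incr it is bumped by
  a further 2*incr, so the pending insertion is always guaranteed to fit.
*/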

/*
   VecStashScatterBegin_Private - Initiates the transfer of values to the
   correct owners. This function goes through the stash, checks the
   owner of each stashed value, and sends the values off to the owner
   processors.

   Input Parameters:
   stash  - the stash
   owners - an array of size 'no-of-procs' which gives the ownership range
            for each node.

   Notes:
   For the blocked stash, the 'owners' array specifies the ranges in
   blocked global indices; for the regular stash, in regular global indices.
*/
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
{
  PetscMPIInt size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
  PetscInt    *owner,*start,*nprocs,nsends,nreceives;
  PetscInt    nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
  PetscScalar *rvalues,*svalues;
  MPI_Comm    comm = stash->comm;
  MPI_Request *send_waits,*recv_waits;

  /* first count number of contributors to each processor */
  PetscCalloc1(2*size,&nprocs);
  PetscMalloc1(stash->n,&owner);
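  /* nprocs is used in pairs: nprocs[2*j] counts the entries destined for rank j,
     and nprocs[2*j+1] is set to 1 if any message goes to rank j at all. The
     running index j below exploits the common case of locally sorted indices:
     the owner search resumes where the previous entry's owner was found and
     restarts from rank 0 only when an index decreases. */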

  j       = 0;
  lastidx = -1;
  for (i=0; i<stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  PetscMaxSum(comm,nprocs,&nmax,&nreceives);

  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  PetscMalloc2(nreceives*nmax*bs,&rvalues,nreceives*nmax,&rindices);
  PetscMalloc1(2*nreceives,&recv_waits);
  for (i=0,count=0; i<nreceives; i++) {
    MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
    MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
  }

  /* do sends:
     start[i] gives the starting index in svalues for the values destined
     for the ith processor
  */
  PetscMalloc2(stash->n*bs,&svalues,stash->n,&sindices);
  PetscMalloc1(2*nsends,&send_waits);
  PetscMalloc1(size,&start);
  /* use two sends per destination: the first with all values, the next with all indices */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs == 1) svalues[start[j]] = stash->array[i];
    else {
      PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
    }
    sindices[start[j]] = stash->idx[i];
    start[j]++;
  }
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
      MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
    }
  }
  PetscFree(owner);
  PetscFree(start);
  /* this memory is reused as the flg_v array in VecStashScatterGetMesg_Private */
  for (i=0; i<2*size; i++) nprocs[i] = -1;

  stash->nprocs     = nprocs;
  stash->svalues    = svalues;
  stash->sindices   = sindices;
  stash->rvalues    = rvalues;
  stash->rindices   = rindices;
  stash->nsends     = nsends;
  stash->nrecvs     = nreceives;
  stash->send_waits = send_waits;
  stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  return(0);
}
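
/*
  A sketch of the resulting wire protocol, as derived from the code above:
  each sending rank posts exactly two messages per destination rank i,

     tag1: bs*nprocs[2*i] values  (MPIU_SCALAR)
     tag2: nprocs[2*i]    indices (MPIU_INT)

  while each receiver posts 2*nreceives MPI_Irecv's of the worst-case length
  nmax (obtained via PetscMaxSum) and pairs them up later, by source rank, in
  VecStashScatterGetMesg_Private.
*/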

/*
   VecStashScatterGetMesg_Private - This function waits on the receives posted
   in the function VecStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages are left, and the current call has no values
           associated with it.
           1 indicates that the current call successfully received a message, and the
           other output parameters nvals, rows, vals are set appropriately.
*/
PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
  PetscInt    *flg_v;
  PetscInt    i1,i2,bs=stash->bs;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) return(0);

  flg_v = stash->nprocs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
    } else {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs;
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices + i2*stash->rmax;
      *vals = stash->rvalues + i1*bs*stash->rmax;
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
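
/*
  A minimal sketch of the intended receive loop (assumed caller context;
  this mirrors how parallel vector assembly consumes the messages):

     PetscMPIInt n;
     PetscInt    *rows,flg;
     PetscScalar *vals;
     VecStashScatterBegin_Private(&vec->stash,owners);
     while (1) {
       VecStashScatterGetMesg_Private(&vec->stash,&n,&rows,&vals,&flg);
       if (!flg) break; /* all messages have been processed */
       /* ... insert or add the n (rows[i],vals[i]) pairs into the local array ... */
     }
     VecStashScatterEnd_Private(&vec->stash);
*/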

/*
 * Sort the stash, removing duplicates (combining as appropriate).
 */
PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
{
  PetscInt i,j,bs = stash->bs;

  if (!stash->n) return(0);
  if (bs == 1) {
    PetscSortIntWithScalarArray(stash->n,stash->idx,stash->array);
    for (i=1,j=0; i<stash->n; i++) {
      if (stash->idx[i] == stash->idx[j]) {
        switch (stash->insertmode) {
        case ADD_VALUES:
          stash->array[j] += stash->array[i];
          break;
        case INSERT_VALUES:
          stash->array[j] = stash->array[i];
          break;
        default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j]   = stash->idx[i];
        stash->array[j] = stash->array[i];
      }
    }
    stash->n = j + 1;
  } else { /* block stash */
    PetscInt    *perm = NULL;
    PetscScalar *arr;
    PetscMalloc2(stash->n,&perm,stash->n*bs,&arr);
    for (i=0; i<stash->n; i++) perm[i] = i;
    PetscSortIntWithArray(stash->n,stash->idx,perm);

    /* Out-of-place copy of arr */
    PetscMemcpy(arr,stash->array+perm[0]*bs,bs*sizeof(PetscScalar));
    for (i=1,j=0; i<stash->n; i++) {
      PetscInt k;
      if (stash->idx[i] == stash->idx[j]) {
        switch (stash->insertmode) {
        case ADD_VALUES:
          for (k=0; k<bs; k++) arr[j*bs+k] += stash->array[perm[i]*bs+k];
          break;
        case INSERT_VALUES:
          for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
          break;
        default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j] = stash->idx[i];
        for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
      }
    }
    stash->n = j + 1;
    PetscMemcpy(stash->array,arr,stash->n*bs*sizeof(PetscScalar));
    PetscFree2(perm,arr);
  }
  return(0);
}
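
/*
  A worked example of the compression above (illustrative data, bs = 1,
  insertmode = ADD_VALUES):

     before:  idx = {5, 2, 5}   array = {1.0, 4.0, 2.0}
     sorted:  idx = {2, 5, 5}   array = {4.0, 1.0, 2.0}
     after:   idx = {2, 5}      array = {4.0, 3.0}      n = 2

  With INSERT_VALUES a single one of the duplicate entries (the last after
  sorting) is kept instead of their sum.
*/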

PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash,PetscLayout map,PetscMPIInt *nowners,PetscMPIInt **owners)
{
  PetscInt       i,bs = stash->bs;
  PetscMPIInt    r;
  PetscSegBuffer seg;

  if (bs != 1 && bs != map->bs) SETERRQ2(map->comm,PETSC_ERR_PLIB,"Stash block size %D does not match layout block size %D",bs,map->bs);
  PetscSegBufferCreate(sizeof(PetscMPIInt),50,&seg);
  *nowners = 0;
  for (i=0,r=-1; i<stash->n; i++) {
    if (stash->idx[i]*bs >= map->range[r+1]) {
      PetscMPIInt *rank;
      PetscSegBufferGet(seg,1,&rank);
      PetscLayoutFindOwner(map,stash->idx[i]*bs,&r);
      *rank = r;
      (*nowners)++;
    }
  }
  PetscSegBufferExtractAlloc(seg,owners);
  PetscSegBufferDestroy(&seg);
  return(0);
}
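
/*
  A worked example (illustrative data; note that this routine relies on the
  stash indices already being sorted, e.g. by VecStashSortCompress_Private):
  with bs = 1, ownership ranges map->range = {0, 10, 20, 30} and sorted
  indices idx = {3, 7, 15}, the loop emits rank 0 once (covering indices 3
  and 7) and rank 1 once (for index 15), giving nowners = 2 and
  owners = {0, 1}.
*/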