Actual source code: mmsbaij.c
  1: /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
  3: /*
  4:    Support for the parallel SBAIJ matrix vector multiply
  5: */
 6:  #include src/mat/impls/sbaij/mpi/mpisbaij.h
 7:  #include src/vec/vecimpl.h
  8: extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
 10: int MatSetUpMultiply_MPISBAIJ(Mat mat)
 11: {
 12:   Mat_MPISBAIJ       *sbaij = (Mat_MPISBAIJ*)mat->data;
 13:   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(sbaij->B->data);
 14:   int                Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
 15:   int                bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
 16:   IS                 from,to;
 17:   Vec                gvec;
 18:   int                rank=sbaij->rank,lsize,size=sbaij->size;
 19:   int                *owners=sbaij->rowners,*sowners,*ec_owner,k;
 20:   PetscMap           vecmap;
 21:   PetscScalar        *ptr;
 24:   if (sbaij->lvec) {
 25:     VecDestroy(sbaij->lvec);
 26:     sbaij->lvec = 0;
 27:   }
 28:   if (sbaij->Mvctx) {
 29:     VecScatterDestroy(sbaij->Mvctx);
 30:     sbaij->Mvctx = 0;
 31:   }
 33:   /* For the first stab we make an array as long as the number of columns */
 34:   /* mark those columns that are in sbaij->B */
 35:   PetscMalloc((Nbs+1)*sizeof(int),&indices);
 36:   PetscMemzero(indices,Nbs*sizeof(int));
 37:   for (i=0; i<mbs; i++) {
 38:     for (j=0; j<B->ilen[i]; j++) {
 39:       if (!indices[aj[B->i[i] + j]]) ec++;
 40:       indices[aj[B->i[i] + j] ] = 1;
 41:     }
 42:   }
 44:   /* form arrays of columns we need */
 45:   PetscMalloc((ec+1)*sizeof(int),&garray);
 46:   PetscMalloc((3*ec+1)*sizeof(int),&sgarray);
 47:   ec_owner = sgarray + 2*ec;
 48: 
 49:   ec = 0;
 50:   for (j=0; j<size; j++){
 51:     for (i=owners[j]; i<owners[j+1]; i++){
 52:       if (indices[i]) {
 53:         garray[ec]   = i;
 54:         ec_owner[ec] = j;
 55:         ec++;
 56:       }
 57:     }
 58:   }
 60:   /* make indices now point into garray */
 61:   for (i=0; i<ec; i++) indices[garray[i]] = i;
 63:   /* compact out the extra columns in B */
 64:   for (i=0; i<mbs; i++) {
 65:     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
 66:   }
 67:   B->nbs       = ec;
 68:   sbaij->B->n   = ec*B->bs;
 69:   PetscFree(indices);
 71:   /* create local vector that is used to scatter into */
 72:   VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);
 74:   /* create two temporary index sets for building scatter-gather */
 75:   PetscMalloc((2*ec+1)*sizeof(int),&stmp);
 76:   for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
 77:   ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);
 78: 
 79:   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
 80:   ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);
 82:   /* generate the scatter context */
 83:   VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);
 84:   VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);
 85:   VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);
 87:   sbaij->garray = garray;
 88:   PetscLogObjectParent(mat,sbaij->Mvctx);
 89:   PetscLogObjectParent(mat,sbaij->lvec);
 90:   PetscLogObjectParent(mat,from);
 91:   PetscLogObjectParent(mat,to);
 93:   ISDestroy(from);
 94:   ISDestroy(to);
 96:   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
 97:   lsize = (mbs + ec)*bs;
 98:   VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);
 99:   VecDuplicate(sbaij->slvec0,&sbaij->slvec1);
100:   VecGetSize(sbaij->slvec0,&vec_size);
102:   VecGetPetscMap(sbaij->slvec0,&vecmap);
103:   PetscMapGetGlobalRange(vecmap,&sowners);
104: 
105:   /* x index in the IS sfrom */
106:   for (i=0; i<ec; i++) {
107:     j = ec_owner[i];
108:     sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
109:   }
110:   /* b index in the IS sfrom */
111:   k = sowners[rank]/bs + mbs;
112:   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
113: 
114:   for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
115:   ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);
116: 
117:   /* x index in the IS sto */
118:   k = sowners[rank]/bs + mbs;
119:   for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
120:   /* b index in the IS sto */
121:   for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];
123:   ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);
125:   /* gnerate the SBAIJ scatter context */
126:   VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);
127: 
128:    /*
129:       Post the receives for the first matrix vector product. We sync-chronize after
130:     this on the chance that the user immediately calls MatMult() after assemblying 
131:     the matrix.
132:   */
133:   VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);
135:   VecGetLocalSize(sbaij->slvec1,&nt);
136:   VecGetArray(sbaij->slvec1,&ptr);
137:   VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);
138:   VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);
139:   VecRestoreArray(sbaij->slvec1,&ptr);
141:   VecGetArray(sbaij->slvec0,&ptr);
142:   VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);
143:   VecRestoreArray(sbaij->slvec0,&ptr);
145:   PetscFree(stmp);
146:   MPI_Barrier(mat->comm);
147: 
148:   PetscLogObjectParent(mat,sbaij->sMvctx);
149:   PetscLogObjectParent(mat,sbaij->slvec0);
150:   PetscLogObjectParent(mat,sbaij->slvec1);
151:   PetscLogObjectParent(mat,sbaij->slvec0b);
152:   PetscLogObjectParent(mat,sbaij->slvec1a);
153:   PetscLogObjectParent(mat,sbaij->slvec1b);
154:   PetscLogObjectParent(mat,from);
155:   PetscLogObjectParent(mat,to);
156: 
157:   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
158:   ISDestroy(from);
159:   ISDestroy(to);
160:   VecDestroy(gvec);
161:   PetscFree(sgarray);
163:   return(0);
164: }
166: int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
167: {
168:   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
169:   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
170:   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
171:   int                bs = baij->bs,*stmp;
172:   IS                 from,to;
173:   Vec                gvec;
174: #if defined (PETSC_USE_CTABLE)
175:   PetscTable         gid1_lid1;
176:   PetscTablePosition tpos;
177:   int                gid,lid;
178: #endif  
182: #if defined (PETSC_USE_CTABLE)
183:   /* use a table - Mark Adams */
184:   PetscTableCreate(B->mbs,&gid1_lid1);
185:   for (i=0; i<B->mbs; i++) {
186:     for (j=0; j<B->ilen[i]; j++) {
187:       int data,gid1 = aj[B->i[i]+j] + 1;
188:       PetscTableFind(gid1_lid1,gid1,&data) ;
189:       if (!data) {
190:         /* one based table */
191:         PetscTableAdd(gid1_lid1,gid1,++ec);
192:       }
193:     }
194:   }
195:   /* form array of columns we need */
196:   PetscMalloc((ec+1)*sizeof(int),&garray);
197:   PetscTableGetHeadPosition(gid1_lid1,&tpos);
198:   while (tpos) {
199:     PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);
200:     gid--; lid--;
201:     garray[lid] = gid;
202:   }
203:   PetscSortInt(ec,garray);
204:   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
205:   PetscTableRemoveAll(gid1_lid1);
206:   for (i=0; i<ec; i++) {
207:     PetscTableAdd(gid1_lid1,garray[i]+1,i+1);
208:   }
209:   /* compact out the extra columns in B */
210:   for (i=0; i<B->mbs; i++) {
211:     for (j=0; j<B->ilen[i]; j++) {
212:       int gid1 = aj[B->i[i] + j] + 1;
213:       PetscTableFind(gid1_lid1,gid1,&lid);
214:       lid --;
215:       aj[B->i[i]+j] = lid;
216:     }
217:   }
218:   B->nbs     = ec;
219:   baij->B->n = ec*B->bs;
220:   PetscTableDelete(gid1_lid1);
221:   /* Mark Adams */
222: #else
223:   /* For the first stab we make an array as long as the number of columns */
224:   /* mark those columns that are in baij->B */
225:   PetscMalloc((Nbs+1)*sizeof(int),&indices);
226:   PetscMemzero(indices,Nbs*sizeof(int));
227:   for (i=0; i<B->mbs; i++) {
228:     for (j=0; j<B->ilen[i]; j++) {
229:       if (!indices[aj[B->i[i] + j]]) ec++;
230:       indices[aj[B->i[i] + j] ] = 1;
231:     }
232:   }
234:   /* form array of columns we need */
235:   PetscMalloc((ec+1)*sizeof(int),&garray);
236:   ec = 0;
237:   for (i=0; i<Nbs; i++) {
238:     if (indices[i]) {
239:       garray[ec++] = i;
240:     }
241:   }
243:   /* make indices now point into garray */
244:   for (i=0; i<ec; i++) {
245:     indices[garray[i]] = i;
246:   }
248:   /* compact out the extra columns in B */
249:   for (i=0; i<B->mbs; i++) {
250:     for (j=0; j<B->ilen[i]; j++) {
251:       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
252:     }
253:   }
254:   B->nbs       = ec;
255:   baij->B->n   = ec*B->bs;
256:   PetscFree(indices);
257: #endif  
258: 
259:   /* create local vector that is used to scatter into */
260:   VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);
262:   /* create two temporary index sets for building scatter-gather */
263:   for (i=0; i<ec; i++) {
264:     garray[i] = bs*garray[i];
265:   }
266:   ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);
267:   for (i=0; i<ec; i++) {
268:     garray[i] = garray[i]/bs;
269:   }
271:   PetscMalloc((ec+1)*sizeof(int),&stmp);
272:   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
273:   ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);
274:   PetscFree(stmp);
276:   /* create temporary global vector to generate scatter context */
277:   /* this is inefficient, but otherwise we must do either 
278:      1) save garray until the first actual scatter when the vector is known or
279:      2) have another way of generating a scatter context without a vector.*/
280:   VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);
282:   /* gnerate the scatter context */
283:   VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);
285:   /*
286:       Post the receives for the first matrix vector product. We sync-chronize after
287:     this on the chance that the user immediately calls MatMult() after assemblying 
288:     the matrix.
289:   */
290:   VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
291:   MPI_Barrier(mat->comm);
293:   PetscLogObjectParent(mat,baij->Mvctx);
294:   PetscLogObjectParent(mat,baij->lvec);
295:   PetscLogObjectParent(mat,from);
296:   PetscLogObjectParent(mat,to);
297:   baij->garray = garray;
298:   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
299:   ISDestroy(from);
300:   ISDestroy(to);
301:   VecDestroy(gvec);
302:   return(0);
303: }
306: /*
307:      Takes the local part of an already assembled MPIBAIJ matrix
308:    and disassembles it. This is to allow new nonzeros into the matrix
309:    that require more communication in the matrix vector multiply. 
310:    Thus certain data-structures must be rebuilt.
312:    Kind of slow! But that's what application programmers get when 
313:    they are sloppy.
314: */
315: int DisAssemble_MPISBAIJ(Mat A)
316: {
317:   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
318:   Mat           B = baij->B,Bnew;
319:   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
320:   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
321:   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
322:   MatScalar     *a = Bbaij->a;
323:   PetscScalar   *atmp;
324: #if defined(PETSC_USE_MAT_SINGLE)
325:   int           l;
326: #endif
329: #if defined(PETSC_USE_MAT_SINGLE)
330:   PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
331: #endif
332:   /* free stuff related to matrix-vec multiply */
333:   VecGetSize(baij->lvec,&ec); /* needed for PetscLogObjectMemory below */
334:   VecDestroy(baij->lvec); baij->lvec = 0;
335:   VecScatterDestroy(baij->Mvctx); baij->Mvctx = 0;
336:   if (baij->colmap) {
337: #if defined (PETSC_USE_CTABLE)
338:     PetscTableDelete(baij->colmap); baij->colmap = 0;
339: #else
340:     PetscFree(baij->colmap);
341:     baij->colmap = 0;
342:     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
343: #endif
344:   }
346:   /* make sure that B is assembled so we can access its values */
347:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
348:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
350:   /* invent new B and copy stuff over */
351:   PetscMalloc(mbs*sizeof(int),&nz);
352:   for (i=0; i<mbs; i++) {
353:     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
354:   }
355:   MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);
356:   PetscFree(nz);
357: 
358:   PetscMalloc(bs*sizeof(int),&rvals);
359:   for (i=0; i<mbs; i++) {
360:     rvals[0] = bs*i;
361:     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
362:     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
363:       col = garray[Bbaij->j[j]]*bs;
364:       for (k=0; k<bs; k++) {
365: #if defined(PETSC_USE_MAT_SINGLE)
366:         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
367: #else
368:         atmp = a+j*bs2 + k*bs;
369: #endif
370:         MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);
371:         col++;
372:       }
373:     }
374:   }
375: #if defined(PETSC_USE_MAT_SINGLE)
376:   PetscFree(atmp);
377: #endif
378:   PetscFree(baij->garray);
379:   baij->garray = 0;
380:   PetscFree(rvals);
381:   PetscLogObjectMemory(A,-ec*sizeof(int));
382:   MatDestroy(B);
383:   PetscLogObjectParent(A,Bnew);
384:   baij->B = Bnew;
385:   A->was_assembled = PETSC_FALSE;
386:   return(0);
387: }