Actual source code: matmatmult.c
  1: /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
  2: /*
  3:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  4:           C = A * B
  5:           C = P * A * P^T
  6: */
#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/utils/freespace.h"
 11: static int logkey_matmatmult            = 0;
 12: static int logkey_matmatmult_symbolic   = 0;
 13: static int logkey_matmatmult_numeric    = 0;
 15: static int logkey_matapplypapt          = 0;
 16: static int logkey_matapplypapt_symbolic = 0;
 17: static int logkey_matapplypapt_numeric  = 0;
 19: /*
 20:      MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
 21:            C = A * B;
 23:      Note: C is assumed to be uncreated.
 24:            If this is not the case, Destroy C before calling this routine.
 25: */
 26: int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
 27: {
 28:   int            ierr;
 29:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
 30:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
 31:   int            aishift=a->indexshift,bishift=b->indexshift;
 32:   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
 33:   int            *ci,*cj,*denserow,*sparserow;
 34:   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
 35:   int            i,j,k,anzi,brow,bnzj,cnzi;
 36:   MatScalar      *ca;
 39:   /* some error checking which could be moved into interface layer */
 40:   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
 41:   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
 42: 
 43:   /* Set up timers */
 44:   if (!logkey_matmatmult_symbolic) {
 45:     PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);
 46:   }
 47:   PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);
 49:   /* Set up */
 50:   /* Allocate ci array, arrays for fill computation and */
 51:   /* free space for accumulating nonzero column info */
 52:   PetscMalloc(((am+1)+1)*sizeof(int),&ci);
 53:   ci[0] = 0;
 55:   PetscMalloc((2*bn+1)*sizeof(int),&denserow);
 56:   PetscMemzero(denserow,(2*bn+1)*sizeof(int));
 57:   sparserow = denserow + bn;
 59:   /* Initial FreeSpace size is nnz(B)=bi[bm] */
 60:   ierr          = GetMoreSpace(bi[bm],&free_space);
 61:   current_space = free_space;
 63:   /* Determine symbolic info for each row of the product: */
 64:   for (i=0;i<am;i++) {
 65:     anzi = ai[i+1] - ai[i];
 66:     cnzi = 0;
 67:     for (j=0;j<anzi;j++) {
 68:       brow = *aj++;
 69:       bnzj = bi[brow+1] - bi[brow];
 70:       bjj  = bj + bi[brow];
 71:       for (k=0;k<bnzj;k++) {
 72:         /* If column is not marked, mark it in compressed and uncompressed locations. */
 73:         /* For simplicity, leave uncompressed row unsorted until finished with row, */
 74:         /* and increment nonzero count for this row. */
 75:         if (!denserow[bjj[k]]) {
 76:           denserow[bjj[k]]  = -1;
 77:           sparserow[cnzi++] = bjj[k];
 78:         }
 79:       }
 80:     }
 82:     /* sort sparserow */
 83:     PetscSortInt(cnzi,sparserow);
 85:     /* If free space is not available, make more free space */
 86:     /* Double the amount of total space in the list */
 87:     if (current_space->local_remaining<cnzi) {
 88:       GetMoreSpace(current_space->total_array_size,¤t_space);
 89:     }
 91:     /* Copy data into free space, and zero out denserow */
 92:     PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
 93:     current_space->array           += cnzi;
 94:     current_space->local_used      += cnzi;
 95:     current_space->local_remaining -= cnzi;
 96:     for (j=0;j<cnzi;j++) {
 97:       denserow[sparserow[j]] = 0;
 98:     }
 99:     ci[i+1] = ci[i] + cnzi;
100:   }
102:   /* Column indices are in the list of free space */
103:   /* Allocate space for cj, initialize cj, and */
104:   /* destroy list of free space and other temporary array(s) */
105:   PetscMalloc((ci[am]+1)*sizeof(int),&cj);
106:   MakeSpaceContiguous(&free_space,cj);
107:   PetscFree(denserow);
108: 
109:   /* Allocate space for ca */
110:   PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
111:   PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));
112: 
113:   /* put together the new matrix */
114:   MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);
116:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
117:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
118:   c = (Mat_SeqAIJ *)((*C)->data);
119:   c->freedata = PETSC_TRUE;
120:   c->nonew    = 0;
122:   PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);
123:   return(0);
124: }
126: /*
127:      MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
128:            C=A*B;
129:      Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ.
130: */
131: int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
132: {
133:   int        ierr,flops=0;
134:   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
135:   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
136:   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
137:   int        aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift;
138:   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
139:   int        an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M;
140:   int        i,j,k,anzi,bnzi,cnzi,brow;
141:   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
145:   /* This error checking should be unnecessary if the symbolic was performed */
146:   if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
147:   if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm);
148:   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
149:   if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn);
151:   /* Set up timers */
152:   if (!logkey_matmatmult_numeric) {
153:     PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);
154:   }
155:   PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);
157:   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
158:   PetscMalloc((cn+1)*sizeof(MatScalar),&temp);
159:   PetscMemzero(temp,cn*sizeof(MatScalar));
160:   /* Traverse A row-wise. */
161:   /* Build the ith row in C by summing over nonzero columns in A, */
162:   /* the rows of B corresponding to nonzeros of A. */
163:   for (i=0;i<am;i++) {
164:     anzi = ai[i+1] - ai[i];
165:     for (j=0;j<anzi;j++) {
166:       brow = *aj++;
167:       bnzi = bi[brow+1] - bi[brow];
168:       bjj  = bj + bi[brow];
169:       baj  = ba + bi[brow];
170:       for (k=0;k<bnzi;k++) {
171:         temp[bjj[k]] += (*aa)*baj[k];
172:       }
173:       flops += 2*bnzi;
174:       aa++;
175:     }
176:     /* Store row back into C, and re-zero temp */
177:     cnzi = ci[i+1] - ci[i];
178:     for (j=0;j<cnzi;j++) {
179:       ca[j] = temp[cj[j]];
180:       temp[cj[j]] = 0.0;
181:     }
182:     ca += cnzi;
183:     cj += cnzi;
184:   }
185:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
186:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
187: 
188:   /* Free temp */
189:   PetscFree(temp);
190:   PetscLogFlops(flops);
191:   PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);
192:   return(0);
193: }
195: int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) {
199:   if (!logkey_matmatmult) {
200:     PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);
201:   }
202:   PetscLogEventBegin(logkey_matmatmult,A,B,0,0);
203:   MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);
204:   MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);
205:   PetscLogEventEnd(logkey_matmatmult,A,B,0,0);
206:   return(0);
207: }
210: /*
211:      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
212:            C = P * A * P^T;
214:      Note: C is assumed to be uncreated.
215:            If this is not the case, Destroy C before calling this routine.
216: */
217: int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
218:   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
219:   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
220:   int            ierr;
221:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
222:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
223:   int            aishift=a->indexshift,pishift=p->indexshift;
224:   int            *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
225:   int            *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
226:   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
227:   int            i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
228:   MatScalar      *ca;
232:   /* some error checking which could be moved into interface layer */
233:   if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
234:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
235:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
237:   /* Set up timers */
238:   if (!logkey_matapplypapt_symbolic) {
239:     PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);
240:   }
241:   PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);
243:   /* Create ij structure of P^T */
244:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
246:   /* Allocate ci array, arrays for fill computation and */
247:   /* free space for accumulating nonzero column info */
248:   PetscMalloc(((pm+1)*1)*sizeof(int),&ci);
249:   ci[0] = 0;
251:   PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);
252:   PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));
253:   pasparserow  = padenserow  + an;
254:   denserow     = pasparserow + an;
255:   sparserow    = denserow    + pm;
257:   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
258:   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
259:   ierr          = GetMoreSpace((ai[am]/pn)*pm,&free_space);
260:   current_space = free_space;
262:   /* Determine fill for each row of C: */
263:   for (i=0;i<pm;i++) {
264:     pnzi  = pi[i+1] - pi[i];
265:     panzi = 0;
266:     /* Get symbolic sparse row of PA: */
267:     for (j=0;j<pnzi;j++) {
268:       arow = *pj++;
269:       anzj = ai[arow+1] - ai[arow];
270:       ajj  = aj + ai[arow];
271:       for (k=0;k<anzj;k++) {
272:         if (!padenserow[ajj[k]]) {
273:           padenserow[ajj[k]]   = -1;
274:           pasparserow[panzi++] = ajj[k];
275:         }
276:       }
277:     }
278:     /* Using symbolic row of PA, determine symbolic row of C: */
279:     paj    = pasparserow;
280:     cnzi   = 0;
281:     for (j=0;j<panzi;j++) {
282:       ptrow = *paj++;
283:       ptnzj = pti[ptrow+1] - pti[ptrow];
284:       ptjj  = ptj + pti[ptrow];
285:       for (k=0;k<ptnzj;k++) {
286:         if (!denserow[ptjj[k]]) {
287:           denserow[ptjj[k]] = -1;
288:           sparserow[cnzi++] = ptjj[k];
289:         }
290:       }
291:     }
293:     /* sort sparse representation */
294:     PetscSortInt(cnzi,sparserow);
296:     /* If free space is not available, make more free space */
297:     /* Double the amount of total space in the list */
298:     if (current_space->local_remaining<cnzi) {
299:       GetMoreSpace(current_space->total_array_size,¤t_space);
300:     }
302:     /* Copy data into free space, and zero out dense row */
303:     PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
304:     current_space->array           += cnzi;
305:     current_space->local_used      += cnzi;
306:     current_space->local_remaining -= cnzi;
308:     for (j=0;j<panzi;j++) {
309:       padenserow[pasparserow[j]] = 0;
310:     }
311:     for (j=0;j<cnzi;j++) {
312:       denserow[sparserow[j]] = 0;
313:     }
314:     ci[i+1] = ci[i] + cnzi;
315:   }
316:   /* column indices are in the list of free space */
317:   /* Allocate space for cj, initialize cj, and */
318:   /* destroy list of free space and other temporary array(s) */
319:   PetscMalloc((ci[pm]+1)*sizeof(int),&cj);
320:   MakeSpaceContiguous(&free_space,cj);
321:   PetscFree(padenserow);
322: 
323:   /* Allocate space for ca */
324:   PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
325:   PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
326: 
327:   /* put together the new matrix */
328:   MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);
330:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
331:   /* Since these are PETSc arrays, change flags to free them as necessary. */
332:   c = (Mat_SeqAIJ *)((*C)->data);
333:   c->freedata = PETSC_TRUE;
334:   c->nonew    = 0;
336:   /* Clean up. */
337:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
339:   PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);
340:   return(0);
341: }
343: /*
344:      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
345:            C = P * A * P^T;
346:      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
347: */
348: int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
349:   int        ierr,flops=0;
350:   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
351:   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
352:   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
353:   int        aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
354:   int        *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
355:   int        *ci=c->i,*cj=c->j;
356:   int        an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
357:   int        i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
358:   MatScalar  *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
362:   /* This error checking should be unnecessary if the symbolic was performed */
363:   if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
364:   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm);
365:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
366:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
367:   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn);
369:   /* Set up timers */
370:   if (!logkey_matapplypapt_numeric) {
371:     PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);
372:   }
373:   PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);
375:   PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);
376:   PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));
377:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
379:   paj      = (int *)(paa + an);
380:   pajdense = paj + an;
382:   for (i=0;i<pm;i++) {
383:     /* Form sparse row of P*A */
384:     pnzi  = pi[i+1] - pi[i];
385:     panzj = 0;
386:     for (j=0;j<pnzi;j++) {
387:       arow = *pj++;
388:       anzj = ai[arow+1] - ai[arow];
389:       ajj  = aj + ai[arow];
390:       aaj  = aa + ai[arow];
391:       for (k=0;k<anzj;k++) {
392:         if (!pajdense[ajj[k]]) {
393:           pajdense[ajj[k]] = -1;
394:           paj[panzj++]     = ajj[k];
395:         }
396:         paa[ajj[k]] += (*pa)*aaj[k];
397:       }
398:       flops += 2*anzj;
399:       pa++;
400:     }
402:     /* Sort the j index array for quick sparse axpy. */
403:     PetscSortInt(panzj,paj);
405:     /* Compute P*A*P^T using sparse inner products. */
406:     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
407:     cnzi = ci[i+1] - ci[i];
408:     for (j=0;j<cnzi;j++) {
409:       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
410:       ptcol = *cj++;
411:       ptnzj = pi[ptcol+1] - pi[ptcol];
412:       ptj   = pjj + pi[ptcol];
413:       ptaj  = pta + pi[ptcol];
414:       sum   = 0.;
415:       k1    = 0;
416:       k2    = 0;
417:       while ((k1<panzj) && (k2<ptnzj)) {
418:         if (paj[k1]==ptj[k2]) {
419:           sum += paa[paj[k1++]]*ptaj[k2++];
420:         } else if (paj[k1] < ptj[k2]) {
421:           k1++;
422:         } else /* if (paj[k1] > ptj[k2]) */ {
423:           k2++;
424:         }
425:       }
426:       *ca++ = sum;
427:     }
429:     /* Zero the current row info for P*A */
430:     for (j=0;j<panzj;j++) {
431:       paa[paj[j]]      = 0.;
432:       pajdense[paj[j]] = 0;
433:     }
434:   }
436:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
437:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
438:   PetscLogFlops(flops);
439:   PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);
440:   return(0);
441: }
442: 
443: int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
447:   if (!logkey_matapplypapt) {
448:     PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);
449:   }
450:   PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);
451:   MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
452:   MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
453:   PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);
454:   return(0);
455: }