Actual source code: pbjacobi.c
  1: /*$Id: pbjacobi.c,v 1.4 2001/08/07 03:03:42 balay Exp $*/
  3: /* 
  4:    Include files needed for the PBJacobi preconditioner:
  5:      pcimpl.h - private include file intended for use by all preconditioners 
  6: */
 8:  #include src/sles/pc/pcimpl.h
 10: /* 
 11:    Private context (data structure) for the PBJacobi preconditioner.  
 12: */
 13: typedef struct {
 14:   PetscScalar *diag;
 15:   int    bs,mbs;
 16: } PC_PBJacobi;
 18: /*
 19:    Currently only implemented for baij matrices and directly access baij
 20:   data structures.
 21: */
 22:  #include src/mat/impls/baij/mpi/mpibaij.h
 23:  #include src/inline/ilu.h
 25: static int PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
 26: {
 27:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
 28:   int         ierr,i,m = jac->mbs;
 29:   PetscScalar *diag = jac->diag,x0,x1,*xx,*yy;
 30: 
 32:   VecGetArray(x,&xx);
 33:   VecGetArray(y,&yy);
 34:   for (i=0; i<m; i++) {
 35:     x0 = xx[2*i]; x1 = xx[2*i+1];
 36:     yy[2*i]   = diag[0]*x0 + diag[2]*x1;
 37:     yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
 38:     diag     += 4;
 39:   }
 40:   VecRestoreArray(x,&xx);
 41:   VecRestoreArray(y,&yy);
 42:   PetscLogFlops(6*m);
 43:   return(0);
 44: }
 45: static int PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
 46: {
 47:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
 48:   int         ierr,i,m = jac->mbs;
 49:   PetscScalar *diag = jac->diag,x0,x1,x2,*xx,*yy;
 50: 
 52:   VecGetArray(x,&xx);
 53:   VecGetArray(y,&yy);
 54:   for (i=0; i<m; i++) {
 55:     x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];
 56:     yy[3*i]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
 57:     yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
 58:     yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
 59:     diag     += 9;
 60:   }
 61:   VecRestoreArray(x,&xx);
 62:   VecRestoreArray(y,&yy);
 63:   PetscLogFlops(15*m);
 64:   return(0);
 65: }
 66: static int PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
 67: {
 68:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
 69:   int         ierr,i,m = jac->mbs;
 70:   PetscScalar *diag = jac->diag,x0,x1,x2,x3,*xx,*yy;
 71: 
 73:   VecGetArray(x,&xx);
 74:   VecGetArray(y,&yy);
 75:   for (i=0; i<m; i++) {
 76:     x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];
 77:     yy[4*i]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
 78:     yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
 79:     yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
 80:     yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
 81:     diag     += 16;
 82:   }
 83:   VecRestoreArray(x,&xx);
 84:   VecRestoreArray(y,&yy);
 85:   PetscLogFlops(28*m);
 86:   return(0);
 87: }
 88: static int PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
 89: {
 90:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
 91:   int         ierr,i,m = jac->mbs;
 92:   PetscScalar *diag = jac->diag,x0,x1,x2,x3,x4,*xx,*yy;
 93: 
 95:   VecGetArray(x,&xx);
 96:   VecGetArray(y,&yy);
 97:   for (i=0; i<m; i++) {
 98:     x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];
 99:     yy[5*i]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
100:     yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
101:     yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
102:     yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
103:     yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
104:     diag     += 25;
105:   }
106:   VecRestoreArray(x,&xx);
107:   VecRestoreArray(y,&yy);
108:   PetscLogFlops(45*m);
109:   return(0);
110: }
111: /* -------------------------------------------------------------------------- */
112: static int PCSetUp_PBJacobi(PC pc)
113: {
114:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
115:   int         ierr,i,*diag_offset,bs2;
116:   PetscTruth  seqbaij,mpibaij;
117:   Mat         A = pc->pmat;
118:   PetscScalar *diag,*odiag,*v;
119:   Mat_SeqBAIJ *a;
122:   PetscTypeCompare((PetscObject)pc->pmat,MATSEQBAIJ,&seqbaij);
123:   PetscTypeCompare((PetscObject)pc->pmat,MATMPIBAIJ,&mpibaij);
124:   if (!seqbaij && !mpibaij) {
125:     SETERRQ(1,"Currently only supports BAIJ matrices");
126:   }
127:   if (mpibaij) A = ((Mat_MPIBAIJ*)A->data)->A;
128:   if (A->m != A->n) SETERRQ(1,"Supported only for square matrices and square storage");
129:   ierr        = MatMarkDiagonal_SeqBAIJ(A);
130:   a           = (Mat_SeqBAIJ*)A->data;
131:   bs2         = a->bs*a->bs;
132:   diag_offset = a->diag;
133:   v           = a->a;
134:   ierr        = PetscMalloc((bs2*a->mbs+1)*sizeof(PetscScalar),&diag);
135:   PetscLogObjectMemory(pc,bs2*a->mbs*sizeof(PetscScalar));
136:   jac->diag   = diag;
137:   jac->bs     = a->bs;
138:   jac->mbs    = a->mbs;
140:   /* factor and invert each block */
141:   switch (a->bs){
142:     case 2:
143:       for (i=0; i<a->mbs; i++) {
144:         odiag   = v + 4*diag_offset[i];
145:         diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
146:         ierr    = Kernel_A_gets_inverse_A_2(diag);
147:         diag   += 4;
148:       }
149:       pc->ops->apply = PCApply_PBJacobi_2;
150:       break;
151:     case 3:
152:       for (i=0; i<a->mbs; i++) {
153:         odiag   = v + 9*diag_offset[i];
154:         diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
155:         diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
156:         diag[8] = odiag[8]; diag[9] = odiag[9];
157:         ierr    = Kernel_A_gets_inverse_A_3(diag);
158:         diag   += 9;
159:       }
160:       pc->ops->apply = PCApply_PBJacobi_3;
161:       break;
162:     case 4:
163:       for (i=0; i<a->mbs; i++) {
164:         odiag = v + 16*diag_offset[i];
165:         ierr  = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
166:         ierr  = Kernel_A_gets_inverse_A_4(diag);
167:         diag += 16;
168:       }
169:       pc->ops->apply = PCApply_PBJacobi_4;
170:       break;
171:     case 5:
172:       for (i=0; i<a->mbs; i++) {
173:         odiag = v + 25*diag_offset[i];
174:         ierr  = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
175:         ierr  = Kernel_A_gets_inverse_A_5(diag);
176:         diag += 25;
177:       }
178:       pc->ops->apply = PCApply_PBJacobi_5;
179:       break;
180:     default:
181:       SETERRQ1(1,"not supported for block size %d",a->bs);
182:   }
184:   return(0);
185: }
186: /* -------------------------------------------------------------------------- */
187: static int PCDestroy_PBJacobi(PC pc)
188: {
189:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
190:   int         ierr;
193:   if (jac->diag)     {PetscFree(jac->diag);}
195:   /*
196:       Free the private data structure that was hanging off the PC
197:   */
198:   PetscFree(jac);
199:   return(0);
200: }
201: /* -------------------------------------------------------------------------- */
202: EXTERN_C_BEGIN
203: int PCCreate_PBJacobi(PC pc)
204: {
205:   PC_PBJacobi *jac;
206:   int       ierr;
210:   /*
211:      Creates the private data structure for this preconditioner and
212:      attach it to the PC object.
213:   */
214:   ierr      = PetscNew(PC_PBJacobi,&jac);
215:   pc->data  = (void*)jac;
217:   /*
218:      Logs the memory usage; this is not needed but allows PETSc to 
219:      monitor how much memory is being used for various purposes.
220:   */
221:   PetscLogObjectMemory(pc,sizeof(PC_PBJacobi));
223:   /*
224:      Initialize the pointers to vectors to ZERO; these will be used to store
225:      diagonal entries of the matrix for fast preconditioner application.
226:   */
227:   jac->diag          = 0;
229:   /*
230:       Set the pointers for the functions that are provided above.
231:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
232:       are called, they will automatically call these functions.  Note we
233:       choose not to provide a couple of these functions since they are
234:       not needed.
235:   */
236:   pc->ops->apply               = 0; /*set depending on the block size */
237:   pc->ops->applytranspose      = 0;
238:   pc->ops->setup               = PCSetUp_PBJacobi;
239:   pc->ops->destroy             = PCDestroy_PBJacobi;
240:   pc->ops->setfromoptions      = 0;
241:   pc->ops->view                = 0;
242:   pc->ops->applyrichardson     = 0;
243:   pc->ops->applysymmetricleft  = 0;
244:   pc->ops->applysymmetricright = 0;
245:   return(0);
246: }
247: EXTERN_C_END