Actual source code: denseqn.c

  1: #include <../src/ksp/ksp/utils/lmvm/dense/denseqn.h>
  2: #include <../src/ksp/ksp/utils/lmvm/blas_cyclic/blas_cyclic.h>
  3: #include <petscblaslapack.h>
  4: #include <petscmat.h>
  5: #include <petscsys.h>
  6: #include <petscsystypes.h>
  7: #include <petscis.h>
  8: #include <petscoptions.h>
  9: #include <petscdevice.h>
 10: #include <petsc/private/deviceimpl.h>

 12: static PetscErrorCode MatMult_LMVMDQN(Mat, Vec, Vec);
 13: static PetscErrorCode MatMult_LMVMDBFGS(Mat, Vec, Vec);
 14: static PetscErrorCode MatMult_LMVMDDFP(Mat, Vec, Vec);
 15: static PetscErrorCode MatSolve_LMVMDQN(Mat, Vec, Vec);
 16: static PetscErrorCode MatSolve_LMVMDBFGS(Mat, Vec, Vec);
 17: static PetscErrorCode MatSolve_LMVMDDFP(Mat, Vec, Vec);

 19: static inline PetscInt recycle_index(PetscInt m, PetscInt idx)
 20: {
 21:   return idx % m;
 22: }

 24: static inline PetscInt history_index(PetscInt m, PetscInt num_updates, PetscInt idx)
 25: {
 26:   return (idx - num_updates) + PetscMin(m, num_updates);
 27: }

 29: static inline PetscInt oldest_update(PetscInt m, PetscInt idx)
 30: {
 31:   return PetscMax(0, idx - m);
 32: }

 34: static PetscErrorCode MatView_LMVMDQN(Mat B, PetscViewer pv)
 35: {
 36:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
 37:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

 39:   PetscBool isascii;

 41:   PetscFunctionBegin;
 42:   PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &isascii));
 43:   PetscCall(MatView_LMVM(B, pv));
 44:   PetscCall(SymBroydenRescaleView(lqn->rescale, pv));
 45:   if (isascii) PetscCall(PetscViewerASCIIPrintf(pv, "Counts: S x : %" PetscInt_FMT ", S^T x : %" PetscInt_FMT ", Y x : %" PetscInt_FMT ",  Y^T x: %" PetscInt_FMT "\n", lqn->S_count, lqn->St_count, lqn->Y_count, lqn->Yt_count));
 46:   PetscFunctionReturn(PETSC_SUCCESS);
 47: }

 49: static PetscErrorCode MatLMVMDQNResetDestructive(Mat B)
 50: {
 51:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
 52:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

 54:   PetscFunctionBegin;
 55:   PetscCall(MatDestroy(&lqn->HY));
 56:   PetscCall(MatDestroy(&lqn->BS));
 57:   PetscCall(MatDestroy(&lqn->StY_triu));
 58:   PetscCall(MatDestroy(&lqn->YtS_triu));
 59:   PetscCall(VecDestroy(&lqn->StFprev));
 60:   PetscCall(VecDestroy(&lqn->Fprev_ref));
 61:   lqn->Fprev_state = 0;
 62:   PetscCall(MatDestroy(&lqn->YtS_triu_strict));
 63:   PetscCall(MatDestroy(&lqn->StY_triu_strict));
 64:   PetscCall(MatDestroy(&lqn->StBS));
 65:   PetscCall(MatDestroy(&lqn->YtHY));
 66:   PetscCall(MatDestroy(&lqn->J));
 67:   PetscCall(MatDestroy(&lqn->temp_mat));
 68:   PetscCall(VecDestroy(&lqn->diag_vec));
 69:   PetscCall(VecDestroy(&lqn->diag_vec_recycle_order));
 70:   PetscCall(VecDestroy(&lqn->inv_diag_vec));
 71:   PetscCall(VecDestroy(&lqn->column_work));
 72:   PetscCall(VecDestroy(&lqn->column_work2));
 73:   PetscCall(VecDestroy(&lqn->rwork1));
 74:   PetscCall(VecDestroy(&lqn->rwork2));
 75:   PetscCall(VecDestroy(&lqn->rwork3));
 76:   PetscCall(VecDestroy(&lqn->rwork2_local));
 77:   PetscCall(VecDestroy(&lqn->rwork3_local));
 78:   PetscCall(VecDestroy(&lqn->cyclic_work_vec));
 79:   PetscCall(VecDestroyVecs(lmvm->m, &lqn->PQ));
 80:   PetscCall(PetscFree(lqn->stp));
 81:   PetscCall(PetscFree(lqn->yts));
 82:   PetscCall(PetscFree(lqn->ytq));
 83:   lqn->allocated = PETSC_FALSE;
 84:   PetscFunctionReturn(PETSC_SUCCESS);
 85: }

 87: static PetscErrorCode MatReset_LMVMDQN_Internal(Mat B, MatLMVMResetMode mode)
 88: {
 89:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
 90:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

 92:   PetscFunctionBegin;
 93:   lqn->watchdog         = 0;
 94:   lqn->needPQ           = PETSC_TRUE;
 95:   lqn->num_updates      = 0;
 96:   lqn->num_mult_updates = 0;
 97:   if (MatLMVMResetClearsBases(mode)) PetscCall(MatLMVMDQNResetDestructive(B));
 98:   else {
 99:     if (lqn->BS) PetscCall(MatZeroEntries(lqn->BS));
100:     if (lqn->HY) PetscCall(MatZeroEntries(lqn->HY));
101:     if (lqn->StY_triu) { /* Set to identity by default so it is invertible */
102:       PetscCall(MatZeroEntries(lqn->StY_triu));
103:       PetscCall(MatShift(lqn->StY_triu, 1.0));
104:     }
105:     if (lqn->YtS_triu) {
106:       PetscCall(MatZeroEntries(lqn->YtS_triu));
107:       PetscCall(MatShift(lqn->YtS_triu, 1.0));
108:     }
109:     if (lqn->YtS_triu_strict) PetscCall(MatZeroEntries(lqn->YtS_triu_strict));
110:     if (lqn->StY_triu_strict) PetscCall(MatZeroEntries(lqn->StY_triu_strict));
111:     if (lqn->StBS) {
112:       PetscCall(MatZeroEntries(lqn->StBS));
113:       PetscCall(MatShift(lqn->StBS, 1.0));
114:     }
115:     if (lqn->YtHY) {
116:       PetscCall(MatZeroEntries(lqn->YtHY));
117:       PetscCall(MatShift(lqn->YtHY, 1.0));
118:     }
119:     PetscCall(VecDestroy(&lqn->Fprev_ref));
120:     lqn->Fprev_state = 0;
121:     if (lqn->StFprev) PetscCall(VecZeroEntries(lqn->StFprev));
122:   }
123:   PetscFunctionReturn(PETSC_SUCCESS);
124: }

126: static PetscErrorCode MatReset_LMVMDQN(Mat B, MatLMVMResetMode mode)
127: {
128:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
129:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

131:   PetscFunctionBegin;
132:   PetscCall(SymBroydenRescaleReset(B, lqn->rescale, mode));
133:   PetscCall(MatReset_LMVMDQN_Internal(B, mode));
134:   PetscFunctionReturn(PETSC_SUCCESS);
135: }

137: static PetscErrorCode MatAllocate_LMVMDQN_Internal(Mat B)
138: {
139:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
140:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

142:   PetscFunctionBegin;
143:   if (!lqn->allocated) {
144:     if (lmvm->m > 0) {
145:       PetscMPIInt rank;
146:       PetscInt    n, N, m, M;
147:       PetscBool   is_dbfgs, is_ddfp, is_dqn;
148:       VecType     vec_type;
149:       MPI_Comm    comm  = PetscObjectComm((PetscObject)B);
150:       Mat         Sfull = lmvm->basis[LMBASIS_S]->vecs;

152:       PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
153:       PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
154:       PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));

156:       PetscCallMPI(MPI_Comm_rank(comm, &rank));
157:       PetscCall(MatGetSize(B, &N, NULL));
158:       PetscCall(MatGetLocalSize(B, &n, NULL));
159:       M = lmvm->m;
160:       m = (rank == 0) ? M : 0;

162:       /* For DBFGS: Create data needed for MatSolve() eagerly; data needed for MatMult() will be created on demand
163:        * For DDFP : Create data needed for MatMult() eagerly; data needed for MatSolve() will be created on demand
164:        * For DQN  : Create all data eagerly */
165:       PetscCall(VecGetType(lmvm->Xprev, &vec_type));
166:       if (is_dqn) {
167:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
168:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
169:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
170:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
171:       } else if (is_ddfp) {
172:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
173:         PetscCall(MatDuplicate(Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->HY));
174:         PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->diag_vec, &lqn->rwork1));
175:         PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->rwork2, &lqn->rwork3));
176:       } else if (is_dbfgs) {
177:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
178:         PetscCall(MatDuplicate(Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->BS));
179:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
180:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
181:       } else {
182:         SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatAllocate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
183:       }
184:       /* initialize StY_triu and YtS_triu to identity, if they exist, so it is invertible */
185:       if (lqn->StY_triu) {
186:         PetscCall(MatZeroEntries(lqn->StY_triu));
187:         PetscCall(MatShift(lqn->StY_triu, 1.0));
188:       }
189:       if (lqn->YtS_triu) {
190:         PetscCall(MatZeroEntries(lqn->YtS_triu));
191:         PetscCall(MatShift(lqn->YtS_triu, 1.0));
192:       }
193:       if (lqn->use_recursive && (is_dbfgs || is_ddfp)) {
194:         PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lqn->PQ));
195:         PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work2));
196:         PetscCall(PetscMalloc1(lmvm->m, &lqn->yts));
197:         if (is_dbfgs) PetscCall(PetscMalloc1(lmvm->m, &lqn->stp));
198:         else if (is_ddfp) PetscCall(PetscMalloc1(lmvm->m, &lqn->ytq));
199:       }
200:       PetscCall(VecDuplicate(lqn->rwork2, &lqn->cyclic_work_vec));
201:       PetscCall(VecZeroEntries(lqn->rwork1));
202:       PetscCall(VecZeroEntries(lqn->rwork2));
203:       PetscCall(VecZeroEntries(lqn->rwork3));
204:       PetscCall(VecZeroEntries(lqn->diag_vec));
205:     }
206:     PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work));
207:     lqn->allocated = PETSC_TRUE;
208:   }
209:   PetscFunctionReturn(PETSC_SUCCESS);
210: }

212: static PetscErrorCode MatSetUp_LMVMDQN(Mat B)
213: {
214:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
215:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

217:   PetscFunctionBegin;
218:   PetscCall(MatSetUp_LMVM(B));
219:   PetscCall(SymBroydenRescaleInitializeJ0(B, lqn->rescale));
220:   PetscCall(MatAllocate_LMVMDQN_Internal(B));
221:   PetscFunctionReturn(PETSC_SUCCESS);
222: }

224: static PetscErrorCode MatSetFromOptions_LMVMDQN(Mat B, PetscOptionItems PetscOptionsObject)
225: {
226:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
227:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
228:   PetscBool is_dbfgs, is_ddfp, is_dqn;

230:   PetscFunctionBegin;
231:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
232:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
233:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
234:   PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
235:   PetscOptionsHeadBegin(PetscOptionsObject, "Dense symmetric Broyden method for approximating SPD Jacobian actions");
236:   if (is_dqn) {
237:     PetscCall(PetscOptionsEnum("-mat_lqn_type", "Implementation options for L-QN", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
238:   } else if (is_dbfgs) {
239:     PetscCall(PetscOptionsBool("-mat_lbfgs_recursive", "Use recursive formulation for MatMult_LMVMDBFGS, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
240:     PetscCall(PetscOptionsEnum("-mat_lbfgs_type", "Implementation options for L-BFGS", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
241:   } else if (is_ddfp) {
242:     PetscCall(PetscOptionsBool("-mat_ldfp_recursive", "Use recursive formulation for MatSolve_LMVMDDFP, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
243:     PetscCall(PetscOptionsEnum("-mat_ldfp_type", "Implementation options for L-DFP", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
244:   } else {
245:     SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatSetFromOptions_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
246:   }
247:   PetscCall(SymBroydenRescaleSetFromOptions(B, lqn->rescale, PetscOptionsObject));
248:   PetscOptionsHeadEnd();
249:   PetscFunctionReturn(PETSC_SUCCESS);
250: }

252: static PetscErrorCode MatDestroy_LMVMDQN(Mat B)
253: {
254:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
255:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

257:   PetscFunctionBegin;
258:   PetscCall(SymBroydenRescaleDestroy(&lqn->rescale));
259:   PetscCall(MatReset_LMVMDQN_Internal(B, MAT_LMVM_RESET_ALL));
260:   PetscCall(PetscFree(lqn->workscalar));
261:   PetscCall(PetscFree(lmvm->ctx));
262:   PetscCall(MatDestroy_LMVM(B));
263:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", NULL));
264:   PetscFunctionReturn(PETSC_SUCCESS);
265: }

267: static PetscErrorCode MatUpdate_LMVMDQN(Mat B, Vec X, Vec F)
268: {
269:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
270:   Mat_DQN  *lqn   = (Mat_DQN *)lmvm->ctx;
271:   Mat       Sfull = lmvm->basis[LMBASIS_S]->vecs;
272:   Mat       Yfull = lmvm->basis[LMBASIS_Y]->vecs;

274:   PetscBool          is_ddfp, is_dbfgs, is_dqn;
275:   PetscDeviceContext dctx;

277:   PetscFunctionBegin;
278:   if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
279:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
280:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
281:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
282:   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
283:   if (lmvm->prev_set) {
284:     Vec         FX[2];
285:     PetscScalar dotFX[2];
286:     PetscScalar stFprev;
287:     PetscScalar curvature, yTy;
288:     PetscReal   curvtol;

290:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
291:     PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
292:     /* Test if the updates can be accepted */
293:     FX[0] = lmvm->Fprev; /* dotFX[0] = s^T Fprev */
294:     FX[1] = F;           /* dotFX[1] = s^T F     */
295:     PetscCall(VecMDot(lmvm->Xprev, 2, FX, dotFX));
296:     PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
297:     PetscCall(VecDot(lmvm->Fprev, lmvm->Fprev, &yTy));
298:     stFprev   = PetscConj(dotFX[0]);
299:     curvature = PetscConj(dotFX[1] - dotFX[0]); /* s^T y */
300:     if (PetscRealPart(yTy) < lmvm->eps) {
301:       curvtol = 0.0;
302:     } else {
303:       curvtol = lmvm->eps * PetscRealPart(yTy);
304:     }
305:     if (PetscRealPart(curvature) > curvtol) {
306:       PetscInt m     = lmvm->m;
307:       PetscInt k     = lmvm->k;
308:       PetscInt h_old = k - oldest_update(m, k);
309:       PetscInt h_new = k + 1 - oldest_update(m, k + 1);
310:       PetscInt idx   = recycle_index(m, k);

312:       /* Update is good, accept it */
313:       PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
314:       lqn->num_updates++;
315:       lqn->watchdog = 0;
316:       lqn->needPQ   = PETSC_TRUE;

318:       if (h_old == m && lqn->strategy == MAT_LMVM_DENSE_REORDER) {
319:         if (is_dqn) {
320:           PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
321:           PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
322:         } else if (is_dbfgs) {
323:           PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
324:         } else if (is_ddfp) {
325:           PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
326:         } else {
327:           SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
328:         }
329:       }

331:       if (lqn->use_recursive && (is_dbfgs || is_ddfp)) lqn->yts[idx] = PetscRealPart(curvature);

333:       if (is_dqn || is_dbfgs) { /* implement the scheme of Byrd, Nocedal, and Schnabel to save a MatMultTranspose call in the common case the       *
334:          * H_k is immediately applied to F after begin updated.   The S^T y computation can be split up as S^T (F - F_prev) */
335:         PetscInt     local_n;
336:         PetscScalar *StFprev;
337:         PetscMemType memtype;
338:         PetscInt     StYidx;

340:         StYidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
341:         if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
342:         PetscCall(VecGetLocalSize(lqn->StFprev, &local_n));
343:         PetscCall(VecGetArrayAndMemType(lqn->StFprev, &StFprev, &memtype));
344:         if (local_n) {
345:           if (PetscMemTypeHost(memtype)) {
346:             StFprev[idx] = stFprev;
347:           } else {
348:             PetscCall(PetscDeviceRegisterMemory(&stFprev, PETSC_MEMTYPE_HOST, 1 * sizeof(stFprev)));
349:             PetscCall(PetscDeviceRegisterMemory(StFprev, memtype, local_n * sizeof(*StFprev)));
350:             PetscCall(PetscDeviceArrayCopy(dctx, &StFprev[idx], &stFprev, 1));
351:           }
352:         }
353:         PetscCall(VecRestoreArrayAndMemType(lqn->StFprev, &StFprev));

355:         {
356:           Vec this_sy_col;
357:           /* Now StFprev is updated for the new S vector.  Write -StFprev into the appropriate row */
358:           PetscCall(MatDenseGetColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
359:           PetscCall(VecAXPBY(this_sy_col, -1.0, 0.0, lqn->StFprev));

361:           /* Now compute the new StFprev */
362:           PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, lqn->StFprev, 0, h_new));
363:           lqn->St_count++;

365:           /* Now add StFprev: this_sy_col == S^T (F - Fprev) == S^T y */
366:           PetscCall(VecAXPY(this_sy_col, 1.0, lqn->StFprev));

368:           if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_sy_col, lqn->num_updates, lqn->cyclic_work_vec));
369:           PetscCall(MatDenseRestoreColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
370:         }
371:       }

373:       if (is_ddfp || is_dqn) {
374:         PetscInt YtSidx;

376:         YtSidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;

378:         {
379:           Vec this_ys_col;

381:           PetscCall(MatDenseGetColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
382:           PetscCall(MatMultHermitianTransposeColumnRange(Yfull, lmvm->Xprev, this_ys_col, 0, h_new));
383:           lqn->Yt_count++;

385:           if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_ys_col, lqn->num_updates, lqn->cyclic_work_vec));
386:           PetscCall(MatDenseRestoreColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
387:         }
388:       }

390:       if (is_dbfgs || is_dqn) {
391:         PetscCall(MatGetDiagonal(lqn->StY_triu, lqn->diag_vec));
392:       } else if (is_ddfp) {
393:         PetscCall(MatGetDiagonal(lqn->YtS_triu, lqn->diag_vec));
394:       } else {
395:         SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
396:       }

398:       if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
399:         if (!lqn->diag_vec_recycle_order) PetscCall(VecDuplicate(lqn->diag_vec, &lqn->diag_vec_recycle_order));
400:         PetscCall(VecCopy(lqn->diag_vec, lqn->diag_vec_recycle_order));
401:         PetscCall(VecHistoryOrderToRecycleOrder(B, lqn->diag_vec_recycle_order, lqn->num_updates, lqn->cyclic_work_vec));
402:       } else {
403:         if (!lqn->diag_vec_recycle_order) {
404:           PetscCall(PetscObjectReference((PetscObject)lqn->diag_vec));
405:           lqn->diag_vec_recycle_order = lqn->diag_vec;
406:         }
407:       }

409:       PetscCall(SymBroydenRescaleUpdate(B, lqn->rescale));
410:     } else {
411:       /* Update is bad, skip it */
412:       ++lmvm->nrejects;
413:       ++lqn->watchdog;
414:       PetscInt m = lmvm->m;
415:       PetscInt k = lmvm->k;
416:       PetscInt h = k - oldest_update(m, k);

418:       /* we still have to maintain StFprev */
419:       if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
420:       PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, lqn->StFprev, 0, h));
421:       lqn->St_count++;
422:     }
423:   }

425:   if (lqn->watchdog > lqn->max_seq_rejects) PetscCall(MatLMVMReset(B, PETSC_FALSE));

427:   /* Save the solution and function to be used in the next update */
428:   PetscCall(VecCopy(X, lmvm->Xprev));
429:   PetscCall(VecCopy(F, lmvm->Fprev));
430:   PetscCall(PetscObjectReference((PetscObject)F));
431:   PetscCall(VecDestroy(&lqn->Fprev_ref));
432:   lqn->Fprev_ref = F;
433:   PetscCall(PetscObjectStateGet((PetscObject)F, &lqn->Fprev_state));
434:   lmvm->prev_set = PETSC_TRUE;
435:   PetscFunctionReturn(PETSC_SUCCESS);
436: }

438: static PetscErrorCode MatDestroyThenCopy(Mat src, Mat *dst)
439: {
440:   PetscFunctionBegin;
441:   PetscCall(MatDestroy(dst));
442:   if (src) PetscCall(MatDuplicate(src, MAT_COPY_VALUES, dst));
443:   PetscFunctionReturn(PETSC_SUCCESS);
444: }

446: static PetscErrorCode VecDestroyThenCopy(Vec src, Vec *dst)
447: {
448:   PetscFunctionBegin;
449:   PetscCall(VecDestroy(dst));
450:   if (src) {
451:     PetscCall(VecDuplicate(src, dst));
452:     PetscCall(VecCopy(src, *dst));
453:   }
454:   PetscFunctionReturn(PETSC_SUCCESS);
455: }

457: static PetscErrorCode MatCopy_LMVMDQN(Mat B, Mat M, MatStructure str)
458: {
459:   Mat_LMVM *bdata = (Mat_LMVM *)B->data;
460:   Mat_DQN  *blqn  = (Mat_DQN *)bdata->ctx;
461:   Mat_LMVM *mdata = (Mat_LMVM *)M->data;
462:   Mat_DQN  *mlqn  = (Mat_DQN *)mdata->ctx;
463:   PetscInt  i;
464:   PetscBool is_dbfgs, is_ddfp, is_dqn;

466:   PetscFunctionBegin;
467:   PetscCall(SymBroydenRescaleCopy(blqn->rescale, mlqn->rescale));
468:   mlqn->num_updates      = blqn->num_updates;
469:   mlqn->num_mult_updates = blqn->num_mult_updates;
470:   mlqn->dense_type       = blqn->dense_type;
471:   mlqn->strategy         = blqn->strategy;
472:   mlqn->S_count          = 0;
473:   mlqn->St_count         = 0;
474:   mlqn->Y_count          = 0;
475:   mlqn->Yt_count         = 0;
476:   mlqn->watchdog         = blqn->watchdog;
477:   mlqn->max_seq_rejects  = blqn->max_seq_rejects;
478:   mlqn->use_recursive    = blqn->use_recursive;
479:   mlqn->needPQ           = blqn->needPQ;
480:   if (blqn->allocated) {
481:     PetscCall(MatAllocate_LMVMDQN_Internal(M));
482:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
483:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
484:     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
485:     PetscCall(MatDestroyThenCopy(blqn->HY, &mlqn->BS));
486:     PetscCall(VecDestroyThenCopy(blqn->StFprev, &mlqn->StFprev));
487:     PetscCall(MatDestroyThenCopy(blqn->StY_triu, &mlqn->StY_triu));
488:     PetscCall(MatDestroyThenCopy(blqn->StY_triu_strict, &mlqn->StY_triu_strict));
489:     PetscCall(MatDestroyThenCopy(blqn->YtS_triu, &mlqn->YtS_triu));
490:     PetscCall(MatDestroyThenCopy(blqn->YtS_triu_strict, &mlqn->YtS_triu_strict));
491:     PetscCall(MatDestroyThenCopy(blqn->YtHY, &mlqn->YtHY));
492:     PetscCall(MatDestroyThenCopy(blqn->StBS, &mlqn->StBS));
493:     PetscCall(MatDestroyThenCopy(blqn->J, &mlqn->J));
494:     PetscCall(VecDestroyThenCopy(blqn->diag_vec, &mlqn->diag_vec));
495:     PetscCall(VecDestroyThenCopy(blqn->diag_vec_recycle_order, &mlqn->diag_vec_recycle_order));
496:     PetscCall(VecDestroyThenCopy(blqn->inv_diag_vec, &mlqn->inv_diag_vec));
497:     if (blqn->use_recursive && (is_dbfgs || is_ddfp)) {
498:       for (i = 0; i < bdata->m; i++) {
499:         PetscCall(VecDestroyThenCopy(blqn->PQ[i], &mlqn->PQ[i]));
500:         mlqn->yts[i] = blqn->yts[i];
501:         if (is_dbfgs) {
502:           mlqn->stp[i] = blqn->stp[i];
503:         } else if (is_ddfp) {
504:           mlqn->ytq[i] = blqn->ytq[i];
505:         }
506:       }
507:     }
508:   }
509:   PetscCall(PetscObjectReference((PetscObject)blqn->Fprev_ref));
510:   PetscCall(VecDestroy(&mlqn->Fprev_ref));
511:   mlqn->Fprev_ref   = blqn->Fprev_ref;
512:   mlqn->Fprev_state = blqn->Fprev_state;
513:   PetscFunctionReturn(PETSC_SUCCESS);
514: }

516: static PetscErrorCode MatMult_LMVMDQN(Mat B, Vec X, Vec Z)
517: {
518:   PetscFunctionBegin;
519:   PetscCall(MatMult_LMVMDDFP(B, X, Z));
520:   PetscFunctionReturn(PETSC_SUCCESS);
521: }

523: static PetscErrorCode MatSolve_LMVMDQN(Mat H, Vec F, Vec dX)
524: {
525:   PetscFunctionBegin;
526:   PetscCall(MatSolve_LMVMDBFGS(H, F, dX));
527:   PetscFunctionReturn(PETSC_SUCCESS);
528: }

530: static PetscErrorCode MatLMVMSymBroydenSetDelta_LMVMDQN(Mat B, PetscScalar delta)
531: {
532:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
533:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

535:   PetscFunctionBegin;
536:   PetscCall(SymBroydenRescaleSetDelta(B, lqn->rescale, PetscAbsReal(PetscRealPart(delta))));
537:   PetscFunctionReturn(PETSC_SUCCESS);
538: }

540: /*
541:   This dense representation uses Davidon-Fletcher-Powell (DFP) for MatMult,
542:   and Broyden-Fletcher-Goldfarb-Shanno (BFGS) for MatSolve. This implementation
543:   results in avoiding costly Cholesky factorization, at the cost of duality cap.
544:   Please refer to MatLMVMDDFP and MatLMVMDBFGS for more information.
545: */
546: PetscErrorCode MatCreate_LMVMDQN(Mat B)
547: {
548:   Mat_LMVM *lmvm;
549:   Mat_DQN  *lqn;

551:   PetscFunctionBegin;
552:   PetscCall(MatCreate_LMVM(B));
553:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDQN));
554:   PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
555:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
556:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
557:   B->ops->view           = MatView_LMVMDQN;
558:   B->ops->setup          = MatSetUp_LMVMDQN;
559:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
560:   B->ops->destroy        = MatDestroy_LMVMDQN;

562:   lmvm              = (Mat_LMVM *)B->data;
563:   lmvm->ops->reset  = MatReset_LMVMDQN;
564:   lmvm->ops->update = MatUpdate_LMVMDQN;
565:   lmvm->ops->mult   = MatMult_LMVMDQN;
566:   lmvm->ops->solve  = MatSolve_LMVMDQN;
567:   lmvm->ops->copy   = MatCopy_LMVMDQN;

569:   lmvm->ops->multht  = lmvm->ops->mult;
570:   lmvm->ops->solveht = lmvm->ops->solve;

572:   PetscCall(PetscNew(&lqn));
573:   lmvm->ctx            = (void *)lqn;
574:   lqn->allocated       = PETSC_FALSE;
575:   lqn->use_recursive   = PETSC_FALSE;
576:   lqn->needPQ          = PETSC_FALSE;
577:   lqn->watchdog        = 0;
578:   lqn->max_seq_rejects = lmvm->m / 2;
579:   lqn->strategy        = MAT_LMVM_DENSE_INPLACE;

581:   PetscCall(SymBroydenRescaleCreate(&lqn->rescale));
582:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
583:   PetscFunctionReturn(PETSC_SUCCESS);
584: }

586: /*@
587:   MatCreateLMVMDQN - Creates a dense representation of the limited-memory
588:   Quasi-Newton approximation to a Hessian.

590:   Collective

592:   Input Parameters:
593: + comm - MPI communicator
594: . n    - number of local rows for storage vectors
595: - N    - global size of the storage vectors

597:   Output Parameter:
598: . B - the matrix

600:   Level: advanced

602:   Note:
603:   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
604:   paradigm instead of this routine directly.

606: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatCreateLMVMDDFP()`, `MatCreateLMVMDBFGS()`
607: @*/
608: PetscErrorCode MatCreateLMVMDQN(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
609: {
610:   PetscFunctionBegin;
611:   PetscCall(KSPInitializePackage());
612:   PetscCall(MatCreate(comm, B));
613:   PetscCall(MatSetSizes(*B, n, n, N, N));
614:   PetscCall(MatSetType(*B, MATLMVMDQN));
615:   PetscCall(MatSetUp(*B));
616:   PetscFunctionReturn(PETSC_SUCCESS);
617: }

619: static PetscErrorCode MatDQNApplyJ0Fwd(Mat B, Vec X, Vec Z)
620: {
621:   PetscFunctionBegin;
622:   PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
623:   PetscFunctionReturn(PETSC_SUCCESS);
624: }

626: static PetscErrorCode MatDQNApplyJ0Inv(Mat B, Vec F, Vec dX)
627: {
628:   PetscFunctionBegin;
629:   PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
630:   PetscFunctionReturn(PETSC_SUCCESS);
631: }

633: /* This is not Bunch-Kaufman LDLT: here L is strictly lower triangular part of STY */
634: static PetscErrorCode MatGetLDLT(Mat B, Mat result)
635: {
636:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
637:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
638:   PetscInt  m_local;

640:   PetscFunctionBegin;
641:   if (!lbfgs->temp_mat) PetscCall(MatDuplicate(lbfgs->YtS_triu_strict, MAT_SHARE_NONZERO_PATTERN, &lbfgs->temp_mat));
642:   PetscCall(MatCopy(lbfgs->YtS_triu_strict, lbfgs->temp_mat, SAME_NONZERO_PATTERN));
643:   PetscCall(MatDiagonalScale(lbfgs->temp_mat, lbfgs->inv_diag_vec, NULL));
644:   PetscCall(MatGetLocalSize(result, &m_local, NULL));
645:   // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
646:   PetscCall(MatConjugate(lbfgs->temp_mat));
647:   if (m_local) {
648:     Mat temp_local, YtS_local, result_local;
649:     PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
650:     PetscCall(MatDenseGetLocalMatrix(lbfgs->temp_mat, &temp_local));
651:     PetscCall(MatDenseGetLocalMatrix(result, &result_local));
652:     PetscCall(MatTransposeMatMult(YtS_local, temp_local, MAT_REUSE_MATRIX, PETSC_DETERMINE, &result_local));
653:   }
654:   PetscCall(MatConjugate(result));
655:   PetscFunctionReturn(PETSC_SUCCESS);
656: }

658: static PetscErrorCode MatLMVMDBFGSUpdateMultData(Mat B)
659: {
660:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
661:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
662:   PetscInt  m     = lmvm->m, m_local;
663:   PetscInt  k     = lmvm->k;
664:   PetscInt  h     = k - oldest_update(m, k);
665:   PetscInt  j_0;
666:   PetscInt  prev_oldest;
667:   Mat       J_local;
668:   Mat       Sfull = lmvm->basis[LMBASIS_S]->vecs;
669:   Mat       Yfull = lmvm->basis[LMBASIS_Y]->vecs;

671:   PetscFunctionBegin;
672:   if (!lbfgs->YtS_triu_strict) {
673:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->YtS_triu_strict));
674:     PetscCall(MatDestroy(&lbfgs->StBS));
675:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->StBS));
676:     PetscCall(MatDestroy(&lbfgs->J));
677:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->J));
678:     PetscCall(MatDestroy(&lbfgs->BS));
679:     PetscCall(MatDuplicate(Yfull, MAT_SHARE_NONZERO_PATTERN, &lbfgs->BS));
680:     PetscCall(MatShift(lbfgs->StBS, 1.0));
681:     lbfgs->num_mult_updates = oldest_update(m, k);
682:   }
683:   if (lbfgs->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);

685:   /* B_0 may have been updated, we must recompute B_0 S and S^T B_0 S */
686:   for (PetscInt j = oldest_update(m, k); j < k; j++) {
687:     Vec      s_j;
688:     Vec      Bs_j;
689:     Vec      StBs_j;
690:     PetscInt S_idx    = recycle_index(m, j);
691:     PetscInt StBS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);

693:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
694:     PetscCall(MatDenseGetColumnVecRead(Sfull, S_idx, &s_j));
695:     PetscCall(MatDQNApplyJ0Fwd(B, s_j, Bs_j));
696:     PetscCall(MatDenseRestoreColumnVecRead(Sfull, S_idx, &s_j));
697:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
698:     PetscCall(MatMultHermitianTransposeColumnRange(Sfull, Bs_j, StBs_j, 0, h));
699:     lbfgs->St_count++;
700:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, StBs_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
701:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
702:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
703:   }
704:   prev_oldest = oldest_update(m, lbfgs->num_mult_updates);
705:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
706:     /* move the YtS entries that have been computed and need to be kept back up */
707:     PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);

709:     PetscCall(MatMove_LR3(B, lbfgs->YtS_triu_strict, m_keep));
710:   }
711:   PetscCall(MatGetLocalSize(lbfgs->YtS_triu_strict, &m_local, NULL));
712:   j_0 = PetscMax(lbfgs->num_mult_updates, oldest_update(m, k));
713:   for (PetscInt j = j_0; j < k; j++) {
714:     PetscInt S_idx   = recycle_index(m, j);
715:     PetscInt YtS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
716:     Vec      s_j, Yts_j;

718:     PetscCall(MatDenseGetColumnVecRead(Sfull, S_idx, &s_j));
719:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
720:     PetscCall(MatMultHermitianTransposeColumnRange(Yfull, s_j, Yts_j, 0, h));
721:     lbfgs->Yt_count++;
722:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Yts_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
723:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
724:     PetscCall(MatDenseRestoreColumnVecRead(Sfull, S_idx, &s_j));
725:     /* zero the corresponding row */
726:     if (m_local > 0) {
727:       Mat YtS_local, YtS_row;

729:       PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
730:       PetscCall(MatDenseGetSubMatrix(YtS_local, YtS_idx, YtS_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &YtS_row));
731:       PetscCall(MatZeroEntries(YtS_row));
732:       PetscCall(MatDenseRestoreSubMatrix(YtS_local, &YtS_row));
733:     }
734:   }
735:   if (!lbfgs->inv_diag_vec) PetscCall(VecDuplicate(lbfgs->diag_vec, &lbfgs->inv_diag_vec));
736:   PetscCall(VecCopy(lbfgs->diag_vec, lbfgs->inv_diag_vec));
737:   PetscCall(VecReciprocal(lbfgs->inv_diag_vec));
738:   PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
739:   PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
740:   PetscCall(MatGetLDLT(B, lbfgs->J));
741:   PetscCall(MatAXPY(lbfgs->J, 1.0, lbfgs->StBS, SAME_NONZERO_PATTERN));
742:   if (m_local) {
743:     PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
744:     PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
745:   }
746:   lbfgs->num_mult_updates = lbfgs->num_updates;
747:   PetscFunctionReturn(PETSC_SUCCESS);
748: }

750: /* Solves for
751:  * [ I | -S R^{-T} ] [  I  | 0 ] [ H_0 | 0 ] [ I | Y ] [      I      ]
752:  *                   [-----+---] [-----+---] [---+---] [-------------]
753:  *                   [ Y^T | I ] [  0  | D ] [ 0 | I ] [ -R^{-1} S^T ]  */

755: static PetscErrorCode MatSolve_LMVMDBFGS(Mat H, Vec F, Vec dX)
756: {
757:   Mat_LMVM        *lmvm   = (Mat_LMVM *)H->data;
758:   Mat_DQN         *lbfgs  = (Mat_DQN *)lmvm->ctx;
759:   Vec              rwork1 = lbfgs->rwork1;
760:   PetscInt         m      = lmvm->m;
761:   PetscInt         k      = lmvm->k;
762:   PetscInt         h      = k - oldest_update(m, k);
763:   Mat              Sfull  = lmvm->basis[LMBASIS_S]->vecs;
764:   Mat              Yfull  = lmvm->basis[LMBASIS_Y]->vecs;
765:   PetscObjectState Fstate;

767:   PetscFunctionBegin;
768:   VecCheckSameSize(F, 2, dX, 3);
769:   VecCheckMatCompatible(H, dX, 3, F, 2);

771:   /* Block Version */
772:   if (!lbfgs->num_updates) {
773:     PetscCall(MatDQNApplyJ0Inv(H, F, dX));
774:     PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
775:   }

777:   PetscCall(PetscObjectStateGet((PetscObject)F, &Fstate));
778:   if (F == lbfgs->Fprev_ref && Fstate == lbfgs->Fprev_state) {
779:     PetscCall(VecCopy(lbfgs->StFprev, rwork1));
780:   } else {
781:     PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, rwork1, 0, h));
782:     lbfgs->St_count++;
783:   }

785:   /* Reordering rwork1, as STY is in history order, while S is in recycled order */
786:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
787:   PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_FALSE, lbfgs->num_updates, lbfgs->strategy));
788:   PetscCall(VecScale(rwork1, -1.0));
789:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));

791:   PetscCall(VecCopy(F, lbfgs->column_work));
792:   PetscCall(MatMultAddColumnRange(Yfull, rwork1, lbfgs->column_work, lbfgs->column_work, 0, h));
793:   lbfgs->Y_count++;

795:   PetscCall(VecPointwiseMult(rwork1, lbfgs->diag_vec_recycle_order, rwork1));
796:   PetscCall(MatDQNApplyJ0Inv(H, lbfgs->column_work, dX));

798:   PetscCall(MatMultHermitianTransposeAddColumnRange(Yfull, dX, rwork1, rwork1, 0, h));
799:   lbfgs->Yt_count++;

801:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
802:   PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_TRUE, lbfgs->num_updates, lbfgs->strategy));
803:   PetscCall(VecScale(rwork1, -1.0));
804:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));

806:   PetscCall(MatMultAddColumnRange(Sfull, rwork1, dX, dX, 0, h));
807:   lbfgs->S_count++;
808:   PetscFunctionReturn(PETSC_SUCCESS);
809: }

811: /* Solves for
812:    B_0 - [ Y | B_0 S] [ -D  |    L^T    ]^-1 [   Y^T   ]
813:                       [-----+-----------]    [---------]
814:                       [  L  | S^T B_0 S ]    [ S^T B_0 ]

816:    Above is equivalent to

818:    B_0 - [ Y | B_0 S] [[     I     | 0 ][ -D  | 0 ][ I | -D^{-1} L^T ]]^-1 [   Y^T   ]
819:                       [[-----------+---][-----+---][---+-------------]]    [---------]
820:                       [[ -L D^{-1} | I ][  0  | J ][ 0 |       I     ]]    [ S^T B_0 ]

822:    where J = S^T B_0 S + L D^{-1} L^T

824:    becomes

826:    B_0 - [ Y | B_0 S] [ I | D^{-1} L^T ][ -D^{-1}  |   0    ][    I     | 0 ] [   Y^T   ]
827:                       [---+------------][----------+--------][----------+---] [---------]
828:                       [ 0 |     I      ][     0    | J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]

830:                       =

832:    B_0 + [ Y | B_0 S] [ D^{-1} | 0 ][ I | L^T ][ I |    0    ][     I    | 0 ] [   Y^T   ]
833:                       [--------+---][---+-----][---+---------][----------+---] [---------]
834:                       [ 0      | I ][ 0 |  I  ][ 0 | -J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]

836:                       (Note that YtS_triu_strict is L^T)
837:    Byrd, Nocedal, Schnabel 1994

839:    Alternative approach: considering the fact that DFP is dual to BFGS, use MatMult of DPF:
840:    (See ddfp.c's MatMult_LMVMDDFP)

842: */
843: static PetscErrorCode MatMult_LMVMDBFGS(Mat B, Vec X, Vec Z)
844: {
845:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
846:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
847:   Mat       J_local;
848:   PetscInt  idx, i, j, m_local, local_n;
849:   PetscInt  m     = lmvm->m;
850:   PetscInt  k     = lmvm->k;
851:   PetscInt  h     = k - oldest_update(m, k);
852:   Mat       Sfull = lmvm->basis[LMBASIS_S]->vecs;
853:   Mat       Yfull = lmvm->basis[LMBASIS_Y]->vecs;

855:   PetscFunctionBegin;
856:   VecCheckSameSize(X, 2, Z, 3);
857:   VecCheckMatCompatible(B, X, 2, Z, 3);

859:   /* Cholesky Version */
860:   /* Start with the B0 term */
861:   PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
862:   if (!lbfgs->num_updates) PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */

864:   if (lbfgs->use_recursive) {
865:     PetscDeviceContext dctx;
866:     PetscMemType       memtype;
867:     PetscScalar        stz, ytx, stp, sjtpi, yjtsi, *workscalar;
868:     PetscInt           oldest = oldest_update(m, k);

870:     PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
871:     /* Recursive formulation to avoid Cholesky. Not a dense formulation */
872:     PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, lbfgs->rwork1, 0, h));
873:     lbfgs->Yt_count++;

875:     PetscCall(VecGetLocalSize(lbfgs->rwork1, &local_n));

877:     if (lbfgs->needPQ) {
878:       PetscInt oldest = oldest_update(m, k);
879:       for (i = oldest; i < k; ++i) {
880:         idx = recycle_index(m, i);
881:         /* column_work = S[idx] */
882:         PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work, idx));
883:         PetscCall(MatDQNApplyJ0Fwd(B, lbfgs->column_work, lbfgs->PQ[idx]));
884:         PetscCall(MatMultHermitianTransposeColumnRange(Yfull, lbfgs->column_work, lbfgs->rwork3, 0, h));
885:         PetscCall(VecGetArrayAndMemType(lbfgs->rwork3, &workscalar, &memtype));
886:         for (j = oldest; j < i; ++j) {
887:           PetscInt idx_j = recycle_index(m, j);
888:           /* Copy yjtsi in device-aware manner */
889:           if (local_n) {
890:             if (PetscMemTypeHost(memtype)) {
891:               yjtsi = workscalar[idx_j];
892:             } else {
893:               PetscCall(PetscDeviceRegisterMemory(&yjtsi, PETSC_MEMTYPE_HOST, sizeof(yjtsi)));
894:               PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
895:               PetscCall(PetscDeviceArrayCopy(dctx, &yjtsi, &workscalar[idx_j], 1));
896:             }
897:           }
898:           PetscCallMPI(MPI_Bcast(&yjtsi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
899:           /* column_work2 = S[j] */
900:           PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work2, idx_j));
901:           PetscCall(VecDot(lbfgs->PQ[idx], lbfgs->column_work2, &sjtpi));
902:           /* column_work2 = Y[j] */
903:           PetscCall(MatGetColumnVector(Yfull, lbfgs->column_work2, idx_j));
904:           /* Compute the pure BFGS component of the forward product */
905:           PetscCall(VecAXPBYPCZ(lbfgs->PQ[idx], -sjtpi / lbfgs->stp[idx_j], yjtsi / lbfgs->yts[idx_j], 1.0, lbfgs->PQ[idx_j], lbfgs->column_work2));
906:         }
907:         PetscCall(VecDot(lbfgs->PQ[idx], lbfgs->column_work, &stp));
908:         lbfgs->stp[idx] = PetscRealPart(stp);
909:       }
910:       lbfgs->needPQ = PETSC_FALSE;
911:     }

913:     PetscCall(VecGetArrayAndMemType(lbfgs->rwork1, &workscalar, &memtype));
914:     for (i = oldest; i < k; ++i) {
915:       idx = recycle_index(m, i);
916:       /* Copy stz[i], ytx[i] in device-aware manner */
917:       if (local_n) {
918:         if (PetscMemTypeHost(memtype)) {
919:           ytx = workscalar[idx];
920:         } else {
921:           PetscCall(PetscDeviceRegisterMemory(&ytx, PETSC_MEMTYPE_HOST, 1 * sizeof(ytx)));
922:           PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
923:           PetscCall(PetscDeviceArrayCopy(dctx, &ytx, &workscalar[idx], 1));
924:         }
925:       }
926:       PetscCallMPI(MPI_Bcast(&ytx, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
927:       /* column_work : S[i], column_work2 : Y[i] */
928:       PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work, idx));
929:       PetscCall(MatGetColumnVector(Yfull, lbfgs->column_work2, idx));
930:       PetscCall(VecDot(Z, lbfgs->column_work, &stz));
931:       PetscCall(VecAXPBYPCZ(Z, -stz / lbfgs->stp[idx], ytx / lbfgs->yts[idx], 1.0, lbfgs->PQ[idx], lbfgs->column_work2));
932:     }
933:     PetscCall(VecRestoreArrayAndMemType(lbfgs->rwork1, &workscalar));
934:   } else {
935:     PetscCall(MatLMVMDBFGSUpdateMultData(B));
936:     PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, lbfgs->rwork1, 0, h));
937:     lbfgs->Yt_count++;
938:     PetscCall(MatMultHermitianTransposeColumnRange(Sfull, Z, lbfgs->rwork2, 0, h));
939:     lbfgs->St_count++;
940:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
941:       PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
942:       PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork2, lbfgs->num_updates, lbfgs->cyclic_work_vec));
943:     }

945:     PetscCall(VecPointwiseMult(lbfgs->rwork1, lbfgs->rwork1, lbfgs->inv_diag_vec));
946:     if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(lbfgs->YtS_triu_strict));
947:     PetscCall(MatMultTransposeAdd(lbfgs->YtS_triu_strict, lbfgs->rwork1, lbfgs->rwork2, lbfgs->rwork2));
948:     if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(lbfgs->YtS_triu_strict));

950:     if (!lbfgs->rwork2_local) PetscCall(VecCreateLocalVector(lbfgs->rwork2, &lbfgs->rwork2_local));
951:     if (!lbfgs->rwork3_local) PetscCall(VecCreateLocalVector(lbfgs->rwork3, &lbfgs->rwork3_local));
952:     PetscCall(VecGetLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
953:     PetscCall(VecGetLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
954:     PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
955:     PetscCall(VecGetSize(lbfgs->rwork2_local, &m_local));
956:     if (m_local) {
957:       PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
958:       PetscCall(MatSolve(J_local, lbfgs->rwork2_local, lbfgs->rwork3_local));
959:     }
960:     PetscCall(VecRestoreLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
961:     PetscCall(VecRestoreLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
962:     PetscCall(VecScale(lbfgs->rwork3, -1.0));

964:     PetscCall(MatMult(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork2));
965:     PetscCall(VecPointwiseMult(lbfgs->rwork2, lbfgs->rwork2, lbfgs->inv_diag_vec));
966:     PetscCall(VecAXPY(lbfgs->rwork1, 1.0, lbfgs->rwork2));

968:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
969:       PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
970:       PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork3, lbfgs->num_updates, lbfgs->cyclic_work_vec));
971:     }

973:     PetscCall(MatMultAddColumnRange(Yfull, lbfgs->rwork1, Z, Z, 0, h));
974:     lbfgs->Y_count++;
975:     PetscCall(MatMultAddColumnRange(lbfgs->BS, lbfgs->rwork3, Z, Z, 0, h));
976:     lbfgs->S_count++;
977:   }
978:   PetscFunctionReturn(PETSC_SUCCESS);
979: }

981: /*
982:   This dense representation reduces the L-BFGS update to a series of
983:   matrix-vector products with dense matrices in lieu of the conventional matrix-free
984:   two-loop algorithm.
985: */
986: PetscErrorCode MatCreate_LMVMDBFGS(Mat B)
987: {
988:   Mat_LMVM *lmvm;
989:   Mat_DQN  *lbfgs;

991:   PetscFunctionBegin;
992:   PetscCall(MatCreate_LMVM(B));
993:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDBFGS));
994:   PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
995:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
996:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
997:   B->ops->view           = MatView_LMVMDQN;
998:   B->ops->setup          = MatSetUp_LMVMDQN;
999:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1000:   B->ops->destroy        = MatDestroy_LMVMDQN;

1002:   lmvm              = (Mat_LMVM *)B->data;
1003:   lmvm->ops->reset  = MatReset_LMVMDQN;
1004:   lmvm->ops->update = MatUpdate_LMVMDQN;
1005:   lmvm->ops->mult   = MatMult_LMVMDBFGS;
1006:   lmvm->ops->solve  = MatSolve_LMVMDBFGS;
1007:   lmvm->ops->copy   = MatCopy_LMVMDQN;

1009:   lmvm->ops->multht  = lmvm->ops->mult;
1010:   lmvm->ops->solveht = lmvm->ops->solve;

1012:   PetscCall(PetscNew(&lbfgs));
1013:   lmvm->ctx              = (void *)lbfgs;
1014:   lbfgs->allocated       = PETSC_FALSE;
1015:   lbfgs->use_recursive   = PETSC_TRUE;
1016:   lbfgs->needPQ          = PETSC_TRUE;
1017:   lbfgs->watchdog        = 0;
1018:   lbfgs->max_seq_rejects = lmvm->m / 2;
1019:   lbfgs->strategy        = MAT_LMVM_DENSE_INPLACE;

1021:   PetscCall(SymBroydenRescaleCreate(&lbfgs->rescale));
1022:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
1023:   PetscFunctionReturn(PETSC_SUCCESS);
1024: }

1026: /*@
1027:   MatCreateLMVMDBFGS - Creates a dense representation of the limited-memory
1028:   Broyden-Fletcher-Goldfarb-Shanno (BFGS) approximation to a Hessian.

1030:   Collective

1032:   Input Parameters:
1033: + comm - MPI communicator
1034: . n    - number of local rows for storage vectors
1035: - N    - global size of the storage vectors

1037:   Output Parameter:
1038: . B - the matrix

1040:   Level: advanced

1042:   Note:
1043:   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1044:   paradigm instead of this routine directly.

1046: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MatCreateLMVMBFGS()`
1047: @*/
1048: PetscErrorCode MatCreateLMVMDBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1049: {
1050:   PetscFunctionBegin;
1051:   PetscCall(KSPInitializePackage());
1052:   PetscCall(MatCreate(comm, B));
1053:   PetscCall(MatSetSizes(*B, n, n, N, N));
1054:   PetscCall(MatSetType(*B, MATLMVMDBFGS));
1055:   PetscCall(MatSetUp(*B));
1056:   PetscFunctionReturn(PETSC_SUCCESS);
1057: }

1059: /* here R is strictly upper triangular part of STY */
1060: static PetscErrorCode MatGetRTDR(Mat B, Mat result)
1061: {
1062:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1063:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1064:   PetscInt  m_local;

1066:   PetscFunctionBegin;
1067:   if (!ldfp->temp_mat) PetscCall(MatDuplicate(ldfp->StY_triu_strict, MAT_SHARE_NONZERO_PATTERN, &ldfp->temp_mat));
1068:   PetscCall(MatCopy(ldfp->StY_triu_strict, ldfp->temp_mat, SAME_NONZERO_PATTERN));
1069:   PetscCall(MatDiagonalScale(ldfp->temp_mat, ldfp->inv_diag_vec, NULL));
1070:   PetscCall(MatGetLocalSize(result, &m_local, NULL));
1071:   // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
1072:   PetscCall(MatConjugate(ldfp->temp_mat));
1073:   if (m_local) {
1074:     Mat temp_local, StY_local, result_local;
1075:     PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1076:     PetscCall(MatDenseGetLocalMatrix(ldfp->temp_mat, &temp_local));
1077:     PetscCall(MatDenseGetLocalMatrix(result, &result_local));
1078:     PetscCall(MatTransposeMatMult(StY_local, temp_local, MAT_REUSE_MATRIX, PETSC_DETERMINE, &result_local));
1079:   }
1080:   PetscCall(MatConjugate(result));
1081:   PetscFunctionReturn(PETSC_SUCCESS);
1082: }

1084: static PetscErrorCode MatLMVMDDFPUpdateSolveData(Mat B)
1085: {
1086:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1087:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1088:   PetscInt  m    = lmvm->m, m_local;
1089:   PetscInt  k    = lmvm->k;
1090:   PetscInt  h    = k - oldest_update(m, k);
1091:   PetscInt  j_0;
1092:   PetscInt  prev_oldest;
1093:   Mat       Sfull = lmvm->basis[LMBASIS_S]->vecs;
1094:   Mat       Yfull = lmvm->basis[LMBASIS_Y]->vecs;
1095:   Mat       J_local;

1097:   PetscFunctionBegin;
1098:   if (!ldfp->StY_triu_strict) {
1099:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->StY_triu_strict));
1100:     PetscCall(MatDestroy(&ldfp->YtHY));
1101:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->YtHY));
1102:     PetscCall(MatDestroy(&ldfp->J));
1103:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->J));
1104:     PetscCall(MatDestroy(&ldfp->HY));
1105:     PetscCall(MatDuplicate(Yfull, MAT_SHARE_NONZERO_PATTERN, &ldfp->HY));
1106:     PetscCall(MatShift(ldfp->YtHY, 1.0));
1107:     ldfp->num_mult_updates = oldest_update(m, k);
1108:   }
1109:   if (ldfp->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);

1111:   /* H_0 may have been updated, we must recompute H_0 Y and Y^T H_0 Y */
1112:   for (PetscInt j = oldest_update(m, k); j < k; j++) {
1113:     Vec      y_j;
1114:     Vec      Hy_j;
1115:     Vec      YtHy_j;
1116:     PetscInt Y_idx    = recycle_index(m, j);
1117:     PetscInt YtHY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);

1119:     PetscCall(MatDenseGetColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1120:     PetscCall(MatDenseGetColumnVecRead(Yfull, Y_idx, &y_j));
1121:     PetscCall(MatDQNApplyJ0Inv(B, y_j, Hy_j));
1122:     PetscCall(MatDenseRestoreColumnVecRead(Yfull, Y_idx, &y_j));
1123:     PetscCall(MatDenseGetColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1124:     PetscCall(MatMultHermitianTransposeColumnRange(Yfull, Hy_j, YtHy_j, 0, h));
1125:     ldfp->Yt_count++;
1126:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, YtHy_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1127:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1128:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1129:   }
1130:   prev_oldest = oldest_update(m, ldfp->num_mult_updates);
1131:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
1132:     /* move the YtS entries that have been computed and need to be kept back up */
1133:     PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);

1135:     PetscCall(MatMove_LR3(B, ldfp->StY_triu_strict, m_keep));
1136:   }
1137:   PetscCall(MatGetLocalSize(ldfp->StY_triu_strict, &m_local, NULL));
1138:   j_0 = PetscMax(ldfp->num_mult_updates, oldest_update(m, k));
1139:   for (PetscInt j = j_0; j < k; j++) {
1140:     PetscInt Y_idx   = recycle_index(m, j);
1141:     PetscInt StY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1142:     Vec      y_j, Sty_j;

1144:     PetscCall(MatDenseGetColumnVecRead(Yfull, Y_idx, &y_j));
1145:     PetscCall(MatDenseGetColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1146:     PetscCall(MatMultHermitianTransposeColumnRange(Sfull, y_j, Sty_j, 0, h));
1147:     ldfp->St_count++;
1148:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Sty_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1149:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1150:     PetscCall(MatDenseRestoreColumnVecRead(Yfull, Y_idx, &y_j));
1151:     /* zero the corresponding row */
1152:     if (m_local > 0) {
1153:       Mat StY_local, StY_row;

1155:       PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1156:       PetscCall(MatDenseGetSubMatrix(StY_local, StY_idx, StY_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &StY_row));
1157:       PetscCall(MatZeroEntries(StY_row));
1158:       PetscCall(MatDenseRestoreSubMatrix(StY_local, &StY_row));
1159:     }
1160:   }
1161:   if (!ldfp->inv_diag_vec) PetscCall(VecDuplicate(ldfp->diag_vec, &ldfp->inv_diag_vec));
1162:   PetscCall(VecCopy(ldfp->diag_vec, ldfp->inv_diag_vec));
1163:   PetscCall(VecReciprocal(ldfp->inv_diag_vec));
1164:   PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1165:   PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
1166:   PetscCall(MatGetRTDR(B, ldfp->J));
1167:   PetscCall(MatAXPY(ldfp->J, 1.0, ldfp->YtHY, SAME_NONZERO_PATTERN));
1168:   if (m_local) {
1169:     PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
1170:     PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
1171:   }
1172:   ldfp->num_mult_updates = ldfp->num_updates;
1173:   PetscFunctionReturn(PETSC_SUCCESS);
1174: }

1176: /* Solves for

1178:    H_0 - [ S | H_0 Y] [ -D  |    R.T    ]^-1 [   S^T   ]
1179:                       [-----+-----------]    [---------]
1180:                       [  R  | Y^T H_0 Y ]    [ Y^T H_0 ]

1182:    Above is equivalent to

1184:    H_0 - [ S | H_0 Y] [[     I     | 0 ][ -D | 0 ][ I | -D^{-1} R^T ]]^-1 [   S^T   ]
1185:                       [[-----------+---][----+---][---+-------------]]    [---------]
1186:                       [[ -R D^{-1} | I ][  0 | J ][ 0 |      I      ]]    [ Y^T H_0 ]

1188:    where J = Y^T H_0 Y + R D^{-1} R.T

1190:    becomes

1192:    H_0 - [ S | H_0 Y] [ I | D^{-1} R^T ][ -D^{-1}  |   0    ][     I    | 0 ] [   S^T   ]
1193:                       [---+------------][----------+--------][----------+---] [---------]
1194:                       [ 0 |      I     ][     0    | J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]

1196:                       =

1198:    H_0 + [ S | H_0 Y] [ D^{-1} | 0 ][ I | R^T ][ I |    0    ][     I    | 0 ] [   S^T   ]
1199:                       [--------+---][---+-----][---+---------][----------+---] [---------]
1200:                       [ 0      | I ][ 0 |  I  ][ 0 | -J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]

1202:                       (Note that StY_triu_strict is R)
1203:    Byrd, Nocedal, Schnabel 1994

1205: */
1206: static PetscErrorCode MatSolve_LMVMDDFP(Mat H, Vec F, Vec dX)
1207: {
1208:   Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
1209:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1210:   PetscInt  m    = lmvm->m;
1211:   PetscInt  k    = lmvm->k;
1212:   PetscInt  h    = k - oldest_update(m, k);
1213:   PetscInt  idx, i, j, local_n;
1214:   PetscInt  m_local;
1215:   Mat       J_local;
1216:   Mat       Sfull = lmvm->basis[LMBASIS_S]->vecs;
1217:   Mat       Yfull = lmvm->basis[LMBASIS_Y]->vecs;

1219:   PetscFunctionBegin;
1220:   VecCheckSameSize(F, 2, dX, 3);
1221:   VecCheckMatCompatible(H, dX, 3, F, 2);

1223:   /* Cholesky Version */
1224:   /* Start with the B0 term */
1225:   PetscCall(MatDQNApplyJ0Inv(H, F, dX));
1226:   if (!ldfp->num_updates) PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */

1228:   if (ldfp->use_recursive) {
1229:     PetscDeviceContext dctx;
1230:     PetscMemType       memtype;
1231:     PetscScalar        stf, ytx, ytq, yjtqi, sjtyi, *workscalar;

1233:     PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
1234:     /* Recursive formulation to avoid Cholesky. Not a dense formulation */
1235:     PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, ldfp->rwork1, 0, h));
1236:     ldfp->Yt_count++;

1238:     PetscCall(VecGetLocalSize(ldfp->rwork1, &local_n));

1240:     PetscInt oldest = oldest_update(m, k);

1242:     if (ldfp->needPQ) {
1243:       PetscInt oldest = oldest_update(m, k);
1244:       for (i = oldest; i < k; ++i) {
1245:         idx = recycle_index(m, i);
1246:         /* column_work = S[idx] */
1247:         PetscCall(MatGetColumnVector(Yfull, ldfp->column_work, idx));
1248:         PetscCall(MatDQNApplyJ0Inv(H, ldfp->column_work, ldfp->PQ[idx]));
1249:         PetscCall(MatMultHermitianTransposeColumnRange(Sfull, ldfp->column_work, ldfp->rwork3, 0, h));
1250:         PetscCall(VecGetArrayAndMemType(ldfp->rwork3, &workscalar, &memtype));
1251:         for (j = oldest; j < i; ++j) {
1252:           PetscInt idx_j = recycle_index(m, j);
1253:           /* Copy sjtyi in device-aware manner */
1254:           if (local_n) {
1255:             if (PetscMemTypeHost(memtype)) {
1256:               sjtyi = workscalar[idx_j];
1257:             } else {
1258:               PetscCall(PetscDeviceRegisterMemory(&sjtyi, PETSC_MEMTYPE_HOST, 1 * sizeof(sjtyi)));
1259:               PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1260:               PetscCall(PetscDeviceArrayCopy(dctx, &sjtyi, &workscalar[idx_j], 1));
1261:             }
1262:           }
1263:           PetscCallMPI(MPI_Bcast(&sjtyi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1264:           /* column_work2 = Y[j] */
1265:           PetscCall(MatGetColumnVector(Yfull, ldfp->column_work2, idx_j));
1266:           PetscCall(VecDot(ldfp->PQ[idx], ldfp->column_work2, &yjtqi));
1267:           /* column_work2 = Y[j] */
1268:           PetscCall(MatGetColumnVector(Sfull, ldfp->column_work2, idx_j));
1269:           /* Compute the pure BFGS component of the forward product */
1270:           PetscCall(VecAXPBYPCZ(ldfp->PQ[idx], -yjtqi / ldfp->ytq[idx_j], sjtyi / ldfp->yts[idx_j], 1.0, ldfp->PQ[idx_j], ldfp->column_work2));
1271:         }
1272:         PetscCall(VecDot(ldfp->PQ[idx], ldfp->column_work, &ytq));
1273:         ldfp->ytq[idx] = PetscRealPart(ytq);
1274:       }
1275:       ldfp->needPQ = PETSC_FALSE;
1276:     }

1278:     PetscCall(VecGetArrayAndMemType(ldfp->rwork1, &workscalar, &memtype));
1279:     for (i = oldest; i < k; ++i) {
1280:       idx = recycle_index(m, i);
1281:       /* Copy stz[i], ytx[i] in device-aware manner */
1282:       if (local_n) {
1283:         if (PetscMemTypeHost(memtype)) {
1284:           stf = workscalar[idx];
1285:         } else {
1286:           PetscCall(PetscDeviceRegisterMemory(&stf, PETSC_MEMTYPE_HOST, sizeof(stf)));
1287:           PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1288:           PetscCall(PetscDeviceArrayCopy(dctx, &stf, &workscalar[idx], 1));
1289:         }
1290:       }
1291:       PetscCallMPI(MPI_Bcast(&stf, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1292:       /* column_work : S[i], column_work2 : Y[i] */
1293:       PetscCall(MatGetColumnVector(Sfull, ldfp->column_work, idx));
1294:       PetscCall(MatGetColumnVector(Yfull, ldfp->column_work2, idx));
1295:       PetscCall(VecDot(dX, ldfp->column_work2, &ytx));
1296:       PetscCall(VecAXPBYPCZ(dX, -ytx / ldfp->ytq[idx], stf / ldfp->yts[idx], 1.0, ldfp->PQ[idx], ldfp->column_work));
1297:     }
1298:     PetscCall(VecRestoreArrayAndMemType(ldfp->rwork1, &workscalar));
1299:   } else {
1300:     PetscCall(MatLMVMDDFPUpdateSolveData(H));
1301:     PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, ldfp->rwork1, 0, h));
1302:     ldfp->St_count++;
1303:     PetscCall(MatMultHermitianTransposeColumnRange(Yfull, dX, ldfp->rwork2, 0, h));
1304:     ldfp->Yt_count++;
1305:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1306:       PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1307:       PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork2, ldfp->num_updates, ldfp->cyclic_work_vec));
1308:     }

1310:     PetscCall(VecPointwiseMult(ldfp->rwork3, ldfp->rwork1, ldfp->inv_diag_vec));
1311:     if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(ldfp->StY_triu_strict));
1312:     PetscCall(MatMultTransposeAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork2, ldfp->rwork2));
1313:     if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(ldfp->StY_triu_strict));

1315:     if (!ldfp->rwork2_local) PetscCall(VecCreateLocalVector(ldfp->rwork2, &ldfp->rwork2_local));
1316:     if (!ldfp->rwork3_local) PetscCall(VecCreateLocalVector(ldfp->rwork3, &ldfp->rwork3_local));
1317:     PetscCall(VecGetLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1318:     PetscCall(VecGetLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1319:     PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1320:     PetscCall(VecGetSize(ldfp->rwork2_local, &m_local));
1321:     if (m_local) {
1322:       Mat J_local;

1324:       PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1325:       PetscCall(MatSolve(J_local, ldfp->rwork2_local, ldfp->rwork3_local));
1326:     }
1327:     PetscCall(VecRestoreLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1328:     PetscCall(VecRestoreLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1329:     PetscCall(VecScale(ldfp->rwork3, -1.0));

1331:     PetscCall(MatMultAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork1, ldfp->rwork1));
1332:     PetscCall(VecPointwiseMult(ldfp->rwork1, ldfp->rwork1, ldfp->inv_diag_vec));

1334:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1335:       PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1336:       PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork3, ldfp->num_updates, ldfp->cyclic_work_vec));
1337:     }

1339:     PetscCall(MatMultAddColumnRange(Sfull, ldfp->rwork1, dX, dX, 0, h));
1340:     ldfp->S_count++;
1341:     PetscCall(MatMultAddColumnRange(ldfp->HY, ldfp->rwork3, dX, dX, 0, h));
1342:     ldfp->Y_count++;
1343:   }
1344:   PetscFunctionReturn(PETSC_SUCCESS);
1345: }

1347: /* Solves for
1348:    (Theorem 1, Erway, Jain, and Marcia, 2013)

1350:    B_0 - [ Y | B_0 S] [ -R^{-T} (D + S^T B_0 S) R^{-1} | R^{-T} ] [   Y^T   ]
1351:                       ---------------------------------+--------] [---------]
1352:                       [             R^{-1}             |   0    ] [ S^T B_0 ]

1354:    (Note: R above is right triangular part of YTS)
1355:    which becomes,

1357:    [ I | -Y L^{-T} ] [  I  | 0 ] [ B_0 | 0 ] [ I | S ] [      I      ]
1358:                      [-----+---] [-----+---] [---+---] [-------------]
1359:                      [ S^T | I ] [  0  | D ] [ 0 | I ] [ -L^{-1} Y^T ]

1361:    (Note: L above is right triangular part of STY)

1363: */
1364: static PetscErrorCode MatMult_LMVMDDFP(Mat B, Vec X, Vec Z)
1365: {
1366:   Mat_LMVM        *lmvm   = (Mat_LMVM *)B->data;
1367:   Mat_DQN         *ldfp   = (Mat_DQN *)lmvm->ctx;
1368:   Vec              rwork1 = ldfp->rwork1;
1369:   PetscInt         m      = lmvm->m;
1370:   PetscInt         k      = lmvm->k;
1371:   PetscInt         h      = k - oldest_update(m, k);
1372:   Mat              Sfull  = lmvm->basis[LMBASIS_S]->vecs;
1373:   Mat              Yfull  = lmvm->basis[LMBASIS_Y]->vecs;
1374:   PetscObjectState Xstate;

1376:   PetscFunctionBegin;
1377:   VecCheckSameSize(X, 2, Z, 3);
1378:   VecCheckMatCompatible(B, X, 2, Z, 3);

1380:   /* DFP Version. Erway, Jain, Marcia, 2013, Theorem 1 */
1381:   /* Block Version */
1382:   if (!ldfp->num_updates) {
1383:     PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1384:     PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1385:   }

1387:   PetscCall(PetscObjectStateGet((PetscObject)X, &Xstate));
1388:   PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, rwork1, 0, h));

1390:   /* Reordering rwork1, as STY is in history order, while Y is in recycled order */
1391:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1392:   PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_FALSE, ldfp->num_updates, ldfp->strategy));
1393:   PetscCall(VecScale(rwork1, -1.0));
1394:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));

1396:   PetscCall(VecCopy(X, ldfp->column_work));
1397:   PetscCall(MatMultAddColumnRange(Sfull, rwork1, ldfp->column_work, ldfp->column_work, 0, h));
1398:   ldfp->S_count++;

1400:   PetscCall(VecPointwiseMult(rwork1, ldfp->diag_vec_recycle_order, rwork1));
1401:   PetscCall(MatDQNApplyJ0Fwd(B, ldfp->column_work, Z));

1403:   PetscCall(MatMultHermitianTransposeAddColumnRange(Sfull, Z, rwork1, rwork1, 0, h));
1404:   ldfp->St_count++;

1406:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1407:   PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_TRUE, ldfp->num_updates, ldfp->strategy));
1408:   PetscCall(VecScale(rwork1, -1.0));
1409:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));

1411:   PetscCall(MatMultAddColumnRange(Yfull, rwork1, Z, Z, 0, h));
1412:   ldfp->Y_count++;
1413:   PetscFunctionReturn(PETSC_SUCCESS);
1414: }

1416: /*
1417:    This dense representation reduces the L-DFP update to a series of
1418:    matrix-vector products with dense matrices in lieu of the conventional
1419:    matrix-free two-loop algorithm.
1420: */
1421: PetscErrorCode MatCreate_LMVMDDFP(Mat B)
1422: {
1423:   Mat_LMVM *lmvm;
1424:   Mat_DQN  *ldfp;

1426:   PetscFunctionBegin;
1427:   PetscCall(MatCreate_LMVM(B));
1428:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDDFP));
1429:   PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
1430:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1431:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1432:   B->ops->view           = MatView_LMVMDQN;
1433:   B->ops->setup          = MatSetUp_LMVMDQN;
1434:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1435:   B->ops->destroy        = MatDestroy_LMVMDQN;

1437:   lmvm              = (Mat_LMVM *)B->data;
1438:   lmvm->ops->reset  = MatReset_LMVMDQN;
1439:   lmvm->ops->update = MatUpdate_LMVMDQN;
1440:   lmvm->ops->mult   = MatMult_LMVMDDFP;
1441:   lmvm->ops->solve  = MatSolve_LMVMDDFP;
1442:   lmvm->ops->copy   = MatCopy_LMVMDQN;

1444:   lmvm->ops->multht  = lmvm->ops->mult;
1445:   lmvm->ops->solveht = lmvm->ops->solve;

1447:   PetscCall(PetscNew(&ldfp));
1448:   lmvm->ctx             = (void *)ldfp;
1449:   ldfp->allocated       = PETSC_FALSE;
1450:   ldfp->watchdog        = 0;
1451:   ldfp->max_seq_rejects = lmvm->m / 2;
1452:   ldfp->strategy        = MAT_LMVM_DENSE_INPLACE;
1453:   ldfp->use_recursive   = PETSC_TRUE;
1454:   ldfp->needPQ          = PETSC_TRUE;

1456:   PetscCall(SymBroydenRescaleCreate(&ldfp->rescale));
1457:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
1458:   PetscFunctionReturn(PETSC_SUCCESS);
1459: }

1461: /*@
1462:   MatCreateLMVMDDFP - Creates a dense representation of the limited-memory
1463:   Davidon-Fletcher-Powell (DFP) approximation to a Hessian.

1465:   Collective

1467:   Input Parameters:
1468: + comm - MPI communicator
1469: . n    - number of local rows for storage vectors
1470: - N    - global size of the storage vectors

1472:   Output Parameter:
1473: . B - the matrix

1475:   Level: advanced

1477:   Note:
1478:   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1479:   paradigm instead of this routine directly.

1481: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDDFP`, `MatCreateLMVMDFP()`
1482: @*/
1483: PetscErrorCode MatCreateLMVMDDFP(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1484: {
1485:   PetscFunctionBegin;
1486:   PetscCall(KSPInitializePackage());
1487:   PetscCall(MatCreate(comm, B));
1488:   PetscCall(MatSetSizes(*B, n, n, N, N));
1489:   PetscCall(MatSetType(*B, MATLMVMDDFP));
1490:   PetscCall(MatSetUp(*B));
1491:   PetscFunctionReturn(PETSC_SUCCESS);
1492: }

1494: /*@
1495:   MatLMVMDenseSetType - Sets the memory storage type for dense `MATLMVM`

1497:   Input Parameters:
1498: + B    - the `MATLMVM` matrix
1499: - type - scale type, see `MatLMVMDenseSetType`

1501:   Options Database Keys:
1502: + -mat_lqn_type   (reorder|inplace) - set the strategy
1503: . -mat_lbfgs_type (reorder|inplace) - set the strategy
1504: - -mat_ldfp_type  (reorder|inplace) - set the strategy

1506:   Level: intermediate

1508:   MatLMVMDenseTypes\:
1509: +   `MAT_LMVM_DENSE_REORDER` - reorders memory to minimize kernel launch
1510: -   `MAT_LMVM_DENSE_INPLACE` - launches kernel inplace to minimize memory movement

1512: .seealso: [](ch_ksp), `MATLMVMDQN`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatLMVMDenseType`
1513: @*/
1514: PetscErrorCode MatLMVMDenseSetType(Mat B, MatLMVMDenseType type)
1515: {
1516:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1517:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

1519:   PetscFunctionBegin;
1521:   lqn->strategy = type;
1522:   PetscFunctionReturn(PETSC_SUCCESS);
1523: }