Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
rad_fe_adj_fill.cpp
Go to the documentation of this file.
1// $Id$
2// $Source$
3// @HEADER
4// ***********************************************************************
5//
6// Sacado Package
7// Copyright (2006) Sandia Corporation
8//
9// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
10// the U.S. Government retains certain rights in this software.
11//
12// This library is free software; you can redistribute it and/or modify
13// it under the terms of the GNU Lesser General Public License as
14// published by the Free Software Foundation; either version 2.1 of the
15// License, or (at your option) any later version.
16//
17// This library is distributed in the hope that it will be useful, but
18// WITHOUT ANY WARRANTY; without even the implied warranty of
19// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20// Lesser General Public License for more details.
21//
22// You should have received a copy of the GNU Lesser General Public
23// License along with this library; if not, write to the Free Software
24// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25// USA
26// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
27// (etphipp@sandia.gov).
28//
29// ***********************************************************************
30// @HEADER
31
32#include "Sacado_No_Kokkos.hpp"
33#include "Sacado_tradvec.hpp"
34
35#include "Teuchos_Time.hpp"
36#include "Teuchos_CommandLineProcessor.hpp"
37
38// ADOL-C includes
39#ifdef HAVE_ADOLC
40#ifdef PACKAGE
41#undef PACKAGE
42#endif
43#ifdef PACKAGE_NAME
44#undef PACKAGE_NAME
45#endif
46#ifdef PACKAGE_BUGREPORT
47#undef PACKAGE_BUGREPORT
48#endif
49#ifdef PACKAGE_STRING
50#undef PACKAGE_STRING
51#endif
52#ifdef PACKAGE_TARNAME
53#undef PACKAGE_TARNAME
54#endif
55#ifdef PACKAGE_VERSION
56#undef PACKAGE_VERSION
57#endif
58#ifdef VERSION
59#undef VERSION
60#endif
61#include "adolc/adouble.h"
62#include "adolc/drivers/drivers.h"
63#include "adolc/interfaces.h"
64#include "adolc/taping.h"
65#endif
66
67// A performance test that computes a finite-element-like adjoint using
68// Rad and ADOL-C
69
72
73struct ElemData {
74 static const unsigned int nqp = 2;
75 static const unsigned int nnode = 2;
76 double w[nqp], jac[nqp], phi[nqp][nnode], dphi[nqp][nnode];
77 unsigned int gid[nnode];
78
79 ElemData(double mesh_spacing) {
80 // Quadrature points
81 double xi[nqp];
82 xi[0] = -1.0 / std::sqrt(3.0);
83 xi[1] = 1.0 / std::sqrt(3.0);
84
85 for (unsigned int i=0; i<nqp; i++) {
86 // Weights
87 w[i] = 1.0;
88
89 // Element transformation Jacobian
90 jac[i] = 0.5*mesh_spacing;
91
92 // Shape functions & derivatives
93 phi[i][0] = 0.5*(1.0 - xi[i]);
94 phi[i][1] = 0.5*(1.0 + xi[i]);
95 dphi[i][0] = -0.5;
96 dphi[i][1] = 0.5;
97 }
98 }
99};
100
101template <class FadType>
103 unsigned int neqn,
104 const std::vector<double>& x,
105 const std::vector< std::vector<double> >& w,
106 std::vector<FadType>& x_fad,
107 std::vector< std::vector<double> >& w_local) {
108 for (unsigned int node=0; node<e.nnode; node++)
109 for (unsigned int eqn=0; eqn<neqn; eqn++)
110 x_fad[node*neqn+eqn] = FadType(e.nnode*neqn, node*neqn+eqn,
111 x[e.gid[node]*neqn+eqn]);
112
113 for (unsigned int col=0; col<w.size(); col++)
114 for (unsigned int node=0; node<e.nnode; node++)
115 for (unsigned int eqn=0; eqn<neqn; eqn++)
116 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
117}
118
120 unsigned int neqn,
121 const std::vector<double>& x,
122 const std::vector< std::vector<double> >& w,
123 std::vector<RadType>& x_rad,
124 std::vector< std::vector<double> >& w_local) {
125 for (unsigned int node=0; node<e.nnode; node++)
126 for (unsigned int eqn=0; eqn<neqn; eqn++)
127 x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
128
129 for (unsigned int col=0; col<w.size(); col++)
130 for (unsigned int node=0; node<e.nnode; node++)
131 for (unsigned int eqn=0; eqn<neqn; eqn++)
132 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
133}
134
136 unsigned int neqn,
137 const std::vector<double>& x,
138 const std::vector< std::vector<double> >& w,
139 std::vector<VRadType>& x_rad,
140 double** w_local) {
141 for (unsigned int node=0; node<e.nnode; node++)
142 for (unsigned int eqn=0; eqn<neqn; eqn++)
143 x_rad[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
144
145 for (unsigned int col=0; col<w.size(); col++)
146 for (unsigned int node=0; node<e.nnode; node++)
147 for (unsigned int eqn=0; eqn<neqn; eqn++)
148 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
149}
150
151#ifdef HAVE_ADOLC
152void adolc_init_fill(bool retape,
153 int tag,
154 const ElemData& e,
155 unsigned int neqn,
156 const std::vector<double>& x,
157 const std::vector< std::vector<double> >& w,
158 std::vector<double>& x_local,
159 std::vector<adouble>& x_ad,
160 double** w_local) {
161 if (retape)
162 trace_on(tag, 1);
163 for (unsigned int node=0; node<e.nnode; node++)
164 for (unsigned int eqn=0; eqn<neqn; eqn++) {
165 x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
166 if (retape)
167 x_ad[node*neqn+eqn] <<= x_local[node*neqn+eqn];
168 }
169
170 for (unsigned int col=0; col<w.size(); col++)
171 for (unsigned int node=0; node<e.nnode; node++)
172 for (unsigned int eqn=0; eqn<neqn; eqn++)
173 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
174}
175#endif
176
178 unsigned int neqn,
179 const std::vector<double>& x,
180 const std::vector< std::vector<double> >& w,
181 std::vector<double>& x_local,
182 std::vector< std::vector<double> >& w_local) {
183 for (unsigned int node=0; node<e.nnode; node++)
184 for (unsigned int eqn=0; eqn<neqn; eqn++)
185 x_local[node*neqn+eqn] = x[e.gid[node]*neqn+eqn];
186
187 for (unsigned int node=0; node<e.nnode; node++)
188 for (unsigned int eqn=0; eqn<neqn; eqn++)
189 for (unsigned int col=0; col<w.size(); col++)
190 w_local[col][node*neqn+eqn] = w[col][e.gid[node]*neqn+eqn];
191}
192
193template <class T>
195 unsigned int neqn,
196 const std::vector<T>& x,
197 std::vector<T>& u,
198 std::vector<T>& du,
199 std::vector<T>& f) {
200 // Construct element solution, derivative
201 for (unsigned int qp=0; qp<e.nqp; qp++) {
202 for (unsigned int eqn=0; eqn<neqn; eqn++) {
203 u[qp*neqn+eqn] = 0.0;
204 du[qp*neqn+eqn] = 0.0;
205 for (unsigned int node=0; node<e.nnode; node++) {
206 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
207 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
208 }
209 }
210 }
211
212 // Compute sum of equations for coupling
213 std::vector<T> s(e.nqp*neqn);
214 for (unsigned int qp=0; qp<e.nqp; qp++) {
215 for (unsigned int eqn=0; eqn<neqn; eqn++) {
216 s[qp*neqn+eqn] = 0.0;
217 for (unsigned int j=0; j<neqn; j++) {
218 if (j != eqn)
219 s[qp*neqn+eqn] += u[qp*neqn+j];
220 }
221 }
222 }
223
224 // Evaluate element residual
225 for (unsigned int node=0; node<e.nnode; node++) {
226 for (unsigned int eqn=0; eqn<neqn; eqn++) {
227 unsigned int row = node*neqn+eqn;
228 f[row] = 0.0;
229 for (unsigned int qp=0; qp<e.nqp; qp++) {
230 f[row] +=
231 e.w[qp]*e.jac[qp]*(-(1.0/(e.jac[qp]*e.jac[qp]))*
232 du[qp*neqn+eqn]*e.dphi[qp][node] +
233 e.phi[qp][node]*(exp(u[qp*neqn+eqn]) +
234 u[qp*neqn+eqn]*s[qp*neqn+eqn]));
235 }
236 }
237 }
238}
239
241 unsigned int neqn,
242 const std::vector<double>& x,
243 std::vector< std::vector<double> >& w,
244 std::vector<double>& u,
245 std::vector<double>& du,
246 std::vector<double>& f,
247 std::vector< std::vector<double> >& adj) {
248 // Construct element solution, derivative
249 for (unsigned int qp=0; qp<e.nqp; qp++) {
250 for (unsigned int eqn=0; eqn<neqn; eqn++) {
251 u[qp*neqn+eqn] = 0.0;
252 du[qp*neqn+eqn] = 0.0;
253 for (unsigned int node=0; node<e.nnode; node++) {
254 u[qp*neqn+eqn] += x[node*neqn+eqn] * e.phi[qp][node];
255 du[qp*neqn+eqn] += x[node*neqn+eqn] * e.dphi[qp][node];
256 }
257 }
258 }
259
260 // Compute sum of equations for coupling
261 std::vector<double> s(e.nqp*neqn);
262 for (unsigned int qp=0; qp<e.nqp; qp++) {
263 for (unsigned int eqn=0; eqn<neqn; eqn++) {
264 s[qp*neqn+eqn] = 0.0;
265 for (unsigned int j=0; j<neqn; j++) {
266 if (j != eqn)
267 s[qp*neqn+eqn] += u[qp*neqn+j];
268 }
269 }
270 }
271
272 for (unsigned int col=0; col<w.size(); col++)
273 for (unsigned int node=0; node<e.nnode; node++)
274 for (unsigned int eqn=0; eqn<neqn; eqn++)
275 adj[col][node*neqn+eqn] = 0.0;
276
277 // Evaluate element residual
278 for (unsigned int node=0; node<e.nnode; node++) {
279 for (unsigned int eqn=0; eqn<neqn; eqn++) {
280 unsigned int row = node*neqn+eqn;
281 f[row] = 0.0;
282 for (unsigned int qp=0; qp<e.nqp; qp++) {
283 double c1 = e.w[qp]*e.jac[qp];
284 double c2 = -(1.0/(e.jac[qp]*e.jac[qp]))*e.dphi[qp][node];
285 double c3 = e.phi[qp][node]*(exp(u[qp*neqn+eqn]));
286 double c4 = e.phi[qp][node]*u[qp*neqn+eqn];
287 double c5 = e.phi[qp][node]*s[qp*neqn+eqn];
288 double c35 = c3+c5;
289 double c14 = c1*c4;
290 f[row] += c1*(c2*du[qp*neqn+eqn] + c3 + c4*s[qp*neqn+eqn]);
291 for (unsigned int col=0; col<w.size(); col++) {
292 for (unsigned int node_col=0; node_col<e.nnode; node_col++) {
293 adj[col][node_col*neqn+eqn] +=
294 c1*(c2*e.dphi[qp][node_col] + c35*e.phi[qp][node_col])*w[col][row];
295 for (unsigned int eqn_col=0; eqn_col<neqn; eqn_col++) {
296 if (eqn_col != eqn)
297 adj[col][node_col*neqn+eqn_col] += c14*e.phi[qp][node_col]*w[col][row];
298 }
299 }
300 }
301 }
302 }
303 }
304}
305
306template <class FadType>
308 unsigned int neqn,
309 const std::vector< std::vector<double> >& w_local,
310 const std::vector<FadType>& f_fad,
311 std::vector<double>& f,
312 std::vector< std::vector<double> >& adj) {
313 // Get residual
314 for (unsigned int node=0; node<e.nnode; node++)
315 for (unsigned int eqn=0; eqn<neqn; eqn++)
316 f[e.gid[node]*neqn+eqn] += f_fad[node*neqn+eqn].val();
317
318 // Get adjoint for each adjoint direction
319 for (unsigned int col=0; col<w_local.size(); col++) {
320 FadType z = 0.0;
321 for (unsigned int node=0; node<e.nnode; node++)
322 for (unsigned int eqn=0; eqn<neqn; eqn++)
323 z += f_fad[node*neqn+eqn]*w_local[col][node*neqn+eqn];
324
325 for (unsigned int node=0; node<e.nnode; node++)
326 for (unsigned int eqn=0; eqn<neqn; eqn++)
327 adj[col][e.gid[node]*neqn+eqn] += z.fastAccessDx(node*neqn+eqn);
328 }
329}
330
332 unsigned int neqn,
333 std::vector< std::vector<double> >& w_local,
334 std::vector<RadType>& x_rad,
335 std::vector<RadType>& f_rad,
336 std::vector<RadType*>& ff_rad,
337 std::vector<double>& f,
338 std::vector< std::vector<double> >& adj) {
339 // Get residual
340 for (unsigned int node=0; node<e.nnode; node++)
341 for (unsigned int eqn=0; eqn<neqn; eqn++)
342 f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
343
344 // Get adjoint for each adjoint direction
345 for (unsigned int col=0; col<w_local.size(); col++) {
347 &ff_rad[0],
348 &w_local[col][0]);
349 for (unsigned int node=0; node<e.nnode; node++)
350 for (unsigned int eqn=0; eqn<neqn; eqn++)
351 adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj();
352 }
353}
354
356 unsigned int neqn,
357 unsigned int ncol,
358 std::vector<std::size_t>& vn,
359 double** w_local,
360 std::vector<VRadType>& x_rad,
361 std::vector<VRadType>& f_rad,
362 VRadType*** vf_rad,
363 std::vector<double>& f,
364 std::vector< std::vector<double> >& adj) {
365 // Get residual
366 for (unsigned int node=0; node<e.nnode; node++)
367 for (unsigned int eqn=0; eqn<neqn; eqn++)
368 f[e.gid[node]*neqn+eqn] += f_rad[node*neqn+eqn].val();
369
370 // Get adjoint for each adjoint direction
372 &vn[0],
373 vf_rad,
374 w_local);
375 for (unsigned int col=0; col<ncol; col++)
376 for (unsigned int node=0; node<e.nnode; node++)
377 for (unsigned int eqn=0; eqn<neqn; eqn++)
378 adj[col][e.gid[node]*neqn+eqn] += x_rad[node*neqn+eqn].adj(col);
379}
380
381#ifdef HAVE_ADOLC
382void adolc_process_fill(bool retape,
383 int tag,
384 const ElemData& e,
385 unsigned int neqn,
386 unsigned int ncol,
387 std::vector<double>& x_local,
388 double **w_local,
389 std::vector<adouble>& f_ad,
390 std::vector<double>& f_local,
391 std::vector<double>& f,
392 double **adj_local,
393 std::vector< std::vector<double> >& adj) {
394 if (retape) {
395 for (unsigned int node=0; node<e.nnode; node++)
396 for (unsigned int eqn=0; eqn<neqn; eqn++)
397 f_ad[node*neqn+eqn] >>= f_local[node*neqn+eqn];
398 trace_off();
399 }
400 else
401 zos_forward(tag, neqn*e.nnode, neqn*e.nnode, 1, &x_local[0], &f_local[0]);
402
403 fov_reverse(tag, neqn*e.nnode, neqn*e.nnode, ncol, w_local, adj_local);
404
405 for (unsigned int node=0; node<e.nnode; node++)
406 for (unsigned int eqn=0; eqn<neqn; eqn++)
407 f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
408
409 for (unsigned int col=0; col<ncol; col++)
410 for (unsigned int node=0; node<e.nnode; node++)
411 for (unsigned int eqn=0; eqn<neqn; eqn++)
412 adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
413}
414#endif
415
417 unsigned int neqn,
418 const std::vector<double>& f_local,
419 const std::vector< std::vector<double> >& adj_local,
420 std::vector<double>& f,
421 std::vector< std::vector<double> >& adj) {
422 for (unsigned int node=0; node<e.nnode; node++)
423 for (unsigned int eqn=0; eqn<neqn; eqn++)
424 f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
425
426 for (unsigned int col=0; col<adj.size(); col++)
427 for (unsigned int node=0; node<e.nnode; node++)
428 for (unsigned int eqn=0; eqn<neqn; eqn++)
429 adj[col][e.gid[node]*neqn+eqn] += adj_local[col][node*neqn+eqn];
430}
431
433 unsigned int neqn,
434 const std::vector<double>& f_local,
435 std::vector<double>& f) {
436 for (unsigned int node=0; node<e.nnode; node++)
437 for (unsigned int eqn=0; eqn<neqn; eqn++)
438 f[e.gid[node]*neqn+eqn] += f_local[node*neqn+eqn];
439}
440
441template <typename FadType>
442double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
443 unsigned int num_cols, double mesh_spacing) {
444 ElemData e(mesh_spacing);
445
446 // Solution vector, residual, adjoint
447 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
448 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
449 std::vector< std::vector<double> > adj(num_cols);
450 for (unsigned int node=0; node<num_nodes; node++)
451 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
452 x[node*num_eqns+eqn] =
453 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
454 f[node*num_eqns+eqn] = 0.0;
455 }
456
457 for (unsigned int col=0; col<num_cols; col++) {
458 w[col] = std::vector<double>(num_nodes*num_eqns);
459 w_local[col] = std::vector<double>(e.nnode*num_eqns);
460 adj[col] = std::vector<double>(num_nodes*num_eqns);
461 for (unsigned int node=0; node<num_nodes; node++)
462 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
463 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
464 adj[col][node*num_eqns+eqn] = 0.0;
465 }
466 }
467
468 Teuchos::Time timer("FE Fad Adjoint Fill", false);
469 timer.start(true);
470 std::vector<FadType> x_fad(e.nnode*num_eqns), f_fad(e.nnode*num_eqns);
471 std::vector<FadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
472 for (unsigned int i=0; i<num_nodes-1; i++) {
473 e.gid[0] = i;
474 e.gid[1] = i+1;
475
476 fad_init_fill(e, num_eqns, x, w, x_fad, w_local);
477 template_element_fill(e, num_eqns, x_fad, u, du, f_fad);
478 fad_process_fill(e, num_eqns, w_local, f_fad, f, adj);
479 }
480 timer.stop();
481
482 // std::cout << "Fad Residual = " << std::endl;
483 // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
484 // std::cout << "\t" << f[i] << std::endl;
485
486 // std::cout.precision(8);
487 // std::cout << "Fad Adjoint = " << std::endl;
488 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
489 // std::cout << "\t";
490 // for (unsigned int j=0; j<num_cols; j++)
491 // std::cout << adj[j][i] << "\t";
492 // std::cout << std::endl;
493 // }
494
495 return timer.totalElapsedTime();
496}
497
498double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
499 unsigned int num_cols, double mesh_spacing) {
500 ElemData e(mesh_spacing);
501
502 // Solution vector, residual, adjoint
503 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
504 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
505 std::vector< std::vector<double> > adj(num_cols);
506 for (unsigned int node=0; node<num_nodes; node++)
507 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
508 x[node*num_eqns+eqn] =
509 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
510 f[node*num_eqns+eqn] = 0.0;
511 }
512
513 for (unsigned int col=0; col<num_cols; col++) {
514 w[col] = std::vector<double>(num_nodes*num_eqns);
515 w_local[col] = std::vector<double>(e.nnode*num_eqns);
516 adj[col] = std::vector<double>(num_nodes*num_eqns);
517 for (unsigned int node=0; node<num_nodes; node++)
518 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
519 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
520 adj[col][node*num_eqns+eqn] = 0.0;
521 }
522 }
523
524 Teuchos::Time timer("FE Rad Adjoint Fill", false);
525 timer.start(true);
526 std::vector<RadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
527 std::vector<RadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
528 std::vector<RadType*> ff_rad(f_rad.size());
529 for (unsigned int i=0; i<f_rad.size(); i++)
530 ff_rad[i] = &f_rad[i];
531 for (unsigned int i=0; i<num_nodes-1; i++) {
532 e.gid[0] = i;
533 e.gid[1] = i+1;
534
535 rad_init_fill(e, num_eqns, x, w, x_rad, w_local);
536 template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
537 rad_process_fill(e, num_eqns, w_local, x_rad, f_rad, ff_rad, f, adj);
538 }
539 timer.stop();
540
541 // std::cout << "Rad Residual = " << std::endl;
542 // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
543 // std::cout << "\t" << f[i] << std::endl;
544
545 // std::cout.precision(8);
546 // std::cout << "Rad Adjoint = " << std::endl;
547 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
548 // std::cout << "\t";
549 // for (unsigned int j=0; j<num_cols; j++)
550 // std::cout << adj[j][i] << "\t";
551 // std::cout << std::endl;
552 // }
553
554 return timer.totalElapsedTime();
555}
556
557double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
558 unsigned int num_cols, double mesh_spacing) {
559 ElemData e(mesh_spacing);
560
561 // Solution vector, residual, adjoint
562 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
563 std::vector< std::vector<double> > w(num_cols);
564 double **w_local = new double*[num_cols];
565 std::vector< std::vector<double> > adj(num_cols);
566 for (unsigned int node=0; node<num_nodes; node++)
567 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
568 x[node*num_eqns+eqn] =
569 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
570 f[node*num_eqns+eqn] = 0.0;
571 }
572
573 for (unsigned int col=0; col<num_cols; col++) {
574 w[col] = std::vector<double>(num_nodes*num_eqns);
575 w_local[col] = new double[e.nnode*num_eqns];
576 adj[col] = std::vector<double>(num_nodes*num_eqns);
577 for (unsigned int node=0; node<num_nodes; node++)
578 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
579 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
580 adj[col][node*num_eqns+eqn] = 0.0;
581 }
582 }
583
584 Teuchos::Time timer("FE Vector Rad Adjoint Fill", false);
585 timer.start(true);
586 std::vector<VRadType> x_rad(e.nnode*num_eqns), f_rad(e.nnode*num_eqns);
587 std::vector<VRadType> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
588 VRadType ***vf_rad = new VRadType**[num_cols];
589 std::vector<std::size_t> vn(num_cols);
590 for (unsigned int i=0; i<num_cols; i++) {
591 vf_rad[i] = new VRadType*[num_eqns*e.nnode];
592 vn[i] = num_eqns*e.nnode;
593 for (unsigned int j=0; j<num_eqns*e.nnode; j++)
594 vf_rad[i][j] = &f_rad[j];
595 }
596 for (unsigned int i=0; i<num_nodes-1; i++) {
597 e.gid[0] = i;
598 e.gid[1] = i+1;
599
600 vrad_init_fill(e, num_eqns, x, w, x_rad, w_local);
601 template_element_fill(e, num_eqns, x_rad, u, du, f_rad);
602 vrad_process_fill(e, num_eqns, num_cols, vn, w_local, x_rad, f_rad, vf_rad,
603 f, adj);
604 }
605 for (unsigned int col=0; col<num_cols; col++) {
606 delete [] w_local[col];
607 delete [] vf_rad[col];
608 }
609 delete [] w_local;
610 delete [] vf_rad;
611 timer.stop();
612
613 // std::cout << "Vector Rad Residual = " << std::endl;
614 // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
615 // std::cout << "\t" << f[i] << std::endl;
616
617 // std::cout.precision(8);
618 // std::cout << "Vector Rad Adjoint = " << std::endl;
619 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
620 // std::cout << "\t";
621 // for (unsigned int j=0; j<num_cols; j++)
622 // std::cout << adj[j][i] << "\t";
623 // std::cout << std::endl;
624 // }
625
626 return timer.totalElapsedTime();
627}
628
629#ifdef HAVE_ADOLC
630double adolc_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
631 unsigned int num_cols, double mesh_spacing) {
632 ElemData e(mesh_spacing);
633
634 // Solution vector, residual, adjoint
635 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
636 std::vector< std::vector<double> > w(num_cols);
637 std::vector< std::vector<double> > adj(num_cols);
638 for (unsigned int node=0; node<num_nodes; node++)
639 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
640 x[node*num_eqns+eqn] =
641 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
642 f[node*num_eqns+eqn] = 0.0;
643 }
644
645 for (unsigned int col=0; col<num_cols; col++) {
646 w[col] = std::vector<double>(num_nodes*num_eqns);
647 adj[col] = std::vector<double>(num_nodes*num_eqns);
648 for (unsigned int node=0; node<num_nodes; node++)
649 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
650 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
651 adj[col][node*num_eqns+eqn] = 0.0;
652 }
653 }
654
655 Teuchos::Time timer("FE ADOL-C Adjoint Fill", false);
656 timer.start(true);
657 std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
658 std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
659 std::vector<double> x_local(e.nnode*num_eqns);
660 std::vector<double> f_local(e.nnode*num_eqns);
661 double **adj_local = new double*[num_cols];
662 double **w_local = new double*[num_cols];
663 for (unsigned int i=0; i<num_cols; i++) {
664 adj_local[i] = new double[e.nnode*num_eqns];
665 w_local[i] = new double[e.nnode*num_eqns];
666 }
667
668 // Tape first element
669 e.gid[0] = 0;
670 e.gid[1] = 1;
671 adolc_init_fill(true, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
672 template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
673 adolc_process_fill(true, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
674 f_local, f, adj_local, adj);
675
676 // Now do remaining fills reusing tape
677 for (unsigned int i=1; i<num_nodes-1; i++) {
678 e.gid[0] = i;
679 e.gid[1] = i+1;
680
681 adolc_init_fill(false, 0, e, num_eqns, x, w, x_local, x_ad, w_local);
682 adolc_process_fill(false, 0, e, num_eqns, num_cols, x_local, w_local, f_ad,
683 f_local, f, adj_local, adj);
684 }
685 for (unsigned int i=0; i<num_cols; i++) {
686 delete [] adj_local[i];
687 delete [] w_local[i];
688 }
689 delete [] adj_local;
690 delete [] w_local;
691 timer.stop();
692
693 // std::cout << "ADOL-C Residual = " << std::endl;
694 // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
695 // std::cout << "\t" << f[i] << std::endl;
696
697 // std::cout.precision(8);
698 // std::cout << "ADOL-C Adjoint = " << std::endl;
699 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
700 // std::cout << "\t";
701 // for (unsigned int j=0; j<num_cols; j++)
702 // std::cout << adj[j][i] << "\t";
703 // std::cout << std::endl;
704 // }
705
706 return timer.totalElapsedTime();
707}
708
709double adolc_retape_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
710 unsigned int num_cols, double mesh_spacing) {
711 ElemData e(mesh_spacing);
712
713 // Solution vector, residual, adjoint
714 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
715 std::vector< std::vector<double> > w(num_cols);
716 std::vector< std::vector<double> > adj(num_cols);
717 for (unsigned int node=0; node<num_nodes; node++)
718 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
719 x[node*num_eqns+eqn] =
720 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
721 f[node*num_eqns+eqn] = 0.0;
722 }
723
724 for (unsigned int col=0; col<num_cols; col++) {
725 w[col] = std::vector<double>(num_nodes*num_eqns);
726 adj[col] = std::vector<double>(num_nodes*num_eqns);
727 for (unsigned int node=0; node<num_nodes; node++)
728 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
729 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
730 adj[col][node*num_eqns+eqn] = 0.0;
731 }
732 }
733
734 Teuchos::Time timer("FE ADOL-C Retape Adjoint Fill", false);
735 timer.start(true);
736 std::vector<adouble> x_ad(e.nnode*num_eqns), f_ad(e.nnode*num_eqns);
737 std::vector<adouble> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
738 std::vector<double> x_local(e.nnode*num_eqns);
739 std::vector<double> f_local(e.nnode*num_eqns);
740 double **adj_local = new double*[num_cols];
741 double **w_local = new double*[num_cols];
742 for (unsigned int i=0; i<num_cols; i++) {
743 adj_local[i] = new double[e.nnode*num_eqns];
744 w_local[i] = new double[e.nnode*num_eqns];
745 }
746
747 for (unsigned int i=0; i<num_nodes-1; i++) {
748 e.gid[0] = i;
749 e.gid[1] = i+1;
750
751 adolc_init_fill(true, 1, e, num_eqns, x, w, x_local, x_ad, w_local);
752 template_element_fill(e, num_eqns, x_ad, u, du, f_ad);
753 adolc_process_fill(true, 1, e, num_eqns, num_cols, x_local, w_local, f_ad,
754 f_local, f, adj_local, adj);
755 }
756 for (unsigned int i=0; i<num_cols; i++) {
757 delete [] adj_local[i];
758 delete [] w_local[i];
759 }
760 delete [] adj_local;
761 delete [] w_local;
762 timer.stop();
763
764 // std::cout << "ADOL-C Residual (retaped) = " << std::endl;
765 // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
766 // std::cout << "\t" << f[i] << std::endl;
767
768 // std::cout.precision(8);
769 // std::cout << "ADOL-C Adjoint (retaped) = " << std::endl;
770 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
771 // std::cout << "\t";
772 // for (unsigned int j=0; j<num_cols; j++)
773 // std::cout << adj[j][i] << "\t";
774 // std::cout << std::endl;
775 // }
776
777 return timer.totalElapsedTime();
778}
779#endif
780
781double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns,
782 unsigned int num_cols, double mesh_spacing) {
783 ElemData e(mesh_spacing);
784
785 // Solution vector, residual, adjoint
786 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
787 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
788 std::vector< std::vector<double> > adj(num_cols);
789 for (unsigned int node=0; node<num_nodes; node++)
790 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
791 x[node*num_eqns+eqn] =
792 (mesh_spacing*node - 0.5)*(mesh_spacing*node - 0.5);
793 f[node*num_eqns+eqn] = 0.0;
794 }
795
796 for (unsigned int col=0; col<num_cols; col++) {
797 w[col] = std::vector<double>(num_nodes*num_eqns);
798 w_local[col] = std::vector<double>(e.nnode*num_eqns);
799 adj[col] = std::vector<double>(num_nodes*num_eqns);
800 for (unsigned int node=0; node<num_nodes; node++)
801 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
802 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
803 adj[col][node*num_eqns+eqn] = 0.0;
804 }
805 }
806
807 Teuchos::Time timer("FE Analytic Adjoint Fill", false);
808 timer.start(true);
809 std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
810 std::vector< std::vector<double> > adj_local(num_cols);
811 std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
812 for (unsigned int i=0; i<num_cols; i++)
813 adj_local[i] = std::vector<double>(e.nnode*num_eqns);
814 for (unsigned int i=0; i<num_nodes-1; i++) {
815 e.gid[0] = i;
816 e.gid[1] = i+1;
817
818 analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
819 analytic_element_fill(e, num_eqns, x_local, w_local, u, du, f_local,
820 adj_local);
821 analytic_process_fill(e, num_eqns, f_local, adj_local, f, adj);
822 }
823 timer.stop();
824
825 // std::cout.precision(8);
826 // std::cout << "Analytic Residual = " << std::endl;
827 // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
828 // std::cout << "\t" << f[i] << std::endl;
829
830 // std::cout.precision(8);
831 // std::cout << "Analytic Adjoint = " << std::endl;
832 // for (unsigned int i=0; i<num_nodes*num_eqns; i++) {
833 // std::cout << "\t";
834 // for (unsigned int j=0; j<num_cols; j++)
835 // std::cout << adj[j][i] << "\t";
836 // std::cout << std::endl;
837 // }
838
839 return timer.totalElapsedTime();
840}
841
842double residual_fill(unsigned int num_nodes, unsigned int num_eqns,
843 unsigned int num_cols, double mesh_spacing) {
844 ElemData e(mesh_spacing);
845
846 // Solution vector, residual, jacobian
847 std::vector<double> x(num_nodes*num_eqns), f(num_nodes*num_eqns);
848 std::vector< std::vector<double> > w(num_cols), w_local(num_cols);
849 for (unsigned int node_row=0; node_row<num_nodes; node_row++) {
850 for (unsigned int eqn_row=0; eqn_row<num_eqns; eqn_row++) {
851 unsigned int row = node_row*num_eqns + eqn_row;
852 x[row] = (mesh_spacing*node_row - 0.5)*(mesh_spacing*node_row - 0.5);
853 f[row] = 0.0;
854 }
855 }
856
857 for (unsigned int col=0; col<num_cols; col++) {
858 w[col] = std::vector<double>(num_nodes*num_eqns);
859 w_local[col] = std::vector<double>(e.nnode*num_eqns);
860 for (unsigned int node=0; node<num_nodes; node++)
861 for (unsigned int eqn=0; eqn<num_eqns; eqn++) {
862 w[col][node*num_eqns+eqn] = x[node*num_eqns+eqn];
863 }
864 }
865
866 Teuchos::Time timer("FE Residual Fill", false);
867 timer.start(true);
868 std::vector<double> x_local(e.nnode*num_eqns), f_local(e.nnode*num_eqns);
869 std::vector<double> u(e.nqp*num_eqns), du(e.nqp*num_eqns);
870 for (unsigned int i=0; i<num_nodes-1; i++) {
871 e.gid[0] = i;
872 e.gid[1] = i+1;
873
874 analytic_init_fill(e, num_eqns, x, w, x_local, w_local);
875 template_element_fill(e, num_eqns, x_local, u, du, f_local);
876 residual_process_fill(e, num_eqns, f_local, f);
877 }
878 timer.stop();
879
880 // std::cout.precision(8);
881 // std::cout << "Analytic Residual = " << std::endl;
882 // for (unsigned int i=0; i<num_nodes*num_eqns; i++)
883 // std::cout << "\t" << f[i] << std::endl;
884
885 return timer.totalElapsedTime();
886}
887
888int main(int argc, char* argv[]) {
889 int ierr = 0;
890
891 try {
892 double t, ta, tr;
893 int p = 2;
894 int w = p+7;
895
896 // Set up command line options
897 Teuchos::CommandLineProcessor clp;
898 clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
899 int num_nodes = 100000;
900 int num_eqns = 2;
901 int num_cols = 4;
902 int rt = 0;
903 clp.setOption("n", &num_nodes, "Number of nodes");
904 clp.setOption("p", &num_eqns, "Number of equations");
905 clp.setOption("q", &num_cols, "Number of adjoint directions");
906 clp.setOption("rt", &rt, "Include ADOL-C retaping test");
907
908 // Parse options
909 Teuchos::CommandLineProcessor::EParseCommandLineReturn
910 parseReturn= clp.parse(argc, argv);
911 if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
912 return 1;
913
914 double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);
915
916 std::cout.setf(std::ios::scientific);
917 std::cout.precision(p);
918 std::cout << "num_nodes = " << num_nodes
919 << ", num_eqns = " << num_eqns
920 << ", num_cols = " << num_cols << ": " << std::endl
921 << " " << " Time" << "\t"<< "Time/Analytic" << "\t"
922 << "Time/Residual" << std::endl;
923
924 ta = 1.0;
925 tr = 1.0;
926
927 tr = residual_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
928
929 ta = analytic_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
930 std::cout << "Analytic: " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/tr << std::endl;
931
932#ifdef HAVE_ADOLC
933 t = adolc_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
934 std::cout << "ADOL-C: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
935
936 if (rt != 0) {
937 t = adolc_retape_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
938 std::cout << "ADOL-C(rt):" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
939 }
940#endif
941
942 t = fad_adj_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
943 std::cout << "DFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
944
945 t = fad_adj_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, num_cols, mesh_spacing);
946 std::cout << "ELRDFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
947
948 t = rad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
949 std::cout << "Rad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
950
951 t = vrad_adj_fill(num_nodes, num_eqns, num_cols, mesh_spacing);
952 std::cout << "Vector Rad:" << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/tr << std::endl;
953
954 }
955 catch (std::exception& e) {
956 std::cout << e.what() << std::endl;
957 ierr = 1;
958 }
959 catch (const char *s) {
960 std::cout << s << std::endl;
961 ierr = 1;
962 }
963 catch (...) {
964 std::cout << "Caught unknown exception!" << std::endl;
965 ierr = 1;
966 }
967
968 return ierr;
969}
exp(expr.val())
int main()
Sacado::Fad::DFad< double > FadType
static void Weighted_GradcompVec(size_t, size_t *, ADVar ***, Double **)
static void Weighted_Gradcomp(size_t, ADVar **, Double *)
Uncopyable z
const char * p
void analytic_element_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, std::vector< std::vector< double > > &w, std::vector< double > &u, std::vector< double > &du, std::vector< double > &f, std::vector< std::vector< double > > &adj)
double residual_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void rad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< RadType > &x_rad, std::vector< std::vector< double > > &w_local)
void fad_process_fill(const ElemData &e, unsigned int neqn, const std::vector< std::vector< double > > &w_local, const std::vector< FadType > &f_fad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
double fad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
Sacado::RadVec::ADvar< double > VRadType
double rad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
double analytic_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
double vrad_adj_fill(unsigned int num_nodes, unsigned int num_eqns, unsigned int num_cols, double mesh_spacing)
void fad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< FadType > &x_fad, std::vector< std::vector< double > > &w_local)
void vrad_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< VRadType > &x_rad, double **w_local)
void analytic_init_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &x, const std::vector< std::vector< double > > &w, std::vector< double > &x_local, std::vector< std::vector< double > > &w_local)
void template_element_fill(const ElemData &e, unsigned int neqn, const std::vector< T > &x, std::vector< T > &u, std::vector< T > &du, std::vector< T > &f)
void rad_process_fill(const ElemData &e, unsigned int neqn, std::vector< std::vector< double > > &w_local, std::vector< RadType > &x_rad, std::vector< RadType > &f_rad, std::vector< RadType * > &ff_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
void vrad_process_fill(const ElemData &e, unsigned int neqn, unsigned int ncol, std::vector< std::size_t > &vn, double **w_local, std::vector< VRadType > &x_rad, std::vector< VRadType > &f_rad, VRadType ***vf_rad, std::vector< double > &f, std::vector< std::vector< double > > &adj)
void residual_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, std::vector< double > &f)
Sacado::Rad::ADvar< double > RadType
void analytic_process_fill(const ElemData &e, unsigned int neqn, const std::vector< double > &f_local, const std::vector< std::vector< double > > &adj_local, std::vector< double > &f, std::vector< std::vector< double > > &adj)
InactiveDouble * jac
InactiveDouble * w
ElemData(double mesh_spacing)
InactiveDouble ** phi
InactiveDouble ** dphi