Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
FadBLASUnitTests.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Sacado Package
5// Copyright (2006) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25// (etphipp@sandia.gov).
26//
27// ***********************************************************************
28// @HEADER
29
30#ifndef FADBLASUNITTESTS_HPP
31#define FADBLASUNITTESTS_HPP
32
33// Sacado includes
34#include "Sacado_No_Kokkos.hpp"
35#include "Sacado_Fad_BLAS.hpp"
36#include "Sacado_Random.hpp"
37
38// gtest includes
39#include <gtest/gtest.h>
40
41#include "GTestUtils.hpp"
42
43#define COMPARE_FAD_VECTORS(X1, X2, n) \
44 ASSERT_TRUE(X1.size() == std::size_t(n)); \
45 ASSERT_TRUE(X2.size() == std::size_t(n)); \
46 for (unsigned int i=0; i<n; i++) { \
47 COMPARE_FADS(X1[i], X2[i]); \
48 } \
49 ;
50
51// A class for testing differentiated BLAS operations for general Fad types
52template <class FadType>
54protected:
57
58 // Random number generator
60
61 // Real random number generator for derivative components
63
64 // Number of matrix rows
65 unsigned int m_;
66
67 // Number of matrix columns
68 unsigned int n_;
69
70 // Number of matrix columns for level 3 blas
71 unsigned int l_;
72
73 // Number of derivative components
74 unsigned int ndot_;
75
76 // Tolerances to which fad objects should be the same
77 double tol_a, tol_r;
78
80
82 urand(), real_urand(), m_(5), n_(6), l_(4), ndot_(7),
83 tol_a(1.0e-11), tol_r(1.0e-11) {}
84
85}; // class FadBLASUnitTests
86
88
89// Tests all arguments
91 typedef decltype(this->fad) FadType;
92 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
94 auto m = this->m_;
95 auto ndot = this->ndot_;
96
97 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
98 for (unsigned int i=0; i<m; i++) {
99 ScalarType val = this->urand.number();
100 x1[i] = FadType(ndot, val);
101 x2[i] = FadType(ndot, val);
102 x3[i] = FadType(ndot, val);
103 for (unsigned int k=0; k<ndot; k++) {
104 val = this->urand.number();
105 x1[i].fastAccessDx(k) = val;
106 x2[i].fastAccessDx(k) = val;
107 x3[i].fastAccessDx(k) = val;
108 }
109 }
110 FadType alpha(ndot, this->urand.number());
111 for (unsigned int k=0; k<ndot; k++) {
112 alpha.fastAccessDx(k) = this->urand.number();
113 }
114
115 Teuchos::BLAS<int,FadType> teuchos_blas;
116 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
117
118 Teuchos::BLAS<int,FadType> sacado_blas(false);
119 sacado_blas.SCAL(m, alpha, &x2[0], 1);
120
121 COMPARE_FAD_VECTORS(x1, x2, m);
122
123 unsigned int sz = m*(1+ndot);
124 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
125 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
126
127 COMPARE_FAD_VECTORS(x1, x3, m);
128}
129
130// Tests non-unit inc
132 typedef decltype(this->fad) FadType;
133 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
135 auto m = this->m_;
136 auto ndot = this->ndot_;
137
138 unsigned int incx = 2;
139 VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
140 for (unsigned int i=0; i<m*incx; i++) {
141 ScalarType val = this->urand.number();
142 x1[i] = FadType(ndot, val);
143 x2[i] = FadType(ndot, val);
144 x3[i] = FadType(ndot, val);
145 for (unsigned int k=0; k<ndot; k++) {
146 val = this->urand.number();
147 x1[i].fastAccessDx(k) = val;
148 x2[i].fastAccessDx(k) = val;
149 x3[i].fastAccessDx(k) = val;
150 }
151 }
152 FadType alpha(ndot, this->urand.number());
153 for (unsigned int k=0; k<ndot; k++) {
154 alpha.fastAccessDx(k) = this->urand.number();
155 }
156
157 Teuchos::BLAS<int,FadType> teuchos_blas;
158 teuchos_blas.SCAL(m, alpha, &x1[0], incx);
159
160 Teuchos::BLAS<int,FadType> sacado_blas(false);
161 sacado_blas.SCAL(m, alpha, &x2[0], incx);
162
163 COMPARE_FAD_VECTORS(x1, x2, m*incx);
164
165 unsigned int sz = m*(1+ndot);
166 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
167 sacado_blas2.SCAL(m, alpha, &x3[0], incx);
168
169 COMPARE_FAD_VECTORS(x1, x3, m*incx);
170}
171
172// Tests constant alpha
174 typedef decltype(this->fad) FadType;
175 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
177 auto m = this->m_;
178 auto ndot = this->ndot_;
179
180 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
181 for (unsigned int i=0; i<m; i++) {
182 ScalarType val = this->urand.number();
183 x1[i] = FadType(ndot, val);
184 x2[i] = FadType(ndot, val);
185 x3[i] = FadType(ndot, val);
186 for (unsigned int k=0; k<ndot; k++) {
187 val = this->urand.number();
188 x1[i].fastAccessDx(k) = val;
189 x2[i].fastAccessDx(k) = val;
190 x3[i].fastAccessDx(k) = val;
191 }
192 }
193 ScalarType alpha = this->urand.number();
194
195 Teuchos::BLAS<int,FadType> teuchos_blas;
196 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
197
198 Teuchos::BLAS<int,FadType> sacado_blas(false);
199 sacado_blas.SCAL(m, alpha, &x2[0], 1);
200
201 COMPARE_FAD_VECTORS(x1, x2, m);
202
203 unsigned int sz = m*(1+ndot);
204 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
205 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
206
207 COMPARE_FAD_VECTORS(x1, x3, m);
208}
209
210// Tests constant x
212 typedef decltype(this->fad) FadType;
213 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
215 auto m = this->m_;
216 auto ndot = this->ndot_;
217
218 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
219 for (unsigned int i=0; i<m; i++) {
220 ScalarType val = this->urand.number();
221 x1[i] = val;
222 x2[i] = val;
223 x3[i] = val;
224 }
225 FadType alpha = FadType(ndot, this->urand.number());
226 for (unsigned int k=0; k<ndot; k++)
227 alpha.fastAccessDx(k) = this->urand.number();
228
229 Teuchos::BLAS<int,FadType> teuchos_blas;
230 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
231
232 Teuchos::BLAS<int,FadType> sacado_blas(false);
233 sacado_blas.SCAL(m, alpha, &x2[0], 1);
234
235 COMPARE_FAD_VECTORS(x1, x2, m);
236
237 unsigned int sz = m*(1+ndot);
238 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
239 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
240
241 COMPARE_FAD_VECTORS(x1, x3, m);
242}
243
244// Tests all arguments
246 typedef decltype(this->fad) FadType;
247 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
249 auto m = this->m_;
250 auto ndot = this->ndot_;
251
252 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
253 for (unsigned int i=0; i<m; i++) {
254 x[i] = FadType(ndot, this->urand.number());
255 ScalarType val = this->urand.number();
256 y1[i] = FadType(ndot, val);
257 y2[i] = FadType(ndot, val);
258 y3[i] = FadType(ndot, val);
259 for (unsigned int k=0; k<ndot; k++) {
260 x[i].fastAccessDx(k) = this->urand.number();
261 val = this->urand.number();
262 y1[i].fastAccessDx(k) = val;
263 y2[i].fastAccessDx(k) = val;
264 y3[i].fastAccessDx(k) = val;
265 }
266 }
267
268 Teuchos::BLAS<int,FadType> teuchos_blas;
269 teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
270
271 Teuchos::BLAS<int,FadType> sacado_blas(false);
272 sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
273
274 COMPARE_FAD_VECTORS(y1, y2, m);
275
276 unsigned int sz = 2*m*(1+ndot);
277 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
278 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
279
280 COMPARE_FAD_VECTORS(y1, y3, m);
281}
282
283// Tests non unit inc
285 typedef decltype(this->fad) FadType;
286 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
288 auto m = this->m_;
289 auto ndot = this->ndot_;
290
291 unsigned int incx = 2;
292 unsigned int incy = 3;
293 VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
294 for (unsigned int i=0; i<m*incx; i++) {
295 x[i] = FadType(ndot, this->urand.number());
296 for (unsigned int k=0; k<ndot; k++) {
297 x[i].fastAccessDx(k) = this->urand.number();
298 }
299 }
300 for (unsigned int i=0; i<m*incy; i++) {
301 ScalarType val = this->urand.number();
302 y1[i] = FadType(ndot, val);
303 y2[i] = FadType(ndot, val);
304 y3[i] = FadType(ndot, val);
305 for (unsigned int k=0; k<ndot; k++) {
306 val = this->urand.number();
307 y1[i].fastAccessDx(k) = val;
308 y2[i].fastAccessDx(k) = val;
309 y3[i].fastAccessDx(k) = val;
310 }
311 }
312
313 Teuchos::BLAS<int,FadType> teuchos_blas;
314 teuchos_blas.COPY(m, &x[0], incx, &y1[0], incy);
315
316 Teuchos::BLAS<int,FadType> sacado_blas(false);
317 sacado_blas.COPY(m, &x[0], incx, &y2[0], incy);
318
319 COMPARE_FAD_VECTORS(y1, y2, m*incy);
320
321 unsigned int sz = 2*m*(1+ndot);
322 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
323 sacado_blas2.COPY(m, &x[0], incx, &y3[0], incy);
324
325 COMPARE_FAD_VECTORS(y1, y3, m*incy);
326}
327
328// Tests x constant
330 typedef decltype(this->fad) FadType;
331 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
333 auto m = this->m_;
334 auto ndot = this->ndot_;
335
336 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
337 for (unsigned int i=0; i<m; i++) {
338 x[i] = this->urand.number();
339 }
340 for (unsigned int i=0; i<m; i++) {
341 ScalarType val = this->urand.number();
342 y1[i] = FadType(ndot, val);
343 y2[i] = FadType(ndot, val);
344 y3[i] = FadType(ndot, val);
345 for (unsigned int k=0; k<ndot; k++) {
346 val = this->urand.number();
347 y1[i].fastAccessDx(k) = val;
348 y2[i].fastAccessDx(k) = val;
349 y3[i].fastAccessDx(k) = val;
350 }
351 }
352
353 Teuchos::BLAS<int,FadType> teuchos_blas;
354 teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
355
356 Teuchos::BLAS<int,FadType> sacado_blas(false);
357 sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
358
359 COMPARE_FAD_VECTORS(y1, y2, m);
360
361 unsigned int sz = 2*m*(1+ndot);
362 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
363 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
364
365 COMPARE_FAD_VECTORS(y1, y3, m);
366}
367
368// Tests y constant
370 typedef decltype(this->fad) FadType;
371 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
373 auto m = this->m_;
374 auto ndot = this->ndot_;
375
376 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
377 for (unsigned int i=0; i<m; i++) {
378 x[i] = FadType(ndot, this->urand.number());
379 ScalarType val = this->urand.number();
380 y1[i] = val;
381 y2[i] = val;
382 y3[i] = val;
383 for (unsigned int k=0; k<ndot; k++) {
384 x[i].fastAccessDx(k) = this->urand.number();
385 }
386 }
387
388 Teuchos::BLAS<int,FadType> teuchos_blas;
389 teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
390
391 Teuchos::BLAS<int,FadType> sacado_blas(false);
392 sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
393
394 COMPARE_FAD_VECTORS(y1, y2, m);
395
396 unsigned int sz = 2*m*(1+ndot);
397 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
398 sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
399
400 COMPARE_FAD_VECTORS(y1, y3, m);
401}
402
403// Tests all arguments
405 typedef decltype(this->fad) FadType;
406 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
408 auto m = this->m_;
409 auto ndot = this->ndot_;
410
411 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
412 for (unsigned int i=0; i<m; i++) {
413 x[i] = FadType(ndot, this->urand.number());
414 ScalarType val = this->urand.number();
415 y1[i] = FadType(ndot, val);
416 y2[i] = FadType(ndot, val);
417 y3[i] = FadType(ndot, val);
418 for (unsigned int k=0; k<ndot; k++) {
419 x[i].fastAccessDx(k) = this->urand.number();
420 val = this->urand.number();
421 y1[i].fastAccessDx(k) = val;
422 y2[i].fastAccessDx(k) = val;
423 y3[i].fastAccessDx(k) = val;
424 }
425 }
426 FadType alpha(ndot, this->urand.number());
427 for (unsigned int k=0; k<ndot; k++)
428 alpha.fastAccessDx(k) = this->urand.number();
429
430 Teuchos::BLAS<int,FadType> teuchos_blas;
431 teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
432
433 Teuchos::BLAS<int,FadType> sacado_blas(false);
434 sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
435
436 COMPARE_FAD_VECTORS(y1, y2, m);
437
438 unsigned int sz = 2*m*(1+ndot);
439 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
440 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
441
442 COMPARE_FAD_VECTORS(y1, y3, m);
443}
444
445// Tests non unit inc
447 typedef decltype(this->fad) FadType;
448 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
450 auto m = this->m_;
451 auto ndot = this->ndot_;
452
453 unsigned int incx = 2;
454 unsigned int incy = 3;
455 VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
456 for (unsigned int i=0; i<m*incx; i++) {
457 x[i] = FadType(ndot, this->urand.number());
458 for (unsigned int k=0; k<ndot; k++) {
459 x[i].fastAccessDx(k) = this->urand.number();
460 }
461 }
462 for (unsigned int i=0; i<m*incy; i++) {
463 ScalarType val = this->urand.number();
464 y1[i] = FadType(ndot, val);
465 y2[i] = FadType(ndot, val);
466 y3[i] = FadType(ndot, val);
467 for (unsigned int k=0; k<ndot; k++) {
468 val = this->urand.number();
469 y1[i].fastAccessDx(k) = val;
470 y2[i].fastAccessDx(k) = val;
471 y3[i].fastAccessDx(k) = val;
472 }
473 }
474 FadType alpha(ndot, this->urand.number());
475 for (unsigned int k=0; k<ndot; k++)
476 alpha.fastAccessDx(k) = this->urand.number();
477
478 Teuchos::BLAS<int,FadType> teuchos_blas;
479 teuchos_blas.AXPY(m, alpha, &x[0], incx, &y1[0], incy);
480
481 Teuchos::BLAS<int,FadType> sacado_blas(false);
482 sacado_blas.AXPY(m, alpha, &x[0], incx, &y2[0], incy);
483
484 COMPARE_FAD_VECTORS(y1, y2, m*incy);
485
486 unsigned int sz = 2*m*(1+ndot);
487 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
488 sacado_blas2.AXPY(m, alpha, &x[0], incx, &y3[0], incy);
489
490 COMPARE_FAD_VECTORS(y1, y3, m*incy);
491}
492
493// Tests x constant
495 typedef decltype(this->fad) FadType;
496 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
498 auto m = this->m_;
499 auto ndot = this->ndot_;
500
501 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
502 std::vector<ScalarType> xx(m);
503 for (unsigned int i=0; i<m; i++) {
504 xx[i] = this->urand.number();
505 x[i] = xx[i];
506 ScalarType val = this->urand.number();
507 y1[i] = FadType(ndot, val);
508 y2[i] = FadType(ndot, val);
509 y3[i] = FadType(ndot, val);
510 y4[i] = FadType(ndot, val);
511 for (unsigned int k=0; k<ndot; k++) {
512 val = this->urand.number();
513 y1[i].fastAccessDx(k) = val;
514 y2[i].fastAccessDx(k) = val;
515 y3[i].fastAccessDx(k) = val;
516 y4[i].fastAccessDx(k) = val;
517 }
518 }
519 FadType alpha(ndot, this->urand.number());
520 for (unsigned int k=0; k<ndot; k++)
521 alpha.fastAccessDx(k) = this->urand.number();
522
523 Teuchos::BLAS<int,FadType> teuchos_blas;
524 teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
525
526 Teuchos::BLAS<int,FadType> sacado_blas(false);
527 sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
528
529 COMPARE_FAD_VECTORS(y1, y2, m);
530
531 unsigned int sz = m*(1+ndot)+m;
532 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
533 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
534
535 COMPARE_FAD_VECTORS(y1, y3, m);
536
537 sacado_blas.AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
538
539 COMPARE_FAD_VECTORS(y1, y4, m);
540}
541
542// Tests y constant
544 typedef decltype(this->fad) FadType;
545 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
547 auto m = this->m_;
548 auto ndot = this->ndot_;
549
550 VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
551 for (unsigned int i=0; i<m; i++) {
552 x[i] = FadType(ndot, this->urand.number());
553 ScalarType val = this->urand.number();
554 y1[i] = val;
555 y2[i] = val;
556 y3[i] = val;
557 for (unsigned int k=0; k<ndot; k++) {
558 x[i].fastAccessDx(k) = this->urand.number();
559 }
560 }
561 FadType alpha(ndot, this->urand.number());
562 for (unsigned int k=0; k<ndot; k++)
563 alpha.fastAccessDx(k) = this->urand.number();
564
565 Teuchos::BLAS<int,FadType> teuchos_blas;
566 teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
567
568 Teuchos::BLAS<int,FadType> sacado_blas(false);
569 sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
570
571 COMPARE_FAD_VECTORS(y1, y2, m);
572
573 unsigned int sz = 2*m*(1+ndot);
574 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
575 sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
576
577 COMPARE_FAD_VECTORS(y1, y3, m);
578}
579
580// Tests all arguments
582 typedef decltype(this->fad) FadType;
584 auto m = this->m_;
585 auto ndot = this->ndot_;
586
587 VectorType X(m,ndot), Y(m,ndot);
588 for (unsigned int i=0; i<m; i++) {
589 X[i] = FadType(ndot, this->real_urand.number());
590 Y[i] = FadType(ndot, this->real_urand.number());
591 for (unsigned int k=0; k<ndot; k++) {
592 X[i].fastAccessDx(k) = this->real_urand.number();
593 Y[i].fastAccessDx(k) = this->real_urand.number();
594 }
595 }
596
597 Teuchos::BLAS<int,FadType> teuchos_blas;
598 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
599
600 Teuchos::BLAS<int,FadType> sacado_blas(false);
601 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
602
603 COMPARE_FADS(z1, z2);
604
605 unsigned int sz = 2*m*(1+ndot);
606 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
607 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
608
609 COMPARE_FADS(z1, z3);
610}
611
612// Tests non-unit inc
614 typedef decltype(this->fad) FadType;
616 auto m = this->m_;
617 auto ndot = this->ndot_;
618
619 unsigned int incx = 2;
620 unsigned int incy = 3;
621 VectorType X(m*incx,ndot), Y(m*incy,ndot);
622 for (unsigned int i=0; i<m*incx; i++) {
623 X[i] = FadType(ndot, this->real_urand.number());
624 for (unsigned int k=0; k<ndot; k++) {
625 X[i].fastAccessDx(k) = this->real_urand.number();
626 }
627 }
628 for (unsigned int i=0; i<m*incy; i++) {
629 Y[i] = FadType(ndot, this->real_urand.number());
630 for (unsigned int k=0; k<ndot; k++) {
631 Y[i].fastAccessDx(k) = this->real_urand.number();
632 }
633 }
634
635 Teuchos::BLAS<int,FadType> teuchos_blas;
636 FadType z1 = teuchos_blas.DOT(m, &X[0], incx, &Y[0], incy);
637
638 Teuchos::BLAS<int,FadType> sacado_blas(false);
639 FadType z2 = sacado_blas.DOT(m, &X[0], incx, &Y[0], incy);
640
641 COMPARE_FADS(z1, z2);
642
643 unsigned int sz = 2*m*(1+ndot);
644 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
645 FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
646
647 COMPARE_FADS(z1, z3);
648}
649
650// Tests X constant
652 typedef decltype(this->fad) FadType;
653 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
655 auto m = this->m_;
656 auto ndot = this->ndot_;
657
658 VectorType X(m,0), Y(m,ndot);
659 std::vector<ScalarType> x(m);
660 for (unsigned int i=0; i<m; i++) {
661 x[i] = this->urand.number();
662 X[i] = x[i];
663 Y[i] = FadType(ndot, this->real_urand.number());
664 for (unsigned int k=0; k<ndot; k++) {
665 Y[i].fastAccessDx(k) = this->real_urand.number();
666 }
667 }
668
669 Teuchos::BLAS<int,FadType> teuchos_blas;
670 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
671
672 Teuchos::BLAS<int,FadType> sacado_blas(false);
673 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
674
675 COMPARE_FADS(z1, z2);
676
677 unsigned int sz = 2*m*(1+ndot);
678 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
679 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
680
681 COMPARE_FADS(z1, z3);
682
683 FadType z4 = sacado_blas.DOT(m, &x[0], 1, &Y[0], 1);
684
685 COMPARE_FADS(z1, z4);
686}
687
688// Tests Y constant
690 typedef decltype(this->fad) FadType;
691 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
693 auto m = this->m_;
694 auto ndot = this->ndot_;
695
696 VectorType X(m,ndot), Y(m,0);
697 std::vector<ScalarType> y(m);
698 for (unsigned int i=0; i<m; i++) {
699 X[i] = FadType(ndot, this->real_urand.number());
700 y[i] = this->urand.number();
701 Y[i] = y[i];
702 for (unsigned int k=0; k<ndot; k++) {
703 X[i].fastAccessDx(k) = this->real_urand.number();
704 }
705 }
706
707 Teuchos::BLAS<int,FadType> teuchos_blas;
708 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
709
710 Teuchos::BLAS<int,FadType> sacado_blas(false);
711 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
712
713 COMPARE_FADS(z1, z2);
714
715 unsigned int sz = 2*m*(1+ndot);
716 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
717 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
718
719 COMPARE_FADS(z1, z3);
720
721 FadType z4 = sacado_blas.DOT(m, &X[0], 1, &y[0], 1);
722
723 COMPARE_FADS(z1, z4);
724}
725
726// Tests all arguments
728 typedef decltype(this->fad) FadType;
730 auto m = this->m_;
731 auto ndot = this->ndot_;
732
733 VectorType X(m,ndot);
734 for (unsigned int i=0; i<m; i++) {
735 X[i] = FadType(ndot, this->real_urand.number());
736 for (unsigned int k=0; k<ndot; k++) {
737 X[i].fastAccessDx(k) = this->real_urand.number();
738 }
739 }
740
741 Teuchos::BLAS<int,FadType> teuchos_blas;
742 typename Teuchos::ScalarTraits<FadType>::magnitudeType z1 =
743 teuchos_blas.NRM2(m, &X[0], 1);
744
745 Teuchos::BLAS<int,FadType> sacado_blas(false);
746 typename Teuchos::ScalarTraits<FadType>::magnitudeType z2 =
747 sacado_blas.NRM2(m, &X[0], 1);
748
749 COMPARE_FADS(z1, z2);
750
751 unsigned int sz = m*(1+ndot);
752 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
753 typename Teuchos::ScalarTraits<FadType>::magnitudeType z3 =
754 sacado_blas2.NRM2(m, &X[0], 1);
755
756 COMPARE_FADS(z1, z3);
757}
758
759// Tests non-unit inc
761 typedef decltype(this->fad) FadType;
763 auto m = this->m_;
764 auto ndot = this->ndot_;
765
766 unsigned int incx = 2;
767 VectorType X(m*incx,ndot);
768 for (unsigned int i=0; i<m*incx; i++) {
769 X[i] = FadType(ndot, this->real_urand.number());
770 for (unsigned int k=0; k<ndot; k++) {
771 X[i].fastAccessDx(k) = this->real_urand.number();
772 }
773 }
774
775 Teuchos::BLAS<int,FadType> teuchos_blas;
776 typename Teuchos::ScalarTraits<FadType>::magnitudeType z1 =
777 teuchos_blas.NRM2(m, &X[0], incx);
778
779 Teuchos::BLAS<int,FadType> sacado_blas(false);
780 typename Teuchos::ScalarTraits<FadType>::magnitudeType z2 =
781 sacado_blas.NRM2(m, &X[0], incx);
782
783 COMPARE_FADS(z1, z2);
784
785 unsigned int sz = m*(1+ndot);
786 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
787 typename Teuchos::ScalarTraits<FadType>::magnitudeType z3 =
788 sacado_blas2.NRM2(m, &X[0], incx);
789
790 COMPARE_FADS(z1, z3);
791}
792
793// Tests all arguments
795 typedef decltype(this->fad) FadType;
796 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
798 auto m = this->m_;
799 auto n = this->n_;
800 auto ndot = this->ndot_;
801
802 VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
803 for (unsigned int j=0; j<n; j++) {
804 for (unsigned int i=0; i<m; i++) {
805 A[i+j*m] = FadType(ndot, this->urand.number());
806 for (unsigned int k=0; k<ndot; k++)
807 A[i+j*m].fastAccessDx(k) = this->urand.number();
808 }
809 B[j] = FadType(ndot, this->urand.number());
810 for (unsigned int k=0; k<ndot; k++)
811 B[j].fastAccessDx(k) = this->urand.number();
812 }
813 FadType alpha(ndot, this->urand.number());
814 FadType beta(ndot, this->urand.number());
815 for (unsigned int k=0; k<ndot; k++) {
816 alpha.fastAccessDx(k) = this->urand.number();
817 beta.fastAccessDx(k) = this->urand.number();
818 }
819
820 for (unsigned int i=0; i<m; i++) {
821 ScalarType val = this->urand.number();
822 C1[i] = FadType(ndot, val);
823 C2[i] = FadType(ndot, val);
824 C3[i] = FadType(ndot, val);
825 for (unsigned int k=0; k<ndot; k++) {
826 val = this->urand.number();
827 C1[i].fastAccessDx(k) = val;
828 C2[i].fastAccessDx(k) = val;
829 C3[i].fastAccessDx(k) = val;
830 }
831 }
832
833 Teuchos::BLAS<int,FadType> teuchos_blas;
834 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
835 beta, &C1[0], 1);
836
837 Teuchos::BLAS<int,FadType> sacado_blas(false);
838 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
839 beta, &C2[0], 1);
840
841 COMPARE_FAD_VECTORS(C1, C2, m);
842
843 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
844 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
845 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
846 beta, &C3[0], 1);
847
848 COMPARE_FAD_VECTORS(C1, C3, m);
849}
850
851// Tests non-unit inc and different lda
853 typedef decltype(this->fad) FadType;
854 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
856 auto m = this->m_;
857 auto n = this->n_;
858 auto ndot = this->ndot_;
859
860 unsigned int lda = m+3;
861 unsigned int incb = 2;
862 unsigned int incc = 3;
863 VectorType A(lda*n,ndot), B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
864 C3(m*incc,ndot);
865 for (unsigned int j=0; j<n; j++) {
866 for (unsigned int i=0; i<lda; i++) {
867 A[i+j*lda] = FadType(ndot, this->urand.number());
868 for (unsigned int k=0; k<ndot; k++)
869 A[i+j*lda].fastAccessDx(k) = this->urand.number();
870 }
871 }
872 for (unsigned int j=0; j<n*incb; j++) {
873 B[j] = FadType(ndot, this->urand.number());
874 for (unsigned int k=0; k<ndot; k++)
875 B[j].fastAccessDx(k) = this->urand.number();
876 }
877 FadType alpha(ndot, this->urand.number());
878 FadType beta(ndot, this->urand.number());
879 for (unsigned int k=0; k<ndot; k++) {
880 alpha.fastAccessDx(k) = this->urand.number();
881 beta.fastAccessDx(k) = this->urand.number();
882 }
883
884 for (unsigned int i=0; i<m*incc; i++) {
885 ScalarType val = this->urand.number();
886 C1[i] = FadType(ndot, val);
887 C2[i] = FadType(ndot, val);
888 C3[i] = FadType(ndot, val);
889 for (unsigned int k=0; k<ndot; k++) {
890 val = this->urand.number();
891 C1[i].fastAccessDx(k) = val;
892 C2[i].fastAccessDx(k) = val;
893 C3[i].fastAccessDx(k) = val;
894 }
895 }
896
897 Teuchos::BLAS<int,FadType> teuchos_blas;
898 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
899 beta, &C1[0], incc);
900
901 Teuchos::BLAS<int,FadType> sacado_blas(false);
902 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
903 beta, &C2[0], incc);
904
905 COMPARE_FAD_VECTORS(C1, C2, m*incc);
906
907 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
908 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
909 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
910 beta, &C3[0], incc);
911
912 COMPARE_FAD_VECTORS(C1, C3, m*incc);
913}
914
915// Tests transpose with all arguments
917 typedef decltype(this->fad) FadType;
918 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
920 auto m = this->m_;
921 auto n = this->n_;
922 auto ndot = this->ndot_;
923
924 VectorType A(m*n,ndot), B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
925 for (unsigned int j=0; j<n; j++) {
926 for (unsigned int i=0; i<m; i++) {
927 A[i+j*m] = FadType(ndot, this->urand.number());
928 for (unsigned int k=0; k<ndot; k++)
929 A[i+j*m].fastAccessDx(k) = this->urand.number();
930 }
931 }
932 for (unsigned int j=0; j<m; j++) {
933 B[j] = FadType(ndot, this->urand.number());
934 for (unsigned int k=0; k<ndot; k++)
935 B[j].fastAccessDx(k) = this->urand.number();
936 }
937 FadType alpha(ndot, this->urand.number());
938 FadType beta(ndot, this->urand.number());
939 for (unsigned int k=0; k<ndot; k++) {
940 alpha.fastAccessDx(k) = this->urand.number();
941 beta.fastAccessDx(k) = this->urand.number();
942 }
943
944 for (unsigned int i=0; i<n; i++) {
945 ScalarType val = this->urand.number();
946 C1[i] = FadType(ndot, val);
947 C2[i] = FadType(ndot, val);
948 C3[i] = FadType(ndot, val);
949 for (unsigned int k=0; k<ndot; k++) {
950 val = this->urand.number();
951 C1[i].fastAccessDx(k) = val;
952 C2[i].fastAccessDx(k) = val;
953 C3[i].fastAccessDx(k) = val;
954 }
955 }
956
957 Teuchos::BLAS<int,FadType> teuchos_blas;
958 teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
959 beta, &C1[0], 1);
960
961 Teuchos::BLAS<int,FadType> sacado_blas(false);
962 sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
963 beta, &C2[0], 1);
964
965 COMPARE_FAD_VECTORS(C1, C2, n);
966
967 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
968 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
969 sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1,
970 beta, &C3[0], 1);
971
972 COMPARE_FAD_VECTORS(C1, C3, n);
973}
974
975// Tests transpose with non-unit inc and different lda
977 typedef decltype(this->fad) FadType;
978 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
980 auto m = this->m_;
981 auto n = this->n_;
982 auto ndot = this->ndot_;
983
984 unsigned int lda = m+3;
985 unsigned int incb = 2;
986 unsigned int incc = 3;
987 VectorType A(lda*n,ndot), B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
988 C3(n*incc,ndot);
989 for (unsigned int j=0; j<n; j++) {
990 for (unsigned int i=0; i<lda; i++) {
991 A[i+j*lda] = FadType(ndot, this->urand.number());
992 for (unsigned int k=0; k<ndot; k++)
993 A[i+j*lda].fastAccessDx(k) = this->urand.number();
994 }
995 }
996 for (unsigned int j=0; j<m*incb; j++) {
997 B[j] = FadType(ndot, this->urand.number());
998 for (unsigned int k=0; k<ndot; k++)
999 B[j].fastAccessDx(k) = this->urand.number();
1000 }
1001 FadType alpha(ndot, this->urand.number());
1002 FadType beta(ndot, this->urand.number());
1003 for (unsigned int k=0; k<ndot; k++) {
1004 alpha.fastAccessDx(k) = this->urand.number();
1005 beta.fastAccessDx(k) = this->urand.number();
1006 }
1007
1008 for (unsigned int i=0; i<n*incc; i++) {
1009 ScalarType val = this->urand.number();
1010 C1[i] = FadType(ndot, val);
1011 C2[i] = FadType(ndot, val);
1012 C3[i] = FadType(ndot, val);
1013 for (unsigned int k=0; k<ndot; k++) {
1014 val = this->urand.number();
1015 C1[i].fastAccessDx(k) = val;
1016 C2[i].fastAccessDx(k) = val;
1017 C3[i].fastAccessDx(k) = val;
1018 }
1019 }
1020
1021 Teuchos::BLAS<int,FadType> teuchos_blas;
1022 teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1023 beta, &C1[0], incc);
1024
1025 Teuchos::BLAS<int,FadType> sacado_blas(false);
1026 sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1027 beta, &C2[0], incc);
1028
1029 COMPARE_FAD_VECTORS(C1, C2, n*incc);
1030
1031 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1032 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1033 sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb,
1034 beta, &C3[0], incc);
1035
1036 COMPARE_FAD_VECTORS(C1, C3, n*incc);
1037}
1038
1039// Tests with constant C
1041 typedef decltype(this->fad) FadType;
1042 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1044 auto m = this->m_;
1045 auto n = this->n_;
1046 auto ndot = this->ndot_;
1047
1048 VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1049 for (unsigned int j=0; j<n; j++) {
1050 for (unsigned int i=0; i<m; i++) {
1051 A[i+j*m] = FadType(ndot, this->urand.number());
1052 for (unsigned int k=0; k<ndot; k++)
1053 A[i+j*m].fastAccessDx(k) = this->urand.number();
1054 }
1055 B[j] = FadType(ndot, this->urand.number());
1056 for (unsigned int k=0; k<ndot; k++)
1057 B[j].fastAccessDx(k) = this->urand.number();
1058 }
1059 FadType alpha(ndot, this->urand.number());
1060 FadType beta(ndot, this->urand.number());
1061 for (unsigned int k=0; k<ndot; k++) {
1062 alpha.fastAccessDx(k) = this->urand.number();
1063 beta.fastAccessDx(k) = this->urand.number();
1064 }
1065
1066 for (unsigned int i=0; i<m; i++) {
1067 ScalarType val = this->urand.number();
1068 C1[i] = val;
1069 C2[i] = val;
1070 C3[i] = val;
1071 }
1072
1073 Teuchos::BLAS<int,FadType> teuchos_blas;
1074 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1075 beta, &C1[0], 1);
1076
1077 Teuchos::BLAS<int,FadType> sacado_blas(false);
1078 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1079 beta, &C2[0], 1);
1080
1081 COMPARE_FAD_VECTORS(C1, C2, m);
1082
1083 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1084 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1085 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1086 beta, &C3[0], 1);
1087
1088 COMPARE_FAD_VECTORS(C1, C3, m);
1089}
1090
1091// Tests with constant alpha, beta
1093 typedef decltype(this->fad) FadType;
1094 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1096 auto m = this->m_;
1097 auto n = this->n_;
1098 auto ndot = this->ndot_;
1099
1100 VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1101 for (unsigned int j=0; j<n; j++) {
1102 for (unsigned int i=0; i<m; i++) {
1103 A[i+j*m] = FadType(ndot, this->urand.number());
1104 for (unsigned int k=0; k<ndot; k++)
1105 A[i+j*m].fastAccessDx(k) = this->urand.number();
1106 }
1107 B[j] = FadType(ndot, this->urand.number());
1108 for (unsigned int k=0; k<ndot; k++)
1109 B[j].fastAccessDx(k) = this->urand.number();
1110 }
1111 ScalarType alpha = this->urand.number();
1112 ScalarType beta = this->urand.number();
1113
1114 for (unsigned int i=0; i<m; i++) {
1115 ScalarType val = this->urand.number();
1116 C1[i] = FadType(ndot, val);
1117 C2[i] = FadType(ndot, val);
1118 C3[i] = FadType(ndot, val);
1119 for (unsigned int k=0; k<ndot; k++) {
1120 val = this->urand.number();
1121 C1[i].fastAccessDx(k) = val;
1122 C2[i].fastAccessDx(k) = val;
1123 C3[i].fastAccessDx(k) = val;
1124 }
1125 }
1126
1127 Teuchos::BLAS<int,FadType> teuchos_blas;
1128 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1129 beta, &C1[0], 1);
1130
1131 Teuchos::BLAS<int,FadType> sacado_blas(false);
1132 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1133 beta, &C2[0], 1);
1134
1135 COMPARE_FAD_VECTORS(C1, C2, m);
1136
1137 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1138 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1139 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1140 beta, &C3[0], 1);
1141
1142 COMPARE_FAD_VECTORS(C1, C3, m);
1143}
1144
1145// Tests wth constant B
1147 typedef decltype(this->fad) FadType;
1148 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1150 auto m = this->m_;
1151 auto n = this->n_;
1152 auto ndot = this->ndot_;
1153
1154 VectorType A(m*n,ndot), B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1155 C4(m,ndot);
1156 std::vector<ScalarType> b(n);
1157 for (unsigned int j=0; j<n; j++) {
1158 for (unsigned int i=0; i<m; i++) {
1159 A[i+j*m] = FadType(ndot, this->urand.number());
1160 for (unsigned int k=0; k<ndot; k++)
1161 A[i+j*m].fastAccessDx(k) = this->urand.number();
1162 }
1163 b[j] = this->urand.number();
1164 B[j] = b[j];
1165 }
1166 FadType alpha(ndot, this->urand.number());
1167 FadType beta(ndot, this->urand.number());
1168 for (unsigned int k=0; k<ndot; k++) {
1169 alpha.fastAccessDx(k) = this->urand.number();
1170 beta.fastAccessDx(k) = this->urand.number();
1171 }
1172
1173 for (unsigned int i=0; i<m; i++) {
1174 ScalarType val = this->urand.number();
1175 C1[i] = FadType(ndot, val);
1176 C2[i] = FadType(ndot, val);
1177 C3[i] = FadType(ndot, val);
1178 C4[i] = FadType(ndot, val);
1179 for (unsigned int k=0; k<ndot; k++) {
1180 val = this->urand.number();
1181 C1[i].fastAccessDx(k) = val;
1182 C2[i].fastAccessDx(k) = val;
1183 C3[i].fastAccessDx(k) = val;
1184 C4[i].fastAccessDx(k) = val;
1185 }
1186 }
1187
1188 Teuchos::BLAS<int,FadType> teuchos_blas;
1189 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1190 beta, &C1[0], 1);
1191
1192 Teuchos::BLAS<int,FadType> sacado_blas(false);
1193 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1194 beta, &C2[0], 1);
1195
1196 COMPARE_FAD_VECTORS(C1, C2, m);
1197
1198 unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1199 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1200 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1201 beta, &C3[0], 1);
1202
1203 COMPARE_FAD_VECTORS(C1, C3, m);
1204
1205 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &b[0], 1,
1206 beta, &C4[0], 1);
1207
1208 COMPARE_FAD_VECTORS(C1, C4, m);
1209}
1210
1211// Tests with constant A
1213 typedef decltype(this->fad) FadType;
1214 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1216 auto m = this->m_;
1217 auto n = this->n_;
1218 auto ndot = this->ndot_;
1219
1220 VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1221 C4(m,ndot);
1222 std::vector<ScalarType> a(m*n);
1223 for (unsigned int j=0; j<n; j++) {
1224 for (unsigned int i=0; i<m; i++) {
1225 a[i+j*m] = this->urand.number();
1226 A[i+j*m] = a[i+j*m];
1227 }
1228 B[j] = FadType(ndot, this->urand.number());
1229 for (unsigned int k=0; k<ndot; k++)
1230 B[j].fastAccessDx(k) = this->urand.number();
1231 }
1232 FadType alpha(ndot, this->urand.number());
1233 FadType beta(ndot, this->urand.number());
1234 for (unsigned int k=0; k<ndot; k++) {
1235 alpha.fastAccessDx(k) = this->urand.number();
1236 beta.fastAccessDx(k) = this->urand.number();
1237 }
1238
1239 for (unsigned int i=0; i<m; i++) {
1240 ScalarType val = this->urand.number();
1241 C1[i] = FadType(ndot, val);
1242 C2[i] = FadType(ndot, val);
1243 C3[i] = FadType(ndot, val);
1244 C4[i] = FadType(ndot, val);
1245 for (unsigned int k=0; k<ndot; k++) {
1246 val = this->urand.number();
1247 C1[i].fastAccessDx(k) = val;
1248 C2[i].fastAccessDx(k) = val;
1249 C3[i].fastAccessDx(k) = val;
1250 C4[i].fastAccessDx(k) = val;
1251 }
1252 }
1253
1254 Teuchos::BLAS<int,FadType> teuchos_blas;
1255 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1256 beta, &C1[0], 1);
1257
1258 Teuchos::BLAS<int,FadType> sacado_blas(false);
1259 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1260 beta, &C2[0], 1);
1261
1262 COMPARE_FAD_VECTORS(C1, C2, m);
1263
1264 unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1265 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1266 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1267 beta, &C3[0], 1);
1268
1269 COMPARE_FAD_VECTORS(C1, C3, m);
1270
1271 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &B[0], 1,
1272 beta, &C4[0], 1);
1273
1274 COMPARE_FAD_VECTORS(C1, C4, m);
1275}
1276
1277// Tests with constant A, B
1279 typedef decltype(this->fad) FadType;
1280 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1282 auto m = this->m_;
1283 auto n = this->n_;
1284 auto ndot = this->ndot_;
1285
1286 VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1287 C4(m,ndot);
1288 std::vector<ScalarType> a(m*n), b(n);
1289 for (unsigned int j=0; j<n; j++) {
1290 for (unsigned int i=0; i<m; i++) {
1291 a[i+j*m] = this->urand.number();
1292 A[i+j*m] = a[i+j*m];
1293 }
1294 b[j] = this->urand.number();
1295 B[j] = b[j];
1296 }
1297 FadType alpha(ndot, this->urand.number());
1298 FadType beta(ndot, this->urand.number());
1299 for (unsigned int k=0; k<ndot; k++) {
1300 alpha.fastAccessDx(k) = this->urand.number();
1301 beta.fastAccessDx(k) = this->urand.number();
1302 }
1303
1304 for (unsigned int i=0; i<m; i++) {
1305 ScalarType val = this->urand.number();
1306 C1[i] = FadType(ndot, val);
1307 C2[i] = FadType(ndot, val);
1308 C3[i] = FadType(ndot, val);
1309 C4[i] = FadType(ndot, val);
1310 for (unsigned int k=0; k<ndot; k++) {
1311 val = this->urand.number();
1312 C1[i].fastAccessDx(k) = val;
1313 C2[i].fastAccessDx(k) = val;
1314 C3[i].fastAccessDx(k) = val;
1315 C4[i].fastAccessDx(k) = val;
1316 }
1317 }
1318
1319 Teuchos::BLAS<int,FadType> teuchos_blas;
1320 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1321 beta, &C1[0], 1);
1322
1323 Teuchos::BLAS<int,FadType> sacado_blas(false);
1324 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1325 beta, &C2[0], 1);
1326
1327 COMPARE_FAD_VECTORS(C1, C2, m);
1328
1329 unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
1330 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1331 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1,
1332 beta, &C3[0], 1);
1333
1334 COMPARE_FAD_VECTORS(C1, C3, m);
1335
1336 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &b[0], 1,
1337 beta, &C4[0], 1);
1338
1339 COMPARE_FAD_VECTORS(C1, C4, m);
1340}
1341
1342// Tests all arguments
1344 typedef decltype(this->fad) FadType;
1345 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1347 auto n = this->n_;
1348 auto ndot = this->ndot_;
1349
1350 VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1351 for (unsigned int j=0; j<n; j++) {
1352 for (unsigned int i=0; i<n; i++) {
1353 A[i+j*n] = FadType(ndot, this->urand.number());
1354 for (unsigned int k=0; k<ndot; k++)
1355 A[i+j*n].fastAccessDx(k) = this->urand.number();
1356 }
1357 ScalarType val = this->urand.number();
1358 x1[j] = FadType(ndot, val);
1359 x2[j] = FadType(ndot, val);
1360 x3[j] = FadType(ndot, val);
1361 for (unsigned int k=0; k<ndot; k++) {
1362 val = this->urand.number();
1363 x1[j].fastAccessDx(k) = val;
1364 x2[j].fastAccessDx(k) = val;
1365 x3[j].fastAccessDx(k) = val;
1366 }
1367 }
1368
1369 Teuchos::BLAS<int,FadType> teuchos_blas;
1370 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1371 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1372
1373 Teuchos::BLAS<int,FadType> sacado_blas(false);
1374 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1375 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1376
1377 COMPARE_FAD_VECTORS(x1, x2, n);
1378
1379 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1380 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1381 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1382 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1383
1384 COMPARE_FAD_VECTORS(x1, x3, n);
1385
1386 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1387 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1388 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1389 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1390 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1391 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1392 COMPARE_FAD_VECTORS(x1, x2, n);
1393 COMPARE_FAD_VECTORS(x1, x3, n);
1394
1395 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1396 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1397 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1398 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1399 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1400 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1401 COMPARE_FAD_VECTORS(x1, x2, n);
1402 COMPARE_FAD_VECTORS(x1, x3, n);
1403
1404 for (unsigned int i=0; i<n; i++) {
1405 A[i*n+i].val() = 1.0;
1406 for (unsigned int k=0; k<ndot; k++)
1407 A[i*n+i].fastAccessDx(k) = 0.0;
1408 }
1409 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1410 Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1411 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1412 Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1413 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1414 Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1415 COMPARE_FAD_VECTORS(x1, x2, n);
1416 COMPARE_FAD_VECTORS(x1, x3, n);
1417}
1418
1419// Tests non unit inc, different lda
1421 typedef decltype(this->fad) FadType;
1422 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1424 auto n = this->n_;
1425 auto ndot = this->ndot_;
1426
1427 unsigned int lda = n+3;
1428 unsigned int incx = 2;
1429 VectorType A(lda*n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
1430 for (unsigned int j=0; j<n; j++) {
1431 for (unsigned int i=0; i<lda; i++) {
1432 A[i+j*lda] = FadType(ndot, this->urand.number());
1433 for (unsigned int k=0; k<ndot; k++)
1434 A[i+j*lda].fastAccessDx(k) = this->urand.number();
1435 }
1436 }
1437 for (unsigned int j=0; j<n*incx; j++) {
1438 ScalarType val = this->urand.number();
1439 x1[j] = FadType(ndot, val);
1440 x2[j] = FadType(ndot, val);
1441 x3[j] = FadType(ndot, val);
1442 for (unsigned int k=0; k<ndot; k++) {
1443 val = this->urand.number();
1444 x1[j].fastAccessDx(k) = val;
1445 x2[j].fastAccessDx(k) = val;
1446 x3[j].fastAccessDx(k) = val;
1447 }
1448 }
1449
1450 Teuchos::BLAS<int,FadType> teuchos_blas;
1451 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1452 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1453
1454 Teuchos::BLAS<int,FadType> sacado_blas(false);
1455 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1456 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1457
1458 COMPARE_FAD_VECTORS(x1, x2, n*incx);
1459
1460 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1461 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1462 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1463 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1464
1465 COMPARE_FAD_VECTORS(x1, x3, n*incx);
1466
1467 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1468 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1469 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1470 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1471 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1472 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1473 COMPARE_FAD_VECTORS(x1, x2, n*incx);
1474 COMPARE_FAD_VECTORS(x1, x3, n*incx);
1475
1476 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1477 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1478 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1479 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1480 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1481 Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1482 COMPARE_FAD_VECTORS(x1, x2, n*incx);
1483 COMPARE_FAD_VECTORS(x1, x3, n*incx);
1484
1485 for (unsigned int i=0; i<n; i++) {
1486 A[i*lda+i].val() = 1.0;
1487 for (unsigned int k=0; k<ndot; k++)
1488 A[i*lda+i].fastAccessDx(k) = 0.0;
1489 }
1490 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1491 Teuchos::UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
1492 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1493 Teuchos::UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
1494 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1495 Teuchos::UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
1496 COMPARE_FAD_VECTORS(x1, x2, n*incx);
1497 COMPARE_FAD_VECTORS(x1, x3, n*incx);
1498}
1499
1500// Tests A constant
1502 typedef decltype(this->fad) FadType;
1503 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1505 auto n = this->n_;
1506 auto ndot = this->ndot_;
1507
1508 VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1509 x5(n,ndot);
1510 std::vector<ScalarType> a(n*n);
1511 for (unsigned int j=0; j<n; j++) {
1512 for (unsigned int i=0; i<n; i++) {
1513 a[i+j*n] = this->urand.number();
1514 A[i+j*n] = a[i+j*n];
1515 }
1516 ScalarType val = this->urand.number();
1517 x1[j] = FadType(ndot, val);
1518 x2[j] = FadType(ndot, val);
1519 x3[j] = FadType(ndot, val);
1520 x4[j] = FadType(ndot, val);
1521 x5[j] = FadType(ndot, val);
1522 for (unsigned int k=0; k<ndot; k++) {
1523 val = this->urand.number();
1524 x1[j].fastAccessDx(k) = val;
1525 x2[j].fastAccessDx(k) = val;
1526 x3[j].fastAccessDx(k) = val;
1527 x4[j].fastAccessDx(k) = val;
1528 x5[j].fastAccessDx(k) = val;
1529 }
1530 }
1531
1532 Teuchos::BLAS<int,FadType> teuchos_blas;
1533 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1534 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1535
1536 Teuchos::BLAS<int,FadType> sacado_blas(false);
1537 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1538 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1539
1540 COMPARE_FAD_VECTORS(x1, x2, n);
1541
1542 unsigned int sz = n*n+n*(1+ndot);
1543 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1544 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1545 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1546
1547 COMPARE_FAD_VECTORS(x1, x3, n);
1548
1549 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1550 Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1551
1552 COMPARE_FAD_VECTORS(x1, x4, n);
1553
1554 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1555 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x5[0], 1);
1556
1557 COMPARE_FAD_VECTORS(x1, x5, n);
1558
1559 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1560 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1561 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1562 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1563 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1564 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1565 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1566 Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1567 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1568 Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1569 COMPARE_FAD_VECTORS(x1, x2, n);
1570 COMPARE_FAD_VECTORS(x1, x3, n);
1571 COMPARE_FAD_VECTORS(x1, x4, n);
1572 COMPARE_FAD_VECTORS(x1, x5, n);
1573
1574 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1575 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1576 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1577 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1578 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1579 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1580 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1581 Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1582 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1583 Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1584 COMPARE_FAD_VECTORS(x1, x2, n);
1585 COMPARE_FAD_VECTORS(x1, x3, n);
1586 COMPARE_FAD_VECTORS(x1, x4, n);
1587 COMPARE_FAD_VECTORS(x1, x5, n);
1588
1589 for (unsigned int i=0; i<n; i++) {
1590 A[i*n+i].val() = 1.0;
1591 for (unsigned int k=0; k<ndot; k++)
1592 A[i*n+i].fastAccessDx(k) = 0.0;
1593 }
1594 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1595 Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1596 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1597 Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1598 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1599 Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1600 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1601 Teuchos::UNIT_DIAG, n, &a[0], n, &x4[0], 1);
1602 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1603 Teuchos::UNIT_DIAG, n, &a[0], n, &x5[0], 1);
1604 COMPARE_FAD_VECTORS(x1, x2, n);
1605 COMPARE_FAD_VECTORS(x1, x3, n);
1606 COMPARE_FAD_VECTORS(x1, x4, n);
1607 COMPARE_FAD_VECTORS(x1, x5, n);
1608}
1609
1610// Tests x constant
1612 typedef decltype(this->fad) FadType;
1613 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1615 auto n = this->n_;
1616 auto ndot = this->ndot_;
1617
1618 VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1619 for (unsigned int j=0; j<n; j++) {
1620 for (unsigned int i=0; i<n; i++) {
1621 A[i+j*n] = FadType(ndot, this->urand.number());
1622 for (unsigned int k=0; k<ndot; k++)
1623 A[i+j*n].fastAccessDx(k) = this->urand.number();
1624 }
1625 ScalarType val = this->urand.number();
1626 x1[j] = val;
1627 x2[j] = val;
1628 x3[j] = val;
1629 }
1630
1631 Teuchos::BLAS<int,FadType> teuchos_blas;
1632 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1633 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1634
1635 Teuchos::BLAS<int,FadType> sacado_blas(false);
1636 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1637 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1638
1639 COMPARE_FAD_VECTORS(x1, x2, n);
1640
1641 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1642 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1643 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1644 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1645
1646 COMPARE_FAD_VECTORS(x1, x3, n);
1647
1648 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1649 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1650 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1651 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1652 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1653 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1654 COMPARE_FAD_VECTORS(x1, x2, n);
1655 COMPARE_FAD_VECTORS(x1, x3, n);
1656
1657 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1658 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1659 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1660 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1661 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1662 Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1663 COMPARE_FAD_VECTORS(x1, x2, n);
1664 COMPARE_FAD_VECTORS(x1, x3, n);
1665
1666 for (unsigned int i=0; i<n; i++) {
1667 A[i*n+i].val() = 1.0;
1668 for (unsigned int k=0; k<ndot; k++)
1669 A[i*n+i].fastAccessDx(k) = 0.0;
1670 }
1671 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1672 Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
1673 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1674 Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
1675 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1676 Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
1677 COMPARE_FAD_VECTORS(x1, x2, n);
1678 COMPARE_FAD_VECTORS(x1, x3, n);
1679}
1680
1681// Tests all arguments
1683 typedef decltype(this->fad) FadType;
1684 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1686 auto m = this->m_;
1687 auto n = this->n_;
1688 auto ndot = this->ndot_;
1689
1690
1691 // GER is apparently not defined in the BLAS for complex types
1692 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1693 return;
1694
1695 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1696 for (unsigned int j=0; j<n; j++) {
1697 for (unsigned int i=0; i<m; i++) {
1698 ScalarType val = this->urand.number();
1699 A1[i+j*m] = FadType(ndot, val);
1700 A2[i+j*m] = FadType(ndot, val);
1701 A3[i+j*m] = FadType(ndot, val);
1702 for (unsigned int k=0; k<ndot; k++) {
1703 val = this->urand.number();
1704 A1[i+j*m].fastAccessDx(k) = val;
1705 A2[i+j*m].fastAccessDx(k) = val;
1706 A3[i+j*m].fastAccessDx(k) = val;
1707 }
1708 }
1709 }
1710 for (unsigned int i=0; i<m; i++) {
1711 x[i] = FadType(ndot, this->urand.number());
1712 for (unsigned int k=0; k<ndot; k++)
1713 x[i].fastAccessDx(k) = this->urand.number();
1714 }
1715 for (unsigned int i=0; i<n; i++) {
1716 y[i] = FadType(ndot, this->urand.number());
1717 for (unsigned int k=0; k<ndot; k++)
1718 y[i].fastAccessDx(k) = this->urand.number();
1719 }
1720 FadType alpha(ndot, this->urand.number());
1721 for (unsigned int k=0; k<ndot; k++) {
1722 alpha.fastAccessDx(k) = this->urand.number();
1723 }
1724
1725 Teuchos::BLAS<int,FadType> teuchos_blas;
1726 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1727
1728 Teuchos::BLAS<int,FadType> sacado_blas(false);
1729 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1730
1731 COMPARE_FAD_VECTORS(A1, A2, m*n);
1732
1733 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1734 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1735 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1736
1737 COMPARE_FAD_VECTORS(A1, A3, m*n);
1738}
1739
1740// Tests non unit inc, different lda
1742 typedef decltype(this->fad) FadType;
1743 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1745 auto m = this->m_;
1746 auto n = this->n_;
1747 auto ndot = this->ndot_;
1748
1749
1750 // GER is apparently not defined in the BLAS for complex types
1751 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1752 return;
1753
1754 unsigned int lda = m+3;
1755 unsigned int incx = 2;
1756 unsigned int incy = 3;
1757 VectorType A1(lda*n,ndot), A2(lda*n,ndot), A3(lda*n,ndot), x(m*incx,ndot),
1758 y(n*incy,ndot);
1759 for (unsigned int j=0; j<n; j++) {
1760 for (unsigned int i=0; i<lda; i++) {
1761 ScalarType val = this->urand.number();
1762 A1[i+j*lda] = FadType(ndot, val);
1763 A2[i+j*lda] = FadType(ndot, val);
1764 A3[i+j*lda] = FadType(ndot, val);
1765 for (unsigned int k=0; k<ndot; k++) {
1766 val = this->urand.number();
1767 A1[i+j*lda].fastAccessDx(k) = val;
1768 A2[i+j*lda].fastAccessDx(k) = val;
1769 A3[i+j*lda].fastAccessDx(k) = val;
1770 }
1771 }
1772 }
1773 for (unsigned int i=0; i<m*incx; i++) {
1774 x[i] = FadType(ndot, this->urand.number());
1775 for (unsigned int k=0; k<ndot; k++)
1776 x[i].fastAccessDx(k) = this->urand.number();
1777 }
1778 for (unsigned int i=0; i<n*incy; i++) {
1779 y[i] = FadType(ndot, this->urand.number());
1780 for (unsigned int k=0; k<ndot; k++)
1781 y[i].fastAccessDx(k) = this->urand.number();
1782 }
1783 FadType alpha(ndot, this->urand.number());
1784 for (unsigned int k=0; k<ndot; k++) {
1785 alpha.fastAccessDx(k) = this->urand.number();
1786 }
1787
1788 Teuchos::BLAS<int,FadType> teuchos_blas;
1789 teuchos_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A1[0], lda);
1790
1791 Teuchos::BLAS<int,FadType> sacado_blas(false);
1792 sacado_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A2[0], lda);
1793
1794 COMPARE_FAD_VECTORS(A1, A2, lda*n);
1795
1796 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1797 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1798 sacado_blas2.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A3[0], lda);
1799
1800 COMPARE_FAD_VECTORS(A1, A3, lda*n);
1801}
1802
1803// Tests constant alpha
1805 typedef decltype(this->fad) FadType;
1806 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1808 auto m = this->m_;
1809 auto n = this->n_;
1810 auto ndot = this->ndot_;
1811
1812
1813 // GER is apparently not defined in the BLAS for complex types
1814 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1815 return;
1816
1817 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
1818 for (unsigned int j=0; j<n; j++) {
1819 for (unsigned int i=0; i<m; i++) {
1820 ScalarType val = this->urand.number();
1821 A1[i+j*m] = FadType(ndot, val);
1822 A2[i+j*m] = FadType(ndot, val);
1823 A3[i+j*m] = FadType(ndot, val);
1824 for (unsigned int k=0; k<ndot; k++) {
1825 val = this->urand.number();
1826 A1[i+j*m].fastAccessDx(k) = val;
1827 A2[i+j*m].fastAccessDx(k) = val;
1828 A3[i+j*m].fastAccessDx(k) = val;
1829 }
1830 }
1831 }
1832 for (unsigned int i=0; i<m; i++) {
1833 x[i] = FadType(ndot, this->urand.number());
1834 for (unsigned int k=0; k<ndot; k++)
1835 x[i].fastAccessDx(k) = this->urand.number();
1836 }
1837 for (unsigned int i=0; i<n; i++) {
1838 y[i] = FadType(ndot, this->urand.number());
1839 for (unsigned int k=0; k<ndot; k++)
1840 y[i].fastAccessDx(k) = this->urand.number();
1841 }
1842 ScalarType alpha = this->urand.number();
1843
1844 Teuchos::BLAS<int,FadType> teuchos_blas;
1845 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1846
1847 Teuchos::BLAS<int,FadType> sacado_blas(false);
1848 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1849
1850 COMPARE_FAD_VECTORS(A1, A2, m*n);
1851
1852 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1853 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1854 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1855
1856 COMPARE_FAD_VECTORS(A1, A3, m*n);
1857}
1858
1859// Tests constant x
1861 typedef decltype(this->fad) FadType;
1862 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1864 auto m = this->m_;
1865 auto n = this->n_;
1866 auto ndot = this->ndot_;
1867
1868
1869 // GER is apparently not defined in the BLAS for complex types
1870 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1871 return;
1872
1873 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1874 A5(m*n,ndot), x(m,ndot), y(n,ndot);
1875 std::vector<ScalarType> xx(m);
1876 for (unsigned int j=0; j<n; j++) {
1877 for (unsigned int i=0; i<m; i++) {
1878 ScalarType val = this->urand.number();
1879 A1[i+j*m] = FadType(ndot, val);
1880 A2[i+j*m] = FadType(ndot, val);
1881 A3[i+j*m] = FadType(ndot, val);
1882 A4[i+j*m] = FadType(ndot, val);
1883 A5[i+j*m] = FadType(ndot, val);
1884 for (unsigned int k=0; k<ndot; k++) {
1885 val = this->urand.number();
1886 A1[i+j*m].fastAccessDx(k) = val;
1887 A2[i+j*m].fastAccessDx(k) = val;
1888 A3[i+j*m].fastAccessDx(k) = val;
1889 A4[i+j*m].fastAccessDx(k) = val;
1890 A5[i+j*m].fastAccessDx(k) = val;
1891 }
1892 }
1893 }
1894 for (unsigned int i=0; i<m; i++) {
1895 xx[i] = this->urand.number();
1896 x[i] = xx[i];
1897 }
1898 for (unsigned int i=0; i<n; i++) {
1899 y[i] = FadType(ndot, this->urand.number());
1900 for (unsigned int k=0; k<ndot; k++)
1901 y[i].fastAccessDx(k) = this->urand.number();
1902 }
1903 FadType alpha(ndot, this->urand.number());
1904 for (unsigned int k=0; k<ndot; k++) {
1905 alpha.fastAccessDx(k) = this->urand.number();
1906 }
1907
1908 Teuchos::BLAS<int,FadType> teuchos_blas;
1909 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1910
1911 Teuchos::BLAS<int,FadType> sacado_blas(false);
1912 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1913
1914 COMPARE_FAD_VECTORS(A1, A2, m*n);
1915
1916 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
1917 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1918 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1919
1920 COMPARE_FAD_VECTORS(A1, A3, m*n);
1921
1922 sacado_blas.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
1923
1924 COMPARE_FAD_VECTORS(A1, A4, m*n);
1925
1926 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
1927
1928 COMPARE_FAD_VECTORS(A1, A5, m*n);
1929}
1930
1931// Tests constant y
1933 typedef decltype(this->fad) FadType;
1934 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
1936 auto m = this->m_;
1937 auto n = this->n_;
1938 auto ndot = this->ndot_;
1939
1940
1941 // GER is apparently not defined in the BLAS for complex types
1942 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1943 return;
1944
1945 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1946 A5(m*n,ndot), x(m,ndot), y(n,ndot);
1947 std::vector<ScalarType> yy(n);
1948 for (unsigned int j=0; j<n; j++) {
1949 for (unsigned int i=0; i<m; i++) {
1950 ScalarType val = this->urand.number();
1951 A1[i+j*m] = FadType(ndot, val);
1952 A2[i+j*m] = FadType(ndot, val);
1953 A3[i+j*m] = FadType(ndot, val);
1954 A4[i+j*m] = FadType(ndot, val);
1955 A5[i+j*m] = FadType(ndot, val);
1956 for (unsigned int k=0; k<ndot; k++) {
1957 val = this->urand.number();
1958 A1[i+j*m].fastAccessDx(k) = val;
1959 A2[i+j*m].fastAccessDx(k) = val;
1960 A3[i+j*m].fastAccessDx(k) = val;
1961 A4[i+j*m].fastAccessDx(k) = val;
1962 A5[i+j*m].fastAccessDx(k) = val;
1963 }
1964 }
1965 }
1966 for (unsigned int i=0; i<m; i++) {
1967 x[i] = FadType(ndot, this->urand.number());
1968 for (unsigned int k=0; k<ndot; k++)
1969 x[i].fastAccessDx(k) = this->urand.number();
1970 }
1971 for (unsigned int i=0; i<n; i++) {
1972 yy[i] = this->urand.number();
1973 y[i] = yy[i];
1974 }
1975 FadType alpha(ndot, this->urand.number());
1976 for (unsigned int k=0; k<ndot; k++) {
1977 alpha.fastAccessDx(k) = this->urand.number();
1978 }
1979
1980 Teuchos::BLAS<int,FadType> teuchos_blas;
1981 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
1982
1983 Teuchos::BLAS<int,FadType> sacado_blas(false);
1984 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
1985
1986 COMPARE_FAD_VECTORS(A1, A2, m*n);
1987
1988 unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
1989 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
1990 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
1991
1992 COMPARE_FAD_VECTORS(A1, A3, m*n);
1993
1994 sacado_blas.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A4[0], m);
1995
1996 COMPARE_FAD_VECTORS(A1, A4, m*n);
1997
1998 sacado_blas2.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A5[0], m);
1999
2000 COMPARE_FAD_VECTORS(A1, A5, m*n);
2001}
2002
2003// Tests constant x and y
2005 typedef decltype(this->fad) FadType;
2006 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2008 auto m = this->m_;
2009 auto n = this->n_;
2010 auto ndot = this->ndot_;
2011
2012
2013 // GER is apparently not defined in the BLAS for complex types
2014 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
2015 return;
2016
2017 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2018 A5(m*n,ndot), x(m,ndot), y(n,ndot);
2019 std::vector<ScalarType> xx(n), yy(n);
2020 for (unsigned int j=0; j<n; j++) {
2021 for (unsigned int i=0; i<m; i++) {
2022 ScalarType val = this->urand.number();
2023 A1[i+j*m] = FadType(ndot, val);
2024 A2[i+j*m] = FadType(ndot, val);
2025 A3[i+j*m] = FadType(ndot, val);
2026 A4[i+j*m] = FadType(ndot, val);
2027 A5[i+j*m] = FadType(ndot, val);
2028 for (unsigned int k=0; k<ndot; k++) {
2029 val = this->urand.number();
2030 A1[i+j*m].fastAccessDx(k) = val;
2031 A2[i+j*m].fastAccessDx(k) = val;
2032 A3[i+j*m].fastAccessDx(k) = val;
2033 A4[i+j*m].fastAccessDx(k) = val;
2034 A5[i+j*m].fastAccessDx(k) = val;
2035 }
2036 }
2037 }
2038 for (unsigned int i=0; i<m; i++) {
2039 xx[i] = this->urand.number();
2040 x[i] = xx[i];
2041 }
2042 for (unsigned int i=0; i<n; i++) {
2043 yy[i] = this->urand.number();
2044 y[i] = yy[i];
2045 }
2046 FadType alpha(ndot, this->urand.number());
2047 for (unsigned int k=0; k<ndot; k++) {
2048 alpha.fastAccessDx(k) = this->urand.number();
2049 }
2050
2051 Teuchos::BLAS<int,FadType> teuchos_blas;
2052 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2053
2054 Teuchos::BLAS<int,FadType> sacado_blas(false);
2055 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2056
2057 COMPARE_FAD_VECTORS(A1, A2, m*n);
2058
2059 unsigned int sz = m*n*(1+ndot) + m + n;
2060 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2061 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2062
2063 COMPARE_FAD_VECTORS(A1, A3, m*n);
2064
2065 sacado_blas.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2066
2067 COMPARE_FAD_VECTORS(A1, A4, m*n);
2068
2069 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
2070
2071 COMPARE_FAD_VECTORS(A1, A5, m*n);
2072}
2073
2074// Tests constant A
2076 typedef decltype(this->fad) FadType;
2077 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2079 auto m = this->m_;
2080 auto n = this->n_;
2081 auto ndot = this->ndot_;
2082
2083
2084 // GER is apparently not defined in the BLAS for complex types
2085 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
2086 return;
2087
2088 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
2089 for (unsigned int j=0; j<n; j++) {
2090 for (unsigned int i=0; i<m; i++) {
2091 ScalarType val = this->urand.number();
2092 A1[i+j*m] = val;
2093 A2[i+j*m] = val;
2094 A3[i+j*m] = val;
2095 }
2096 }
2097 for (unsigned int i=0; i<m; i++) {
2098 x[i] = FadType(ndot, this->urand.number());
2099 for (unsigned int k=0; k<ndot; k++)
2100 x[i].fastAccessDx(k) = this->urand.number();
2101 }
2102 for (unsigned int i=0; i<n; i++) {
2103 y[i] = FadType(ndot, this->urand.number());
2104 for (unsigned int k=0; k<ndot; k++)
2105 y[i].fastAccessDx(k) = this->urand.number();
2106 }
2107 FadType alpha(ndot, this->urand.number());
2108 for (unsigned int k=0; k<ndot; k++) {
2109 alpha.fastAccessDx(k) = this->urand.number();
2110 }
2111
2112 Teuchos::BLAS<int,FadType> teuchos_blas;
2113 teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
2114
2115 Teuchos::BLAS<int,FadType> sacado_blas(false);
2116 sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
2117
2118 COMPARE_FAD_VECTORS(A1, A2, m*n);
2119
2120 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2121 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2122 sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
2123
2124 COMPARE_FAD_VECTORS(A1, A3, m*n);
2125}
2126
2127// Tests all arguments
2129 typedef decltype(this->fad) FadType;
2130 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2132 auto m = this->m_;
2133 auto n = this->n_;
2134 auto l = this->l_;
2135 auto ndot = this->ndot_;
2136
2137 VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2138 for (unsigned int j=0; j<l; j++) {
2139 for (unsigned int i=0; i<m; i++) {
2140 A[i+j*m] = FadType(ndot, this->urand.number());
2141 for (unsigned int k=0; k<ndot; k++)
2142 A[i+j*m].fastAccessDx(k) = this->urand.number();
2143 }
2144 }
2145 for (unsigned int j=0; j<n; j++) {
2146 for (unsigned int i=0; i<l; i++) {
2147 B[i+j*l] = FadType(ndot, this->urand.number());
2148 for (unsigned int k=0; k<ndot; k++)
2149 B[i+j*l].fastAccessDx(k) = this->urand.number();
2150 }
2151 }
2152 FadType alpha(ndot, this->urand.number());
2153 FadType beta(ndot, this->urand.number());
2154 for (unsigned int k=0; k<ndot; k++) {
2155 alpha.fastAccessDx(k) = this->urand.number();
2156 beta.fastAccessDx(k) = this->urand.number();
2157 }
2158
2159 for (unsigned int j=0; j<n; j++) {
2160 for (unsigned int i=0; i<m; i++) {
2161 ScalarType val = this->urand.number();
2162 C1[i+j*m] = FadType(ndot, val);
2163 C2[i+j*m] = FadType(ndot, val);
2164 C3[i+j*m] = FadType(ndot, val);
2165 for (unsigned int k=0; k<ndot; k++) {
2166 val = this->urand.number();
2167 C1[i+j*m].fastAccessDx(k) = val;
2168 C2[i+j*m].fastAccessDx(k) = val;
2169 C3[i+j*m].fastAccessDx(k) = val;
2170 }
2171 }
2172 }
2173
2174 Teuchos::BLAS<int,FadType> teuchos_blas;
2175 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2176 &A[0], m, &B[0], l, beta, &C1[0], m);
2177
2178 Teuchos::BLAS<int,FadType> sacado_blas(false);
2179 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2180 &A[0], m, &B[0], l, beta, &C2[0], m);
2181
2182 COMPARE_FAD_VECTORS(C1, C2, m*n);
2183
2184 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2185 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2186 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2187 &A[0], m, &B[0], l, beta, &C3[0], m);
2188
2189 COMPARE_FAD_VECTORS(C1, C3, m*n);
2190
2191 // transa
2192 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2193 &A[0], l, &B[0], l, beta, &C1[0], m);
2194 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2195 &A[0], l, &B[0], l, beta, &C2[0], m);
2196 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2197 &A[0], l, &B[0], l, beta, &C3[0], m);
2198
2199 COMPARE_FAD_VECTORS(C1, C2, m*n);
2200 COMPARE_FAD_VECTORS(C1, C3, m*n);
2201
2202 // transb
2203 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2204 &A[0], m, &B[0], n, beta, &C1[0], m);
2205 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2206 &A[0], m, &B[0], n, beta, &C2[0], m);
2207 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2208 &A[0], m, &B[0], n, beta, &C3[0], m);
2209
2210 COMPARE_FAD_VECTORS(C1, C2, m*n);
2211 COMPARE_FAD_VECTORS(C1, C3, m*n);
2212
2213 // transa and transb
2214 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2215 &A[0], l, &B[0], n, beta, &C1[0], m);
2216 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2217 &A[0], l, &B[0], n, beta, &C2[0], m);
2218 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2219 &A[0], l, &B[0], n, beta, &C3[0], m);
2220
2221 COMPARE_FAD_VECTORS(C1, C2, m*n);
2222 COMPARE_FAD_VECTORS(C1, C3, m*n);
2223}
2224
2225// Tests different lda, ldb, ldc
2227 typedef decltype(this->fad) FadType;
2228 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2230 auto m = this->m_;
2231 auto n = this->n_;
2232 auto l = this->l_;
2233 auto ndot = this->ndot_;
2234
2235 unsigned int lda = m+4;
2236 unsigned int ldb = l+4;
2237 unsigned int ldc = m+5;
2238 VectorType A(lda*l,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2239 C3(ldc*n,ndot);
2240 for (unsigned int j=0; j<l; j++) {
2241 for (unsigned int i=0; i<lda; i++) {
2242 A[i+j*lda] = FadType(ndot, this->urand.number());
2243 for (unsigned int k=0; k<ndot; k++)
2244 A[i+j*lda].fastAccessDx(k) = this->urand.number();
2245 }
2246 }
2247 for (unsigned int j=0; j<n; j++) {
2248 for (unsigned int i=0; i<ldb; i++) {
2249 B[i+j*ldb] = FadType(ndot, this->urand.number());
2250 for (unsigned int k=0; k<ndot; k++)
2251 B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2252 }
2253 }
2254 FadType alpha(ndot, this->urand.number());
2255 FadType beta(ndot, this->urand.number());
2256 for (unsigned int k=0; k<ndot; k++) {
2257 alpha.fastAccessDx(k) = this->urand.number();
2258 beta.fastAccessDx(k) = this->urand.number();
2259 }
2260
2261 for (unsigned int j=0; j<n; j++) {
2262 for (unsigned int i=0; i<ldc; i++) {
2263 ScalarType val = this->urand.number();
2264 C1[i+j*ldc] = FadType(ndot, val);
2265 C2[i+j*ldc] = FadType(ndot, val);
2266 C3[i+j*ldc] = FadType(ndot, val);
2267 for (unsigned int k=0; k<ndot; k++) {
2268 val = this->urand.number();
2269 C1[i+j*ldc].fastAccessDx(k) = val;
2270 C2[i+j*ldc].fastAccessDx(k) = val;
2271 C3[i+j*ldc].fastAccessDx(k) = val;
2272 }
2273 }
2274 }
2275
2276 Teuchos::BLAS<int,FadType> teuchos_blas;
2277 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2278 &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2279
2280 Teuchos::BLAS<int,FadType> sacado_blas(false);
2281 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2282 &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2283
2284 COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2285
2286 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2287 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2288 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2289 &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2290
2291 COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2292}
2293
2294// Tests different lda, ldb, ldc with transa
2296 typedef decltype(this->fad) FadType;
2297 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2299 auto m = this->m_;
2300 auto n = this->n_;
2301 auto l = this->l_;
2302 auto ndot = this->ndot_;
2303
2304 unsigned int lda = l+3;
2305 unsigned int ldb = l+4;
2306 unsigned int ldc = m+5;
2307 VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2308 C3(ldc*n,ndot);
2309 for (unsigned int j=0; j<m; j++) {
2310 for (unsigned int i=0; i<lda; i++) {
2311 A[i+j*lda] = FadType(ndot, this->urand.number());
2312 for (unsigned int k=0; k<ndot; k++)
2313 A[i+j*lda].fastAccessDx(k) = this->urand.number();
2314 }
2315 }
2316 for (unsigned int j=0; j<n; j++) {
2317 for (unsigned int i=0; i<ldb; i++) {
2318 B[i+j*ldb] = FadType(ndot, this->urand.number());
2319 for (unsigned int k=0; k<ndot; k++)
2320 B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2321 }
2322 }
2323 FadType alpha(ndot, this->urand.number());
2324 FadType beta(ndot, this->urand.number());
2325 for (unsigned int k=0; k<ndot; k++) {
2326 alpha.fastAccessDx(k) = this->urand.number();
2327 beta.fastAccessDx(k) = this->urand.number();
2328 }
2329
2330 for (unsigned int j=0; j<n; j++) {
2331 for (unsigned int i=0; i<ldc; i++) {
2332 ScalarType val = this->urand.number();
2333 C1[i+j*ldc] = FadType(ndot, val);
2334 C2[i+j*ldc] = FadType(ndot, val);
2335 C3[i+j*ldc] = FadType(ndot, val);
2336 for (unsigned int k=0; k<ndot; k++) {
2337 val = this->urand.number();
2338 C1[i+j*ldc].fastAccessDx(k) = val;
2339 C2[i+j*ldc].fastAccessDx(k) = val;
2340 C3[i+j*ldc].fastAccessDx(k) = val;
2341 }
2342 }
2343 }
2344
2345 Teuchos::BLAS<int,FadType> teuchos_blas;
2346 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2347 &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2348
2349 Teuchos::BLAS<int,FadType> sacado_blas(false);
2350 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2351 &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2352
2353 COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2354
2355 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2356 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2357 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2358 &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2359
2360 COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2361}
2362
2363// Tests different lda, ldb, ldc with transb
2365 typedef decltype(this->fad) FadType;
2366 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2368 auto m = this->m_;
2369 auto n = this->n_;
2370 auto l = this->l_;
2371 auto ndot = this->ndot_;
2372
2373 unsigned int lda = m+4;
2374 unsigned int ldb = n+4;
2375 unsigned int ldc = m+5;
2376 VectorType A(lda*l,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2377 C3(ldc*n,ndot);
2378 for (unsigned int j=0; j<l; j++) {
2379 for (unsigned int i=0; i<lda; i++) {
2380 A[i+j*lda] = FadType(ndot, this->urand.number());
2381 for (unsigned int k=0; k<ndot; k++)
2382 A[i+j*lda].fastAccessDx(k) = this->urand.number();
2383 }
2384 }
2385 for (unsigned int j=0; j<l; j++) {
2386 for (unsigned int i=0; i<ldb; i++) {
2387 B[i+j*ldb] = FadType(ndot, this->urand.number());
2388 for (unsigned int k=0; k<ndot; k++)
2389 B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2390 }
2391 }
2392 FadType alpha(ndot, this->urand.number());
2393 FadType beta(ndot, this->urand.number());
2394 for (unsigned int k=0; k<ndot; k++) {
2395 alpha.fastAccessDx(k) = this->urand.number();
2396 beta.fastAccessDx(k) = this->urand.number();
2397 }
2398
2399 for (unsigned int j=0; j<n; j++) {
2400 for (unsigned int i=0; i<ldc; i++) {
2401 ScalarType val = this->urand.number();
2402 C1[i+j*ldc] = FadType(ndot, val);
2403 C2[i+j*ldc] = FadType(ndot, val);
2404 C3[i+j*ldc] = FadType(ndot, val);
2405 for (unsigned int k=0; k<ndot; k++) {
2406 val = this->urand.number();
2407 C1[i+j*ldc].fastAccessDx(k) = val;
2408 C2[i+j*ldc].fastAccessDx(k) = val;
2409 C3[i+j*ldc].fastAccessDx(k) = val;
2410 }
2411 }
2412 }
2413
2414 Teuchos::BLAS<int,FadType> teuchos_blas;
2415 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2416 &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2417
2418 Teuchos::BLAS<int,FadType> sacado_blas(false);
2419 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2420 &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2421
2422 COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2423
2424 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2425 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2426 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2427 &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2428
2429 COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2430}
2431
2432// Tests different lda, ldb, ldc with transa and transb
2434 typedef decltype(this->fad) FadType;
2435 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2437 auto m = this->m_;
2438 auto n = this->n_;
2439 auto l = this->l_;
2440 auto ndot = this->ndot_;
2441
2442 unsigned int lda = l+3;
2443 unsigned int ldb = n+4;
2444 unsigned int ldc = m+5;
2445 VectorType A(lda*m,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
2446 C3(ldc*n,ndot);
2447 for (unsigned int j=0; j<m; j++) {
2448 for (unsigned int i=0; i<lda; i++) {
2449 A[i+j*lda] = FadType(ndot, this->urand.number());
2450 for (unsigned int k=0; k<ndot; k++)
2451 A[i+j*lda].fastAccessDx(k) = this->urand.number();
2452 }
2453 }
2454 for (unsigned int j=0; j<l; j++) {
2455 for (unsigned int i=0; i<ldb; i++) {
2456 B[i+j*ldb] = FadType(ndot, this->urand.number());
2457 for (unsigned int k=0; k<ndot; k++)
2458 B[i+j*ldb].fastAccessDx(k) = this->urand.number();
2459 }
2460 }
2461 FadType alpha(ndot, this->urand.number());
2462 FadType beta(ndot, this->urand.number());
2463 for (unsigned int k=0; k<ndot; k++) {
2464 alpha.fastAccessDx(k) = this->urand.number();
2465 beta.fastAccessDx(k) = this->urand.number();
2466 }
2467
2468 for (unsigned int j=0; j<n; j++) {
2469 for (unsigned int i=0; i<ldc; i++) {
2470 ScalarType val = this->urand.number();
2471 C1[i+j*ldc] = FadType(ndot, val);
2472 C2[i+j*ldc] = FadType(ndot, val);
2473 C3[i+j*ldc] = FadType(ndot, val);
2474 for (unsigned int k=0; k<ndot; k++) {
2475 val = this->urand.number();
2476 C1[i+j*ldc].fastAccessDx(k) = val;
2477 C2[i+j*ldc].fastAccessDx(k) = val;
2478 C3[i+j*ldc].fastAccessDx(k) = val;
2479 }
2480 }
2481 }
2482
2483 Teuchos::BLAS<int,FadType> teuchos_blas;
2484 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2485 &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
2486
2487 Teuchos::BLAS<int,FadType> sacado_blas(false);
2488 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2489 &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
2490
2491 COMPARE_FAD_VECTORS(C1, C2, ldc*n);
2492
2493 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2494 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2495 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2496 &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
2497
2498 COMPARE_FAD_VECTORS(C1, C3, ldc*n);
2499}
2500
2501// Tests with constant C
2503 typedef decltype(this->fad) FadType;
2504 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2506 auto m = this->m_;
2507 auto n = this->n_;
2508 auto l = this->l_;
2509 auto ndot = this->ndot_;
2510
2511 VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2512 for (unsigned int j=0; j<l; j++) {
2513 for (unsigned int i=0; i<m; i++) {
2514 A[i+j*m] = FadType(ndot, this->urand.number());
2515 for (unsigned int k=0; k<ndot; k++)
2516 A[i+j*m].fastAccessDx(k) = this->urand.number();
2517 }
2518 }
2519 for (unsigned int j=0; j<n; j++) {
2520 for (unsigned int i=0; i<l; i++) {
2521 B[i+j*l] = FadType(ndot, this->urand.number());
2522 for (unsigned int k=0; k<ndot; k++)
2523 B[i+j*l].fastAccessDx(k) = this->urand.number();
2524 }
2525 }
2526 FadType alpha(ndot, this->urand.number());
2527 FadType beta(ndot, this->urand.number());
2528 for (unsigned int k=0; k<ndot; k++) {
2529 alpha.fastAccessDx(k) = this->urand.number();
2530 beta.fastAccessDx(k) = this->urand.number();
2531 }
2532
2533 for (unsigned int j=0; j<n; j++) {
2534 for (unsigned int i=0; i<m; i++) {
2535 ScalarType val = this->urand.number();
2536 C1[i+j*m] = val;
2537 C2[i+j*m] = val;
2538 C3[i+j*m] = val;
2539 }
2540 }
2541
2542 Teuchos::BLAS<int,FadType> teuchos_blas;
2543 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2544 &A[0], m, &B[0], l, beta, &C1[0], m);
2545
2546 Teuchos::BLAS<int,FadType> sacado_blas(false);
2547 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2548 &A[0], m, &B[0], l, beta, &C2[0], m);
2549
2550 COMPARE_FAD_VECTORS(C1, C2, m*n);
2551
2552 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2553 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2554 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2555 &A[0], m, &B[0], l, beta, &C3[0], m);
2556
2557 COMPARE_FAD_VECTORS(C1, C3, m*n);
2558
2559 // transa
2560 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2561 &A[0], l, &B[0], l, beta, &C1[0], m);
2562 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2563 &A[0], l, &B[0], l, beta, &C2[0], m);
2564 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2565 &A[0], l, &B[0], l, beta, &C3[0], m);
2566
2567 COMPARE_FAD_VECTORS(C1, C2, m*n);
2568 COMPARE_FAD_VECTORS(C1, C3, m*n);
2569
2570 // transb
2571 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2572 &A[0], m, &B[0], n, beta, &C1[0], m);
2573 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2574 &A[0], m, &B[0], n, beta, &C2[0], m);
2575 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2576 &A[0], m, &B[0], n, beta, &C3[0], m);
2577
2578 COMPARE_FAD_VECTORS(C1, C2, m*n);
2579 COMPARE_FAD_VECTORS(C1, C3, m*n);
2580
2581 // transa and transb
2582 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2583 &A[0], l, &B[0], n, beta, &C1[0], m);
2584 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2585 &A[0], l, &B[0], n, beta, &C2[0], m);
2586 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2587 &A[0], l, &B[0], n, beta, &C3[0], m);
2588
2589 COMPARE_FAD_VECTORS(C1, C2, m*n);
2590 COMPARE_FAD_VECTORS(C1, C3, m*n);
2591}
2592
2593// Tests with constant alpha, beta
2595 typedef decltype(this->fad) FadType;
2596 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2598 auto m = this->m_;
2599 auto n = this->n_;
2600 auto l = this->l_;
2601 auto ndot = this->ndot_;
2602
2603 VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2604 for (unsigned int j=0; j<l; j++) {
2605 for (unsigned int i=0; i<m; i++) {
2606 A[i+j*m] = FadType(ndot, this->urand.number());
2607 for (unsigned int k=0; k<ndot; k++)
2608 A[i+j*m].fastAccessDx(k) = this->urand.number();
2609 }
2610 }
2611 for (unsigned int j=0; j<n; j++) {
2612 for (unsigned int i=0; i<l; i++) {
2613 B[i+j*l] = FadType(ndot, this->urand.number());
2614 for (unsigned int k=0; k<ndot; k++)
2615 B[i+j*l].fastAccessDx(k) = this->urand.number();
2616 }
2617 }
2618 ScalarType alpha = this->urand.number();
2619 ScalarType beta = this->urand.number();
2620
2621 for (unsigned int j=0; j<n; j++) {
2622 for (unsigned int i=0; i<m; i++) {
2623 ScalarType val = this->urand.number();
2624 C1[i+j*m] = FadType(ndot, val);
2625 C2[i+j*m] = FadType(ndot, val);
2626 C3[i+j*m] = FadType(ndot, val);
2627 for (unsigned int k=0; k<ndot; k++) {
2628 val = this->urand.number();
2629 C1[i+j*m].fastAccessDx(k) = val;
2630 C2[i+j*m].fastAccessDx(k) = val;
2631 C3[i+j*m].fastAccessDx(k) = val;
2632 }
2633 }
2634 }
2635
2636 Teuchos::BLAS<int,FadType> teuchos_blas;
2637 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2638 &A[0], m, &B[0], l, beta, &C1[0], m);
2639
2640 Teuchos::BLAS<int,FadType> sacado_blas(false);
2641 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2642 &A[0], m, &B[0], l, beta, &C2[0], m);
2643
2644 COMPARE_FAD_VECTORS(C1, C2, m*n);
2645
2646 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2647 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2648 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2649 &A[0], m, &B[0], l, beta, &C3[0], m);
2650
2651 COMPARE_FAD_VECTORS(C1, C3, m*n);
2652
2653 // transa
2654 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2655 &A[0], l, &B[0], l, beta, &C1[0], m);
2656 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2657 &A[0], l, &B[0], l, beta, &C2[0], m);
2658 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2659 &A[0], l, &B[0], l, beta, &C3[0], m);
2660
2661 COMPARE_FAD_VECTORS(C1, C2, m*n);
2662 COMPARE_FAD_VECTORS(C1, C3, m*n);
2663
2664 // transb
2665 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2666 &A[0], m, &B[0], n, beta, &C1[0], m);
2667 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2668 &A[0], m, &B[0], n, beta, &C2[0], m);
2669 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2670 &A[0], m, &B[0], n, beta, &C3[0], m);
2671
2672 COMPARE_FAD_VECTORS(C1, C2, m*n);
2673 COMPARE_FAD_VECTORS(C1, C3, m*n);
2674
2675 // transa and transb
2676 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2677 &A[0], l, &B[0], n, beta, &C1[0], m);
2678 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2679 &A[0], l, &B[0], n, beta, &C2[0], m);
2680 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2681 &A[0], l, &B[0], n, beta, &C3[0], m);
2682
2683 COMPARE_FAD_VECTORS(C1, C2, m*n);
2684 COMPARE_FAD_VECTORS(C1, C3, m*n);
2685}
2686
2687// Tests with constant A
2689 typedef decltype(this->fad) FadType;
2690 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2692 auto m = this->m_;
2693 auto n = this->n_;
2694 auto l = this->l_;
2695 auto ndot = this->ndot_;
2696
2697 VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2698 C4(m*n,ndot), C5(m*n,ndot);
2699 std::vector<ScalarType> a(m*l);
2700 for (unsigned int j=0; j<l; j++) {
2701 for (unsigned int i=0; i<m; i++) {
2702 a[i+j*m] = this->urand.number();
2703 A[i+j*m] = a[i+j*m];
2704 }
2705 }
2706 for (unsigned int j=0; j<n; j++) {
2707 for (unsigned int i=0; i<l; i++) {
2708 B[i+j*l] = FadType(ndot, this->urand.number());
2709 for (unsigned int k=0; k<ndot; k++)
2710 B[i+j*l].fastAccessDx(k) = this->urand.number();
2711 }
2712 }
2713 FadType alpha(ndot, this->urand.number());
2714 FadType beta(ndot, this->urand.number());
2715 for (unsigned int k=0; k<ndot; k++) {
2716 alpha.fastAccessDx(k) = this->urand.number();
2717 beta.fastAccessDx(k) = this->urand.number();
2718 }
2719
2720 for (unsigned int j=0; j<n; j++) {
2721 for (unsigned int i=0; i<m; i++) {
2722 ScalarType val = this->urand.number();
2723 C1[i+j*m] = FadType(ndot, val);
2724 C2[i+j*m] = FadType(ndot, val);
2725 C3[i+j*m] = FadType(ndot, val);
2726 C4[i+j*m] = FadType(ndot, val);
2727 C5[i+j*m] = FadType(ndot, val);
2728 for (unsigned int k=0; k<ndot; k++) {
2729 val = this->urand.number();
2730 C1[i+j*m].fastAccessDx(k) = val;
2731 C2[i+j*m].fastAccessDx(k) = val;
2732 C3[i+j*m].fastAccessDx(k) = val;
2733 C4[i+j*m].fastAccessDx(k) = val;
2734 C5[i+j*m].fastAccessDx(k) = val;
2735 }
2736 }
2737 }
2738
2739 Teuchos::BLAS<int,FadType> teuchos_blas;
2740 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2741 &A[0], m, &B[0], l, beta, &C1[0], m);
2742
2743 Teuchos::BLAS<int,FadType> sacado_blas(false);
2744 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2745 &A[0], m, &B[0], l, beta, &C2[0], m);
2746
2747 COMPARE_FAD_VECTORS(C1, C2, m*n);
2748
2749 unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2750 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2751 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2752 &A[0], m, &B[0], l, beta, &C3[0], m);
2753
2754 COMPARE_FAD_VECTORS(C1, C3, m*n);
2755
2756 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2757 &a[0], m, &B[0], l, beta, &C4[0], m);
2758
2759 COMPARE_FAD_VECTORS(C1, C4, m*n);
2760
2761 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2762 &a[0], m, &B[0], l, beta, &C5[0], m);
2763
2764 COMPARE_FAD_VECTORS(C1, C5, m*n);
2765
2766 // transa
2767 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2768 &A[0], l, &B[0], l, beta, &C1[0], m);
2769 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2770 &A[0], l, &B[0], l, beta, &C2[0], m);
2771 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2772 &A[0], l, &B[0], l, beta, &C3[0], m);
2773 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2774 &a[0], l, &B[0], l, beta, &C4[0], m);
2775 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2776 &a[0], l, &B[0], l, beta, &C5[0], m);
2777
2778 COMPARE_FAD_VECTORS(C1, C2, m*n);
2779 COMPARE_FAD_VECTORS(C1, C3, m*n);
2780 COMPARE_FAD_VECTORS(C1, C4, m*n);
2781 COMPARE_FAD_VECTORS(C1, C5, m*n);
2782
2783 // transb
2784 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2785 &A[0], m, &B[0], n, beta, &C1[0], m);
2786 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2787 &A[0], m, &B[0], n, beta, &C2[0], m);
2788 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2789 &A[0], m, &B[0], n, beta, &C3[0], m);
2790 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2791 &a[0], m, &B[0], n, beta, &C4[0], m);
2792 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2793 &a[0], m, &B[0], n, beta, &C5[0], m);
2794
2795 COMPARE_FAD_VECTORS(C1, C2, m*n);
2796 COMPARE_FAD_VECTORS(C1, C3, m*n);
2797 COMPARE_FAD_VECTORS(C1, C4, m*n);
2798 COMPARE_FAD_VECTORS(C1, C5, m*n);
2799
2800 // transa and transb
2801 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2802 &A[0], l, &B[0], n, beta, &C1[0], m);
2803 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2804 &A[0], l, &B[0], n, beta, &C2[0], m);
2805 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2806 &A[0], l, &B[0], n, beta, &C3[0], m);
2807 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2808 &a[0], l, &B[0], n, beta, &C4[0], m);
2809 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2810 &a[0], l, &B[0], n, beta, &C5[0], m);
2811
2812 COMPARE_FAD_VECTORS(C1, C2, m*n);
2813 COMPARE_FAD_VECTORS(C1, C3, m*n);
2814 COMPARE_FAD_VECTORS(C1, C4, m*n);
2815 COMPARE_FAD_VECTORS(C1, C5, m*n);
2816}
2817
2818// Tests with constant B
2820 typedef decltype(this->fad) FadType;
2821 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2823 auto m = this->m_;
2824 auto n = this->n_;
2825 auto l = this->l_;
2826 auto ndot = this->ndot_;
2827
2828 VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2829 C4(m*n,ndot), C5(m*n,ndot);
2830 std::vector<ScalarType> b(l*n);
2831 for (unsigned int j=0; j<l; j++) {
2832 for (unsigned int i=0; i<m; i++) {
2833 A[i+j*m] = FadType(ndot, this->urand.number());
2834 for (unsigned int k=0; k<ndot; k++)
2835 A[i+j*m].fastAccessDx(k) = this->urand.number();
2836 }
2837 }
2838 for (unsigned int j=0; j<n; j++) {
2839 for (unsigned int i=0; i<l; i++) {
2840 b[i+j*l] = this->urand.number();
2841 B[i+j*l] = b[i+j*l];
2842 }
2843 }
2844 FadType alpha(ndot, this->urand.number());
2845 FadType beta(ndot, this->urand.number());
2846 for (unsigned int k=0; k<ndot; k++) {
2847 alpha.fastAccessDx(k) = this->urand.number();
2848 beta.fastAccessDx(k) = this->urand.number();
2849 }
2850
2851 for (unsigned int j=0; j<n; j++) {
2852 for (unsigned int i=0; i<m; i++) {
2853 ScalarType val = this->urand.number();
2854 C1[i+j*m] = FadType(ndot, val);
2855 C2[i+j*m] = FadType(ndot, val);
2856 C3[i+j*m] = FadType(ndot, val);
2857 C4[i+j*m] = FadType(ndot, val);
2858 C5[i+j*m] = FadType(ndot, val);
2859 for (unsigned int k=0; k<ndot; k++) {
2860 val = this->urand.number();
2861 C1[i+j*m].fastAccessDx(k) = val;
2862 C2[i+j*m].fastAccessDx(k) = val;
2863 C3[i+j*m].fastAccessDx(k) = val;
2864 C4[i+j*m].fastAccessDx(k) = val;
2865 C5[i+j*m].fastAccessDx(k) = val;
2866 }
2867 }
2868 }
2869
2870 Teuchos::BLAS<int,FadType> teuchos_blas;
2871 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2872 &A[0], m, &B[0], l, beta, &C1[0], m);
2873
2874 Teuchos::BLAS<int,FadType> sacado_blas(false);
2875 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2876 &A[0], m, &B[0], l, beta, &C2[0], m);
2877
2878 COMPARE_FAD_VECTORS(C1, C2, m*n);
2879
2880 unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2881 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
2882 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2883 &A[0], m, &B[0], l, beta, &C3[0], m);
2884
2885 COMPARE_FAD_VECTORS(C1, C3, m*n);
2886
2887 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2888 &A[0], m, &b[0], l, beta, &C4[0], m);
2889
2890 COMPARE_FAD_VECTORS(C1, C4, m*n);
2891
2892 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2893 &A[0], m, &b[0], l, beta, &C5[0], m);
2894
2895 COMPARE_FAD_VECTORS(C1, C5, m*n);
2896
2897 // transa
2898 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2899 &A[0], l, &B[0], l, beta, &C1[0], m);
2900 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2901 &A[0], l, &B[0], l, beta, &C2[0], m);
2902 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2903 &A[0], l, &B[0], l, beta, &C3[0], m);
2904 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2905 &A[0], l, &b[0], l, beta, &C4[0], m);
2906 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2907 &A[0], l, &b[0], l, beta, &C5[0], m);
2908
2909 COMPARE_FAD_VECTORS(C1, C2, m*n);
2910 COMPARE_FAD_VECTORS(C1, C3, m*n);
2911 COMPARE_FAD_VECTORS(C1, C4, m*n);
2912 COMPARE_FAD_VECTORS(C1, C5, m*n);
2913
2914 // transb
2915 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2916 &A[0], m, &B[0], n, beta, &C1[0], m);
2917 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2918 &A[0], m, &B[0], n, beta, &C2[0], m);
2919 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2920 &A[0], m, &B[0], n, beta, &C3[0], m);
2921 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2922 &A[0], m, &b[0], n, beta, &C4[0], m);
2923 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2924 &A[0], m, &b[0], n, beta, &C5[0], m);
2925
2926 COMPARE_FAD_VECTORS(C1, C2, m*n);
2927 COMPARE_FAD_VECTORS(C1, C3, m*n);
2928 COMPARE_FAD_VECTORS(C1, C4, m*n);
2929 COMPARE_FAD_VECTORS(C1, C5, m*n);
2930
2931 // transa and transb
2932 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2933 &A[0], l, &B[0], n, beta, &C1[0], m);
2934 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2935 &A[0], l, &B[0], n, beta, &C2[0], m);
2936 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2937 &A[0], l, &B[0], n, beta, &C3[0], m);
2938 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2939 &A[0], l, &b[0], n, beta, &C4[0], m);
2940 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2941 &A[0], l, &b[0], n, beta, &C5[0], m);
2942
2943 COMPARE_FAD_VECTORS(C1, C2, m*n);
2944 COMPARE_FAD_VECTORS(C1, C3, m*n);
2945 COMPARE_FAD_VECTORS(C1, C4, m*n);
2946 COMPARE_FAD_VECTORS(C1, C5, m*n);
2947}
2948
2949// Tests with constant A and B
2951 typedef decltype(this->fad) FadType;
2952 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
2954 auto m = this->m_;
2955 auto n = this->n_;
2956 auto l = this->l_;
2957 auto ndot = this->ndot_;
2958
2959 VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2960 C4(m*n,ndot), C5(m*n,ndot);
2961 std::vector<ScalarType> a(m*l), b(l*n);
2962 for (unsigned int j=0; j<l; j++) {
2963 for (unsigned int i=0; i<m; i++) {
2964 a[i+j*m] = this->urand.number();
2965 A[i+j*m] = a[i+j*m];
2966 }
2967 }
2968 for (unsigned int j=0; j<n; j++) {
2969 for (unsigned int i=0; i<l; i++) {
2970 b[i+j*l] = this->urand.number();
2971 B[i+j*l] = b[i+j*l];
2972 }
2973 }
2974 FadType alpha(ndot, this->urand.number());
2975 FadType beta(ndot, this->urand.number());
2976 for (unsigned int k=0; k<ndot; k++) {
2977 alpha.fastAccessDx(k) = this->urand.number();
2978 beta.fastAccessDx(k) = this->urand.number();
2979 }
2980
2981 for (unsigned int j=0; j<n; j++) {
2982 for (unsigned int i=0; i<m; i++) {
2983 ScalarType val = this->urand.number();
2984 C1[i+j*m] = FadType(ndot, val);
2985 C2[i+j*m] = FadType(ndot, val);
2986 C3[i+j*m] = FadType(ndot, val);
2987 C4[i+j*m] = FadType(ndot, val);
2988 C5[i+j*m] = FadType(ndot, val);
2989 for (unsigned int k=0; k<ndot; k++) {
2990 val = this->urand.number();
2991 C1[i+j*m].fastAccessDx(k) = val;
2992 C2[i+j*m].fastAccessDx(k) = val;
2993 C3[i+j*m].fastAccessDx(k) = val;
2994 C4[i+j*m].fastAccessDx(k) = val;
2995 C5[i+j*m].fastAccessDx(k) = val;
2996 }
2997 }
2998 }
2999
3000 Teuchos::BLAS<int,FadType> teuchos_blas;
3001 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3002 &A[0], m, &B[0], l, beta, &C1[0], m);
3003
3004 Teuchos::BLAS<int,FadType> sacado_blas(false);
3005 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3006 &A[0], m, &B[0], l, beta, &C2[0], m);
3007
3008 COMPARE_FAD_VECTORS(C1, C2, m*n);
3009
3010 unsigned int sz = m*l + l*n + m*n*(1+ndot);
3011 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3012 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3013 &A[0], m, &B[0], l, beta, &C3[0], m);
3014
3015 COMPARE_FAD_VECTORS(C1, C3, m*n);
3016
3017 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3018 &a[0], m, &b[0], l, beta, &C4[0], m);
3019
3020 COMPARE_FAD_VECTORS(C1, C4, m*n);
3021
3022 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3023 &a[0], m, &b[0], l, beta, &C5[0], m);
3024
3025 COMPARE_FAD_VECTORS(C1, C5, m*n);
3026
3027 // transa
3028 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3029 &A[0], l, &B[0], l, beta, &C1[0], m);
3030 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3031 &A[0], l, &B[0], l, beta, &C2[0], m);
3032 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3033 &A[0], l, &B[0], l, beta, &C3[0], m);
3034 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3035 &a[0], l, &b[0], l, beta, &C4[0], m);
3036 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3037 &a[0], l, &b[0], l, beta, &C5[0], m);
3038
3039 COMPARE_FAD_VECTORS(C1, C2, m*n);
3040 COMPARE_FAD_VECTORS(C1, C3, m*n);
3041 COMPARE_FAD_VECTORS(C1, C4, m*n);
3042 COMPARE_FAD_VECTORS(C1, C5, m*n);
3043
3044 // transb
3045 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3046 &A[0], m, &B[0], n, beta, &C1[0], m);
3047 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3048 &A[0], m, &B[0], n, beta, &C2[0], m);
3049 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3050 &A[0], m, &B[0], n, beta, &C3[0], m);
3051 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3052 &a[0], m, &b[0], n, beta, &C4[0], m);
3053 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3054 &a[0], m, &b[0], n, beta, &C5[0], m);
3055
3056 COMPARE_FAD_VECTORS(C1, C2, m*n);
3057 COMPARE_FAD_VECTORS(C1, C3, m*n);
3058 COMPARE_FAD_VECTORS(C1, C4, m*n);
3059 COMPARE_FAD_VECTORS(C1, C5, m*n);
3060
3061 // transa and transb
3062 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3063 &A[0], l, &B[0], n, beta, &C1[0], m);
3064 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3065 &A[0], l, &B[0], n, beta, &C2[0], m);
3066 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3067 &A[0], l, &B[0], n, beta, &C3[0], m);
3068 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3069 &a[0], l, &b[0], n, beta, &C4[0], m);
3070 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3071 &a[0], l, &b[0], n, beta, &C5[0], m);
3072
3073 COMPARE_FAD_VECTORS(C1, C2, m*n);
3074 COMPARE_FAD_VECTORS(C1, C3, m*n);
3075 COMPARE_FAD_VECTORS(C1, C4, m*n);
3076 COMPARE_FAD_VECTORS(C1, C5, m*n);
3077}
3078
3079// Tests all arguments, left side
3081 typedef decltype(this->fad) FadType;
3082 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3084 auto m = this->m_;
3085 auto n = this->n_;
3086 auto ndot = this->ndot_;
3087
3088
3089 // SYMM is apparently not defined in the BLAS for complex types
3090 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3091 return;
3092
3093 VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3094 for (unsigned int j=0; j<m; j++) {
3095 for (unsigned int i=0; i<m; i++) {
3096 A[i+j*m] = FadType(ndot, this->urand.number());
3097 for (unsigned int k=0; k<ndot; k++)
3098 A[i+j*m].fastAccessDx(k) = this->urand.number();
3099 }
3100 }
3101 for (unsigned int j=0; j<n; j++) {
3102 for (unsigned int i=0; i<m; i++) {
3103 B[i+j*m] = FadType(ndot, this->urand.number());
3104 for (unsigned int k=0; k<ndot; k++)
3105 B[i+j*m].fastAccessDx(k) = this->urand.number();
3106 }
3107 }
3108 FadType alpha(ndot, this->urand.number());
3109 FadType beta(ndot, this->urand.number());
3110 for (unsigned int k=0; k<ndot; k++) {
3111 alpha.fastAccessDx(k) = this->urand.number();
3112 beta.fastAccessDx(k) = this->urand.number();
3113 }
3114
3115 for (unsigned int j=0; j<n; j++) {
3116 for (unsigned int i=0; i<m; i++) {
3117 ScalarType val = this->urand.number();
3118 C1[i+j*m] = FadType(ndot, val);
3119 C2[i+j*m] = FadType(ndot, val);
3120 C3[i+j*m] = FadType(ndot, val);
3121 for (unsigned int k=0; k<ndot; k++) {
3122 val = this->urand.number();
3123 C1[i+j*m].fastAccessDx(k) = val;
3124 C2[i+j*m].fastAccessDx(k) = val;
3125 C3[i+j*m].fastAccessDx(k) = val;
3126 }
3127 }
3128 }
3129
3130 Teuchos::BLAS<int,FadType> teuchos_blas;
3131 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3132 &A[0], m, &B[0], m, beta, &C1[0], m);
3133
3134 Teuchos::BLAS<int,FadType> sacado_blas(false);
3135 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3136 &A[0], m, &B[0], m, beta, &C2[0], m);
3137
3138 COMPARE_FAD_VECTORS(C1, C2, m*n);
3139
3140 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3141 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3142 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3143 &A[0], m, &B[0], m, beta, &C3[0], m);
3144
3145 COMPARE_FAD_VECTORS(C1, C3, m*n);
3146
3147 // lower tri
3148 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3149 &A[0], m, &B[0], m, beta, &C1[0], m);
3150 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3151 &A[0], m, &B[0], m, beta, &C2[0], m);
3152 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3153 &A[0], m, &B[0], m, beta, &C3[0], m);
3154
3155 COMPARE_FAD_VECTORS(C1, C2, m*n);
3156 COMPARE_FAD_VECTORS(C1, C3, m*n);
3157}
3158
3159// Tests all arguments, right side
3161 typedef decltype(this->fad) FadType;
3162 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3164 auto m = this->m_;
3165 auto n = this->n_;
3166 auto ndot = this->ndot_;
3167
3168
3169 // SYMM is apparently not defined in the BLAS for complex types
3170 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3171 return;
3172
3173 VectorType A(n*n,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3174 for (unsigned int j=0; j<n; j++) {
3175 for (unsigned int i=0; i<n; i++) {
3176 A[i+j*n] = FadType(ndot, this->urand.number());
3177 for (unsigned int k=0; k<ndot; k++)
3178 A[i+j*n].fastAccessDx(k) = this->urand.number();
3179 }
3180 }
3181 for (unsigned int j=0; j<n; j++) {
3182 for (unsigned int i=0; i<m; i++) {
3183 B[i+j*m] = FadType(ndot, this->urand.number());
3184 for (unsigned int k=0; k<ndot; k++)
3185 B[i+j*m].fastAccessDx(k) = this->urand.number();
3186 }
3187 }
3188 FadType alpha(ndot, this->urand.number());
3189 FadType beta(ndot, this->urand.number());
3190 for (unsigned int k=0; k<ndot; k++) {
3191 alpha.fastAccessDx(k) = this->urand.number();
3192 beta.fastAccessDx(k) = this->urand.number();
3193 }
3194
3195 for (unsigned int j=0; j<n; j++) {
3196 for (unsigned int i=0; i<m; i++) {
3197 ScalarType val = this->urand.number();
3198 C1[i+j*m] = FadType(ndot, val);
3199 C2[i+j*m] = FadType(ndot, val);
3200 C3[i+j*m] = FadType(ndot, val);
3201 for (unsigned int k=0; k<ndot; k++) {
3202 val = this->urand.number();
3203 C1[i+j*m].fastAccessDx(k) = val;
3204 C2[i+j*m].fastAccessDx(k) = val;
3205 C3[i+j*m].fastAccessDx(k) = val;
3206 }
3207 }
3208 }
3209
3210 Teuchos::BLAS<int,FadType> teuchos_blas;
3211 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3212 &A[0], n, &B[0], m, beta, &C1[0], m);
3213
3214 Teuchos::BLAS<int,FadType> sacado_blas(false);
3215 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3216 &A[0], n, &B[0], m, beta, &C2[0], m);
3217
3218 COMPARE_FAD_VECTORS(C1, C2, m*n);
3219
3220 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3221 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3222 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3223 &A[0], n, &B[0], m, beta, &C3[0], m);
3224
3225 COMPARE_FAD_VECTORS(C1, C3, m*n);
3226
3227 // lower tri
3228 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3229 &A[0], n, &B[0], m, beta, &C1[0], m);
3230 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3231 &A[0], n, &B[0], m, beta, &C2[0], m);
3232 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3233 &A[0], n, &B[0], m, beta, &C3[0], m);
3234
3235 COMPARE_FAD_VECTORS(C1, C2, m*n);
3236 COMPARE_FAD_VECTORS(C1, C3, m*n);
3237}
3238
3239// Tests different lda, ldb, ldc, left side
3241 typedef decltype(this->fad) FadType;
3242 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3244 auto m = this->m_;
3245 auto n = this->n_;
3246 auto ndot = this->ndot_;
3247
3248
3249 // SYMM is apparently not defined in the BLAS for complex types
3250 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3251 return;
3252
3253 unsigned int lda = m+4;
3254 unsigned int ldb = m+5;
3255 unsigned int ldc = m+6;
3256 VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3257 C3(ldc*n,ndot);
3258 for (unsigned int j=0; j<m; j++) {
3259 for (unsigned int i=0; i<lda; i++) {
3260 A[i+j*lda] = FadType(ndot, this->urand.number());
3261 for (unsigned int k=0; k<ndot; k++)
3262 A[i+j*lda].fastAccessDx(k) = this->urand.number();
3263 }
3264 }
3265 for (unsigned int j=0; j<n; j++) {
3266 for (unsigned int i=0; i<ldb; i++) {
3267 B[i+j*ldb] = FadType(ndot, this->urand.number());
3268 for (unsigned int k=0; k<ndot; k++)
3269 B[i+j*ldb].fastAccessDx(k) = this->urand.number();
3270 }
3271 }
3272 FadType alpha(ndot, this->urand.number());
3273 FadType beta(ndot, this->urand.number());
3274 for (unsigned int k=0; k<ndot; k++) {
3275 alpha.fastAccessDx(k) = this->urand.number();
3276 beta.fastAccessDx(k) = this->urand.number();
3277 }
3278
3279 for (unsigned int j=0; j<n; j++) {
3280 for (unsigned int i=0; i<ldc; i++) {
3281 ScalarType val = this->urand.number();
3282 C1[i+j*ldc] = FadType(ndot, val);
3283 C2[i+j*ldc] = FadType(ndot, val);
3284 C3[i+j*ldc] = FadType(ndot, val);
3285 for (unsigned int k=0; k<ndot; k++) {
3286 val = this->urand.number();
3287 C1[i+j*ldc].fastAccessDx(k) = val;
3288 C2[i+j*ldc].fastAccessDx(k) = val;
3289 C3[i+j*ldc].fastAccessDx(k) = val;
3290 }
3291 }
3292 }
3293
3294 Teuchos::BLAS<int,FadType> teuchos_blas;
3295 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3296 &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3297
3298 Teuchos::BLAS<int,FadType> sacado_blas(false);
3299 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3300 &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3301
3302 COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3303
3304 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3305 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3306 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3307 &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3308
3309 COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3310
3311 // lower tri
3312 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3313 &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3314 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3315 &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3316 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3317 &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3318
3319 COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3320 COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3321}
3322
3323// Tests different lda, ldb, ldc, right side
3325 typedef decltype(this->fad) FadType;
3326 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3328 auto m = this->m_;
3329 auto n = this->n_;
3330 auto ndot = this->ndot_;
3331
3332
3333 // SYMM is apparently not defined in the BLAS for complex types
3334 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3335 return;
3336
3337 unsigned int lda = n+4;
3338 unsigned int ldb = m+5;
3339 unsigned int ldc = m+6;
3340 VectorType A(lda*n,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
3341 C3(ldc*n,ndot);
3342 for (unsigned int j=0; j<n; j++) {
3343 for (unsigned int i=0; i<lda; i++) {
3344 A[i+j*lda] = FadType(ndot, this->urand.number());
3345 for (unsigned int k=0; k<ndot; k++)
3346 A[i+j*lda].fastAccessDx(k) = this->urand.number();
3347 }
3348 }
3349 for (unsigned int j=0; j<n; j++) {
3350 for (unsigned int i=0; i<ldb; i++) {
3351 B[i+j*ldb] = FadType(ndot, this->urand.number());
3352 for (unsigned int k=0; k<ndot; k++)
3353 B[i+j*ldb].fastAccessDx(k) = this->urand.number();
3354 }
3355 }
3356 FadType alpha(ndot, this->urand.number());
3357 FadType beta(ndot, this->urand.number());
3358 for (unsigned int k=0; k<ndot; k++) {
3359 alpha.fastAccessDx(k) = this->urand.number();
3360 beta.fastAccessDx(k) = this->urand.number();
3361 }
3362
3363 for (unsigned int j=0; j<n; j++) {
3364 for (unsigned int i=0; i<ldc; i++) {
3365 ScalarType val = this->urand.number();
3366 C1[i+j*ldc] = FadType(ndot, val);
3367 C2[i+j*ldc] = FadType(ndot, val);
3368 C3[i+j*ldc] = FadType(ndot, val);
3369 for (unsigned int k=0; k<ndot; k++) {
3370 val = this->urand.number();
3371 C1[i+j*ldc].fastAccessDx(k) = val;
3372 C2[i+j*ldc].fastAccessDx(k) = val;
3373 C3[i+j*ldc].fastAccessDx(k) = val;
3374 }
3375 }
3376 }
3377
3378 Teuchos::BLAS<int,FadType> teuchos_blas;
3379 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3380 &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3381
3382 Teuchos::BLAS<int,FadType> sacado_blas(false);
3383 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3384 &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3385
3386 COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3387
3388 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3389 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3390 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3391 &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3392
3393 COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3394
3395 // lower tri
3396 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3397 &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
3398 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3399 &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
3400 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3401 &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
3402
3403 COMPARE_FAD_VECTORS(C1, C2, ldc*n);
3404 COMPARE_FAD_VECTORS(C1, C3, ldc*n);
3405}
3406
3407// Tests with constant C
3409 typedef decltype(this->fad) FadType;
3410 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3412 auto m = this->m_;
3413 auto n = this->n_;
3414 auto ndot = this->ndot_;
3415
3416
3417 // SYMM is apparently not defined in the BLAS for complex types
3418 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3419 return;
3420
3421 VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3422 for (unsigned int j=0; j<m; j++) {
3423 for (unsigned int i=0; i<m; i++) {
3424 A[i+j*m] = FadType(ndot, this->urand.number());
3425 for (unsigned int k=0; k<ndot; k++)
3426 A[i+j*m].fastAccessDx(k) = this->urand.number();
3427 }
3428 }
3429 for (unsigned int j=0; j<n; j++) {
3430 for (unsigned int i=0; i<m; i++) {
3431 B[i+j*m] = FadType(ndot, this->urand.number());
3432 for (unsigned int k=0; k<ndot; k++)
3433 B[i+j*m].fastAccessDx(k) = this->urand.number();
3434 }
3435 }
3436 FadType alpha(ndot, this->urand.number());
3437 FadType beta(ndot, this->urand.number());
3438 for (unsigned int k=0; k<ndot; k++) {
3439 alpha.fastAccessDx(k) = this->urand.number();
3440 beta.fastAccessDx(k) = this->urand.number();
3441 }
3442
3443 for (unsigned int j=0; j<n; j++) {
3444 for (unsigned int i=0; i<m; i++) {
3445 ScalarType val = this->urand.number();
3446 C1[i+j*m] = val;
3447 C2[i+j*m] = val;
3448 C3[i+j*m] = val;
3449 }
3450 }
3451
3452 Teuchos::BLAS<int,FadType> teuchos_blas;
3453 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3454 &A[0], m, &B[0], m, beta, &C1[0], m);
3455
3456 Teuchos::BLAS<int,FadType> sacado_blas(false);
3457 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3458 &A[0], m, &B[0], m, beta, &C2[0], m);
3459
3460 COMPARE_FAD_VECTORS(C1, C2, m*n);
3461
3462 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3463 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3464 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3465 &A[0], m, &B[0], m, beta, &C3[0], m);
3466
3467 COMPARE_FAD_VECTORS(C1, C3, m*n);
3468
3469 // lower tri
3470 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3471 &A[0], m, &B[0], m, beta, &C1[0], m);
3472 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3473 &A[0], m, &B[0], m, beta, &C2[0], m);
3474 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3475 &A[0], m, &B[0], m, beta, &C3[0], m);
3476
3477 COMPARE_FAD_VECTORS(C1, C2, m*n);
3478 COMPARE_FAD_VECTORS(C1, C3, m*n);
3479}
3480
3481// Tests with constant alpha, beta
3483 typedef decltype(this->fad) FadType;
3484 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3486 auto m = this->m_;
3487 auto n = this->n_;
3488 auto ndot = this->ndot_;
3489
3490
3491 // SYMM is apparently not defined in the BLAS for complex types
3492 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3493 return;
3494
3495 VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
3496 for (unsigned int j=0; j<m; j++) {
3497 for (unsigned int i=0; i<m; i++) {
3498 A[i+j*m] = FadType(ndot, this->urand.number());
3499 for (unsigned int k=0; k<ndot; k++)
3500 A[i+j*m].fastAccessDx(k) = this->urand.number();
3501 }
3502 }
3503 for (unsigned int j=0; j<n; j++) {
3504 for (unsigned int i=0; i<m; i++) {
3505 B[i+j*m] = FadType(ndot, this->urand.number());
3506 for (unsigned int k=0; k<ndot; k++)
3507 B[i+j*m].fastAccessDx(k) = this->urand.number();
3508 }
3509 }
3510 ScalarType alpha = this->urand.number();
3511 ScalarType beta = this->urand.number();
3512
3513 for (unsigned int j=0; j<n; j++) {
3514 for (unsigned int i=0; i<m; i++) {
3515 ScalarType val = this->urand.number();
3516 C1[i+j*m] = FadType(ndot, val);
3517 C2[i+j*m] = FadType(ndot, val);
3518 C3[i+j*m] = FadType(ndot, val);
3519 for (unsigned int k=0; k<ndot; k++) {
3520 val = this->urand.number();
3521 C1[i+j*m].fastAccessDx(k) = val;
3522 C2[i+j*m].fastAccessDx(k) = val;
3523 C3[i+j*m].fastAccessDx(k) = val;
3524 }
3525 }
3526 }
3527
3528 Teuchos::BLAS<int,FadType> teuchos_blas;
3529 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3530 &A[0], m, &B[0], m, beta, &C1[0], m);
3531
3532 Teuchos::BLAS<int,FadType> sacado_blas(false);
3533 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3534 &A[0], m, &B[0], m, beta, &C2[0], m);
3535
3536 COMPARE_FAD_VECTORS(C1, C2, m*n);
3537
3538 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3539 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3540 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3541 &A[0], m, &B[0], m, beta, &C3[0], m);
3542
3543 COMPARE_FAD_VECTORS(C1, C3, m*n);
3544
3545 // lower tri
3546 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3547 &A[0], m, &B[0], m, beta, &C1[0], m);
3548 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3549 &A[0], m, &B[0], m, beta, &C2[0], m);
3550 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3551 &A[0], m, &B[0], m, beta, &C3[0], m);
3552
3553 COMPARE_FAD_VECTORS(C1, C2, m*n);
3554 COMPARE_FAD_VECTORS(C1, C3, m*n);
3555}
3556
3557// Tests with constant A
3559 typedef decltype(this->fad) FadType;
3560 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3562 auto m = this->m_;
3563 auto n = this->n_;
3564 auto ndot = this->ndot_;
3565
3566
3567 // SYMM is apparently not defined in the BLAS for complex types
3568 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3569 return;
3570
3571 VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3572 C4(m*n,ndot), C5(m*n,ndot);
3573 std::vector<ScalarType> a(m*m);
3574 for (unsigned int j=0; j<m; j++) {
3575 for (unsigned int i=0; i<m; i++) {
3576 a[i+j*m] = this->urand.number();
3577 A[i+j*m] = a[i+j*m];
3578 }
3579 }
3580 for (unsigned int j=0; j<n; j++) {
3581 for (unsigned int i=0; i<m; i++) {
3582 B[i+j*m] = FadType(ndot, this->urand.number());
3583 for (unsigned int k=0; k<ndot; k++)
3584 B[i+j*m].fastAccessDx(k) = this->urand.number();
3585 }
3586 }
3587 FadType alpha(ndot, this->urand.number());
3588 FadType beta(ndot, this->urand.number());
3589 for (unsigned int k=0; k<ndot; k++) {
3590 alpha.fastAccessDx(k) = this->urand.number();
3591 beta.fastAccessDx(k) = this->urand.number();
3592 }
3593
3594 for (unsigned int j=0; j<n; j++) {
3595 for (unsigned int i=0; i<m; i++) {
3596 ScalarType val = this->urand.number();
3597 C1[i+j*m] = FadType(ndot, val);
3598 C2[i+j*m] = FadType(ndot, val);
3599 C3[i+j*m] = FadType(ndot, val);
3600 C4[i+j*m] = FadType(ndot, val);
3601 C5[i+j*m] = FadType(ndot, val);
3602 for (unsigned int k=0; k<ndot; k++) {
3603 val = this->urand.number();
3604 C1[i+j*m].fastAccessDx(k) = val;
3605 C2[i+j*m].fastAccessDx(k) = val;
3606 C3[i+j*m].fastAccessDx(k) = val;
3607 C4[i+j*m].fastAccessDx(k) = val;
3608 C5[i+j*m].fastAccessDx(k) = val;
3609 }
3610 }
3611 }
3612
3613 Teuchos::BLAS<int,FadType> teuchos_blas;
3614 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3615 &A[0], m, &B[0], m, beta, &C1[0], m);
3616
3617 Teuchos::BLAS<int,FadType> sacado_blas(false);
3618 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3619 &A[0], m, &B[0], m, beta, &C2[0], m);
3620
3621 COMPARE_FAD_VECTORS(C1, C2, m*n);
3622
3623 unsigned int sz = m*m + 2*m*n*(1+ndot);
3624 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3625 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3626 &A[0], m, &B[0], m, beta, &C3[0], m);
3627
3628 COMPARE_FAD_VECTORS(C1, C3, m*n);
3629
3630 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3631 &a[0], m, &B[0], m, beta, &C4[0], m);
3632
3633 COMPARE_FAD_VECTORS(C1, C4, m*n);
3634
3635 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3636 &a[0], m, &B[0], m, beta, &C5[0], m);
3637
3638 COMPARE_FAD_VECTORS(C1, C5, m*n);
3639
3640 // lower tri
3641 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3642 &A[0], m, &B[0], m, beta, &C1[0], m);
3643 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3644 &A[0], m, &B[0], m, beta, &C2[0], m);
3645 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3646 &A[0], m, &B[0], m, beta, &C3[0], m);
3647 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3648 &a[0], m, &B[0], m, beta, &C4[0], m);
3649 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3650 &a[0], m, &B[0], m, beta, &C5[0], m);
3651
3652 COMPARE_FAD_VECTORS(C1, C2, m*n);
3653 COMPARE_FAD_VECTORS(C1, C3, m*n);
3654 COMPARE_FAD_VECTORS(C1, C4, m*n);
3655 COMPARE_FAD_VECTORS(C1, C5, m*n);
3656}
3657
3658// Tests with constant B
3660 typedef decltype(this->fad) FadType;
3661 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3663 auto m = this->m_;
3664 auto n = this->n_;
3665 auto ndot = this->ndot_;
3666
3667
3668 // SYMM is apparently not defined in the BLAS for complex types
3669 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3670 return;
3671
3672 VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3673 C4(m*n,ndot), C5(m*n,ndot);
3674 std::vector<ScalarType> b(m*n);
3675 for (unsigned int j=0; j<m; j++) {
3676 for (unsigned int i=0; i<m; i++) {
3677 A[i+j*m] = FadType(ndot, this->urand.number());
3678 for (unsigned int k=0; k<ndot; k++)
3679 A[i+j*m].fastAccessDx(k) = this->urand.number();
3680 }
3681 }
3682 for (unsigned int j=0; j<n; j++) {
3683 for (unsigned int i=0; i<m; i++) {
3684 b[i+j*m] = this->urand.number();
3685 B[i+j*m] = b[i+j*m];
3686 }
3687 }
3688 FadType alpha(ndot, this->urand.number());
3689 FadType beta(ndot, this->urand.number());
3690 for (unsigned int k=0; k<ndot; k++) {
3691 alpha.fastAccessDx(k) = this->urand.number();
3692 beta.fastAccessDx(k) = this->urand.number();
3693 }
3694
3695 for (unsigned int j=0; j<n; j++) {
3696 for (unsigned int i=0; i<m; i++) {
3697 ScalarType val = this->urand.number();
3698 C1[i+j*m] = FadType(ndot, val);
3699 C2[i+j*m] = FadType(ndot, val);
3700 C3[i+j*m] = FadType(ndot, val);
3701 C4[i+j*m] = FadType(ndot, val);
3702 C5[i+j*m] = FadType(ndot, val);
3703 for (unsigned int k=0; k<ndot; k++) {
3704 val = this->urand.number();
3705 C1[i+j*m].fastAccessDx(k) = val;
3706 C2[i+j*m].fastAccessDx(k) = val;
3707 C3[i+j*m].fastAccessDx(k) = val;
3708 C4[i+j*m].fastAccessDx(k) = val;
3709 C5[i+j*m].fastAccessDx(k) = val;
3710 }
3711 }
3712 }
3713
3714 Teuchos::BLAS<int,FadType> teuchos_blas;
3715 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3716 &A[0], m, &B[0], m, beta, &C1[0], m);
3717
3718 Teuchos::BLAS<int,FadType> sacado_blas(false);
3719 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3720 &A[0], m, &B[0], m, beta, &C2[0], m);
3721
3722 COMPARE_FAD_VECTORS(C1, C2, m*n);
3723
3724 unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3725 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3726 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3727 &A[0], m, &B[0], m, beta, &C3[0], m);
3728
3729 COMPARE_FAD_VECTORS(C1, C3, m*n);
3730
3731 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3732 &A[0], m, &b[0], m, beta, &C4[0], m);
3733
3734 COMPARE_FAD_VECTORS(C1, C4, m*n);
3735
3736 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3737 &A[0], m, &b[0], m, beta, &C5[0], m);
3738
3739 COMPARE_FAD_VECTORS(C1, C5, m*n);
3740
3741 // lower tri
3742 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3743 &A[0], m, &B[0], m, beta, &C1[0], m);
3744 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3745 &A[0], m, &B[0], m, beta, &C2[0], m);
3746 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3747 &A[0], m, &B[0], m, beta, &C3[0], m);
3748 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3749 &A[0], m, &b[0], m, beta, &C4[0], m);
3750 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3751 &A[0], m, &b[0], m, beta, &C5[0], m);
3752
3753 COMPARE_FAD_VECTORS(C1, C2, m*n);
3754 COMPARE_FAD_VECTORS(C1, C3, m*n);
3755 COMPARE_FAD_VECTORS(C1, C4, m*n);
3756 COMPARE_FAD_VECTORS(C1, C5, m*n);
3757}
3758
3759// Tests with constant A and B
3761 typedef decltype(this->fad) FadType;
3762 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3764 auto m = this->m_;
3765 auto n = this->n_;
3766 auto ndot = this->ndot_;
3767
3768
3769 // SYMM is apparently not defined in the BLAS for complex types
3770 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3771 return;
3772
3773 VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3774 C4(m*n,ndot), C5(m*n,ndot);
3775 std::vector<ScalarType> a(m*m), b(m*n);
3776 for (unsigned int j=0; j<m; j++) {
3777 for (unsigned int i=0; i<m; i++) {
3778 a[i+j*m] = this->urand.number();
3779 A[i+j*m] = a[i+j*m];
3780 }
3781 }
3782 for (unsigned int j=0; j<n; j++) {
3783 for (unsigned int i=0; i<m; i++) {
3784 b[i+j*m] = this->urand.number();
3785 B[i+j*m] = b[i+j*m];
3786 }
3787 }
3788 FadType alpha(ndot, this->urand.number());
3789 FadType beta(ndot, this->urand.number());
3790 for (unsigned int k=0; k<ndot; k++) {
3791 alpha.fastAccessDx(k) = this->urand.number();
3792 beta.fastAccessDx(k) = this->urand.number();
3793 }
3794
3795 for (unsigned int j=0; j<n; j++) {
3796 for (unsigned int i=0; i<m; i++) {
3797 ScalarType val = this->urand.number();
3798 C1[i+j*m] = FadType(ndot, val);
3799 C2[i+j*m] = FadType(ndot, val);
3800 C3[i+j*m] = FadType(ndot, val);
3801 C4[i+j*m] = FadType(ndot, val);
3802 C5[i+j*m] = FadType(ndot, val);
3803 for (unsigned int k=0; k<ndot; k++) {
3804 val = this->urand.number();
3805 C1[i+j*m].fastAccessDx(k) = val;
3806 C2[i+j*m].fastAccessDx(k) = val;
3807 C3[i+j*m].fastAccessDx(k) = val;
3808 C4[i+j*m].fastAccessDx(k) = val;
3809 C5[i+j*m].fastAccessDx(k) = val;
3810 }
3811 }
3812 }
3813
3814 Teuchos::BLAS<int,FadType> teuchos_blas;
3815 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3816 &A[0], m, &B[0], m, beta, &C1[0], m);
3817
3818 Teuchos::BLAS<int,FadType> sacado_blas(false);
3819 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3820 &A[0], m, &B[0], m, beta, &C2[0], m);
3821
3822 COMPARE_FAD_VECTORS(C1, C2, m*n);
3823
3824 unsigned int sz = m*m + m*n*(2+ndot);
3825 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3826 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3827 &A[0], m, &B[0], m, beta, &C3[0], m);
3828
3829 COMPARE_FAD_VECTORS(C1, C3, m*n);
3830
3831 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3832 &a[0], m, &b[0], m, beta, &C4[0], m);
3833
3834 COMPARE_FAD_VECTORS(C1, C4, m*n);
3835
3836 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3837 &a[0], m, &b[0], m, beta, &C5[0], m);
3838
3839 COMPARE_FAD_VECTORS(C1, C5, m*n);
3840
3841 // lower tri
3842 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3843 &A[0], m, &B[0], m, beta, &C1[0], m);
3844 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3845 &A[0], m, &B[0], m, beta, &C2[0], m);
3846 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3847 &A[0], m, &B[0], m, beta, &C3[0], m);
3848 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3849 &a[0], m, &b[0], m, beta, &C4[0], m);
3850 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3851 &a[0], m, &b[0], m, beta, &C5[0], m);
3852
3853 COMPARE_FAD_VECTORS(C1, C2, m*n);
3854 COMPARE_FAD_VECTORS(C1, C3, m*n);
3855 COMPARE_FAD_VECTORS(C1, C4, m*n);
3856 COMPARE_FAD_VECTORS(C1, C5, m*n);
3857}
3858
3859// Tests all arguments, left side
3861 typedef decltype(this->fad) FadType;
3862 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3864 auto m = this->m_;
3865 auto n = this->n_;
3866 auto ndot = this->ndot_;
3867
3868 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3869 for (unsigned int j=0; j<m; j++) {
3870 for (unsigned int i=0; i<m; i++) {
3871 A[i+j*m] = FadType(ndot, this->urand.number());
3872 for (unsigned int k=0; k<ndot; k++)
3873 A[i+j*m].fastAccessDx(k) = this->urand.number();
3874 }
3875 }
3876 FadType alpha(ndot, this->urand.number());
3877 for (unsigned int k=0; k<ndot; k++) {
3878 alpha.fastAccessDx(k) = this->urand.number();
3879 }
3880
3881 for (unsigned int j=0; j<n; j++) {
3882 for (unsigned int i=0; i<m; i++) {
3883 ScalarType val = this->urand.number();
3884 B1[i+j*m] = FadType(ndot, val);
3885 B2[i+j*m] = FadType(ndot, val);
3886 B3[i+j*m] = FadType(ndot, val);
3887 for (unsigned int k=0; k<ndot; k++) {
3888 val = this->urand.number();
3889 B1[i+j*m].fastAccessDx(k) = val;
3890 B2[i+j*m].fastAccessDx(k) = val;
3891 B3[i+j*m].fastAccessDx(k) = val;
3892 }
3893 }
3894 }
3895
3896 Teuchos::BLAS<int,FadType> teuchos_blas;
3897 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3898 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3899
3900 Teuchos::BLAS<int,FadType> sacado_blas(false);
3901 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3902 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3903
3904 COMPARE_FAD_VECTORS(B1, B2, m*n);
3905
3906 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
3907 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3908 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3909 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3910
3911 COMPARE_FAD_VECTORS(B1, B3, m*n);
3912
3913 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3914 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3915 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3916 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3917 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3918 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3919 COMPARE_FAD_VECTORS(B1, B2, m*n);
3920 COMPARE_FAD_VECTORS(B1, B3, m*n);
3921
3922 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3923 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3924 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3925 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3926 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3927 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3928 COMPARE_FAD_VECTORS(B1, B2, m*n);
3929 COMPARE_FAD_VECTORS(B1, B3, m*n);
3930
3931 for (unsigned int i=0; i<m; i++) {
3932 A[i*m+i].val() = 1.0;
3933 for (unsigned int k=0; k<ndot; k++)
3934 A[i*m+i].fastAccessDx(k) = 0.0;
3935 }
3936 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3937 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
3938 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3939 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
3940 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3941 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
3942 COMPARE_FAD_VECTORS(B1, B2, m*n);
3943 COMPARE_FAD_VECTORS(B1, B3, m*n);
3944}
3945
3946// Tests all arguments, right side
3948 typedef decltype(this->fad) FadType;
3949 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
3951 auto m = this->m_;
3952 auto n = this->n_;
3953 auto ndot = this->ndot_;
3954
3955 VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
3956 for (unsigned int j=0; j<n; j++) {
3957 for (unsigned int i=0; i<n; i++) {
3958 A[i+j*n] = FadType(ndot, this->urand.number());
3959 for (unsigned int k=0; k<ndot; k++)
3960 A[i+j*n].fastAccessDx(k) = this->urand.number();
3961 }
3962 }
3963 FadType alpha(ndot, this->urand.number());
3964 for (unsigned int k=0; k<ndot; k++) {
3965 alpha.fastAccessDx(k) = this->urand.number();
3966 }
3967
3968 for (unsigned int j=0; j<n; j++) {
3969 for (unsigned int i=0; i<m; i++) {
3970 ScalarType val = this->urand.number();
3971 B1[i+j*m] = FadType(ndot, val);
3972 B2[i+j*m] = FadType(ndot, val);
3973 B3[i+j*m] = FadType(ndot, val);
3974 for (unsigned int k=0; k<ndot; k++) {
3975 val = this->urand.number();
3976 B1[i+j*m].fastAccessDx(k) = val;
3977 B2[i+j*m].fastAccessDx(k) = val;
3978 B3[i+j*m].fastAccessDx(k) = val;
3979 }
3980 }
3981 }
3982
3983 Teuchos::BLAS<int,FadType> teuchos_blas;
3984 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3985 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
3986
3987 Teuchos::BLAS<int,FadType> sacado_blas(false);
3988 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3989 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
3990
3991 COMPARE_FAD_VECTORS(B1, B2, m*n);
3992
3993 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
3994 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
3995 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3996 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
3997
3998 COMPARE_FAD_VECTORS(B1, B3, m*n);
3999
4000 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4001 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4002 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4003 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4004 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4005 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4006 COMPARE_FAD_VECTORS(B1, B2, m*n);
4007 COMPARE_FAD_VECTORS(B1, B3, m*n);
4008
4009 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4010 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4011 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4012 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4013 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4014 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4015 COMPARE_FAD_VECTORS(B1, B2, m*n);
4016 COMPARE_FAD_VECTORS(B1, B3, m*n);
4017
4018 for (unsigned int i=0; i<n; i++) {
4019 A[i*n+i].val() = 1.0;
4020 for (unsigned int k=0; k<ndot; k++)
4021 A[i*n+i].fastAccessDx(k) = 0.0;
4022 }
4023 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4024 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4025 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4026 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4027 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4028 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4029 COMPARE_FAD_VECTORS(B1, B2, m*n);
4030 COMPARE_FAD_VECTORS(B1, B3, m*n);
4031}
4032
4033// Tests all arguments, left side, different lda, ldb
4035 typedef decltype(this->fad) FadType;
4036 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4038 auto m = this->m_;
4039 auto n = this->n_;
4040 auto ndot = this->ndot_;
4041
4042 unsigned int lda = m+4;
4043 unsigned int ldb = m+5;
4044 VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4045 for (unsigned int j=0; j<m; j++) {
4046 for (unsigned int i=0; i<lda; i++) {
4047 A[i+j*lda] = FadType(ndot, this->urand.number());
4048 for (unsigned int k=0; k<ndot; k++)
4049 A[i+j*lda].fastAccessDx(k) = this->urand.number();
4050 }
4051 }
4052 FadType alpha(ndot, this->urand.number());
4053 for (unsigned int k=0; k<ndot; k++) {
4054 alpha.fastAccessDx(k) = this->urand.number();
4055 }
4056
4057 for (unsigned int j=0; j<n; j++) {
4058 for (unsigned int i=0; i<ldb; i++) {
4059 ScalarType val = this->urand.number();
4060 B1[i+j*ldb] = FadType(ndot, val);
4061 B2[i+j*ldb] = FadType(ndot, val);
4062 B3[i+j*ldb] = FadType(ndot, val);
4063 for (unsigned int k=0; k<ndot; k++) {
4064 val = this->urand.number();
4065 B1[i+j*ldb].fastAccessDx(k) = val;
4066 B2[i+j*ldb].fastAccessDx(k) = val;
4067 B3[i+j*ldb].fastAccessDx(k) = val;
4068 }
4069 }
4070 }
4071
4072 Teuchos::BLAS<int,FadType> teuchos_blas;
4073 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4074 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4075
4076 Teuchos::BLAS<int,FadType> sacado_blas(false);
4077 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4078 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4079
4080 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4081
4082 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4083 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4084 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4085 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4086
4087 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4088
4089 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4090 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4091 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4092 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4093 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4094 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4095 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4096 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4097
4098 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4099 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4100 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4101 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4102 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4103 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4104 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4105 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4106
4107 for (unsigned int i=0; i<m; i++) {
4108 A[i*lda+i].val() = 1.0;
4109 for (unsigned int k=0; k<ndot; k++)
4110 A[i*lda+i].fastAccessDx(k) = 0.0;
4111 }
4112 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4113 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4114 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4115 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4116 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4117 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4118 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4119 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4120}
4121
4122// Tests all arguments, right side, different lda, ldb
4124 typedef decltype(this->fad) FadType;
4125 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4127 auto m = this->m_;
4128 auto n = this->n_;
4129 auto ndot = this->ndot_;
4130
4131 unsigned int lda = n+4;
4132 unsigned int ldb = m+5;
4133 VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4134 for (unsigned int j=0; j<n; j++) {
4135 for (unsigned int i=0; i<lda; i++) {
4136 A[i+j*lda] = FadType(ndot, this->urand.number());
4137 for (unsigned int k=0; k<ndot; k++)
4138 A[i+j*lda].fastAccessDx(k) = this->urand.number();
4139 }
4140 }
4141 FadType alpha(ndot, this->urand.number());
4142 for (unsigned int k=0; k<ndot; k++) {
4143 alpha.fastAccessDx(k) = this->urand.number();
4144 }
4145
4146 for (unsigned int j=0; j<n; j++) {
4147 for (unsigned int i=0; i<ldb; i++) {
4148 ScalarType val = this->urand.number();
4149 B1[i+j*ldb] = FadType(ndot, val);
4150 B2[i+j*ldb] = FadType(ndot, val);
4151 B3[i+j*ldb] = FadType(ndot, val);
4152 for (unsigned int k=0; k<ndot; k++) {
4153 val = this->urand.number();
4154 B1[i+j*ldb].fastAccessDx(k) = val;
4155 B2[i+j*ldb].fastAccessDx(k) = val;
4156 B3[i+j*ldb].fastAccessDx(k) = val;
4157 }
4158 }
4159 }
4160
4161 Teuchos::BLAS<int,FadType> teuchos_blas;
4162 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4163 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4164
4165 Teuchos::BLAS<int,FadType> sacado_blas(false);
4166 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4167 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4168
4169 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4170
4171 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4172 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4173 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4174 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4175
4176 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4177
4178 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4179 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4180 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4181 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4182 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4183 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4184 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4185 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4186
4187 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4188 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4189 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4190 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4191 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4192 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4193 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4194 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4195
4196 for (unsigned int i=0; i<n; i++) {
4197 A[i*lda+i].val() = 1.0;
4198 for (unsigned int k=0; k<ndot; k++)
4199 A[i*lda+i].fastAccessDx(k) = 0.0;
4200 }
4201 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4202 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4203 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4204 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4205 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4206 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4207 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4208 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4209}
4210
4211// Tests constant alpha
4213 typedef decltype(this->fad) FadType;
4214 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4216 auto m = this->m_;
4217 auto n = this->n_;
4218 auto ndot = this->ndot_;
4219
4220 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4221 for (unsigned int j=0; j<m; j++) {
4222 for (unsigned int i=0; i<m; i++) {
4223 A[i+j*m] = FadType(ndot, this->urand.number());
4224 for (unsigned int k=0; k<ndot; k++)
4225 A[i+j*m].fastAccessDx(k) = this->urand.number();
4226 }
4227 }
4228 ScalarType alpha = this->urand.number();
4229
4230 for (unsigned int j=0; j<n; j++) {
4231 for (unsigned int i=0; i<m; i++) {
4232 ScalarType val = this->urand.number();
4233 B1[i+j*m] = FadType(ndot, val);
4234 B2[i+j*m] = FadType(ndot, val);
4235 B3[i+j*m] = FadType(ndot, val);
4236 for (unsigned int k=0; k<ndot; k++) {
4237 val = this->urand.number();
4238 B1[i+j*m].fastAccessDx(k) = val;
4239 B2[i+j*m].fastAccessDx(k) = val;
4240 B3[i+j*m].fastAccessDx(k) = val;
4241 }
4242 }
4243 }
4244
4245 Teuchos::BLAS<int,FadType> teuchos_blas;
4246 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4247 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4248
4249 Teuchos::BLAS<int,FadType> sacado_blas(false);
4250 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4251 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4252
4253 COMPARE_FAD_VECTORS(B1, B2, m*n);
4254
4255 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4256 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4257 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4258 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4259
4260 COMPARE_FAD_VECTORS(B1, B3, m*n);
4261
4262 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4263 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4264 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4265 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4266 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4267 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4268 COMPARE_FAD_VECTORS(B1, B2, m*n);
4269 COMPARE_FAD_VECTORS(B1, B3, m*n);
4270
4271 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4272 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4273 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4274 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4275 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4276 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4277 COMPARE_FAD_VECTORS(B1, B2, m*n);
4278 COMPARE_FAD_VECTORS(B1, B3, m*n);
4279
4280 for (unsigned int i=0; i<m; i++) {
4281 A[i*m+i].val() = 1.0;
4282 for (unsigned int k=0; k<ndot; k++)
4283 A[i*m+i].fastAccessDx(k) = 0.0;
4284 }
4285 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4286 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4287 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4288 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4289 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4290 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4291 COMPARE_FAD_VECTORS(B1, B2, m*n);
4292 COMPARE_FAD_VECTORS(B1, B3, m*n);
4293}
4294
4295// Tests constant B
4297 typedef decltype(this->fad) FadType;
4298 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4300 auto m = this->m_;
4301 auto n = this->n_;
4302 auto ndot = this->ndot_;
4303
4304 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4305 for (unsigned int j=0; j<m; j++) {
4306 for (unsigned int i=0; i<m; i++) {
4307 A[i+j*m] = FadType(ndot, this->urand.number());
4308 for (unsigned int k=0; k<ndot; k++)
4309 A[i+j*m].fastAccessDx(k) = this->urand.number();
4310 }
4311 }
4312 FadType alpha(ndot, this->urand.number());
4313 for (unsigned int k=0; k<ndot; k++) {
4314 alpha.fastAccessDx(k) = this->urand.number();
4315 }
4316
4317 for (unsigned int j=0; j<n; j++) {
4318 for (unsigned int i=0; i<m; i++) {
4319 ScalarType val = this->urand.number();
4320 B1[i+j*m] = val;
4321 B2[i+j*m] = val;
4322 B3[i+j*m] = val;
4323 }
4324 }
4325
4326 Teuchos::BLAS<int,FadType> teuchos_blas;
4327 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4328 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4329
4330 Teuchos::BLAS<int,FadType> sacado_blas(false);
4331 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4332 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4333
4334 COMPARE_FAD_VECTORS(B1, B2, m*n);
4335
4336 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4337 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4338 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4339 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4340
4341 COMPARE_FAD_VECTORS(B1, B3, m*n);
4342
4343 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4344 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4345 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4346 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4347 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4348 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4349 COMPARE_FAD_VECTORS(B1, B2, m*n);
4350 COMPARE_FAD_VECTORS(B1, B3, m*n);
4351
4352 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4353 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4354 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4355 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4356 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4357 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4358 COMPARE_FAD_VECTORS(B1, B2, m*n);
4359 COMPARE_FAD_VECTORS(B1, B3, m*n);
4360
4361 for (unsigned int i=0; i<m; i++) {
4362 A[i*m+i].val() = 1.0;
4363 for (unsigned int k=0; k<ndot; k++)
4364 A[i*m+i].fastAccessDx(k) = 0.0;
4365 }
4366 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4367 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4368 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4369 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4370 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4371 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4372 COMPARE_FAD_VECTORS(B1, B2, m*n);
4373 COMPARE_FAD_VECTORS(B1, B3, m*n);
4374}
4375
4376// Tests constant A
4378 typedef decltype(this->fad) FadType;
4379 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4381 auto m = this->m_;
4382 auto n = this->n_;
4383 auto ndot = this->ndot_;
4384
4385 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4386 B4(m*n,ndot), B5(m*n,ndot);
4387 std::vector<ScalarType> a(m*m);
4388 for (unsigned int j=0; j<m; j++) {
4389 for (unsigned int i=0; i<m; i++) {
4390 a[i+j*m] = this->urand.number();
4391 A[i+j*m] = a[i+j*m];
4392 }
4393 }
4394 FadType alpha(ndot, this->urand.number());
4395 for (unsigned int k=0; k<ndot; k++) {
4396 alpha.fastAccessDx(k) = this->urand.number();
4397 }
4398
4399 for (unsigned int j=0; j<n; j++) {
4400 for (unsigned int i=0; i<m; i++) {
4401 ScalarType val = this->urand.number();
4402 B1[i+j*m] = FadType(ndot, val);
4403 B2[i+j*m] = FadType(ndot, val);
4404 B3[i+j*m] = FadType(ndot, val);
4405 B4[i+j*m] = FadType(ndot, val);
4406 B5[i+j*m] = FadType(ndot, val);
4407 for (unsigned int k=0; k<ndot; k++) {
4408 val = this->urand.number();
4409 B1[i+j*m].fastAccessDx(k) = val;
4410 B2[i+j*m].fastAccessDx(k) = val;
4411 B3[i+j*m].fastAccessDx(k) = val;
4412 B4[i+j*m].fastAccessDx(k) = val;
4413 B5[i+j*m].fastAccessDx(k) = val;
4414 }
4415 }
4416 }
4417
4418 Teuchos::BLAS<int,FadType> teuchos_blas;
4419 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4420 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4421
4422 Teuchos::BLAS<int,FadType> sacado_blas(false);
4423 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4424 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4425
4426 COMPARE_FAD_VECTORS(B1, B2, m*n);
4427
4428 unsigned int sz = m*m + m*n*(1+ndot);
4429 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4430 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4431 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4432
4433 COMPARE_FAD_VECTORS(B1, B3, m*n);
4434
4435 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4436 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4437
4438 COMPARE_FAD_VECTORS(B1, B4, m*n);
4439
4440 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4441 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4442
4443 COMPARE_FAD_VECTORS(B1, B5, m*n);
4444
4445 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4446 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4447 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4448 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4449 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4450 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4451 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4452 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4453 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4454 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4455 COMPARE_FAD_VECTORS(B1, B2, m*n);
4456 COMPARE_FAD_VECTORS(B1, B3, m*n);
4457 COMPARE_FAD_VECTORS(B1, B4, m*n);
4458 COMPARE_FAD_VECTORS(B1, B5, m*n);
4459
4460 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4461 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4462 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4463 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4464 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4465 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4466 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4467 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4468 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4469 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4470 COMPARE_FAD_VECTORS(B1, B2, m*n);
4471 COMPARE_FAD_VECTORS(B1, B3, m*n);
4472 COMPARE_FAD_VECTORS(B1, B4, m*n);
4473 COMPARE_FAD_VECTORS(B1, B5, m*n);
4474
4475 for (unsigned int i=0; i<m; i++) {
4476 A[i*m+i].val() = 1.0;
4477 for (unsigned int k=0; k<ndot; k++)
4478 A[i*m+i].fastAccessDx(k) = 0.0;
4479 }
4480 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4481 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4482 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4483 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4484 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4485 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4486 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4487 Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
4488 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4489 Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
4490 COMPARE_FAD_VECTORS(B1, B2, m*n);
4491 COMPARE_FAD_VECTORS(B1, B3, m*n);
4492 COMPARE_FAD_VECTORS(B1, B4, m*n);
4493 COMPARE_FAD_VECTORS(B1, B5, m*n);
4494}
4495
4496// Tests all arguments, left side
4498 typedef decltype(this->fad) FadType;
4499 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4501 auto m = this->m_;
4502 auto n = this->n_;
4503 auto ndot = this->ndot_;
4504
4505 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4506 for (unsigned int j=0; j<m; j++) {
4507 for (unsigned int i=0; i<m; i++) {
4508 //A[i+j*m] = this->urand.number();
4509 A[i+j*m] = FadType(ndot, this->urand.number());
4510 for (unsigned int k=0; k<ndot; k++)
4511 A[i+j*m].fastAccessDx(k) = this->urand.number();
4512 }
4513 }
4514 FadType alpha(ndot, this->urand.number());
4515 for (unsigned int k=0; k<ndot; k++) {
4516 alpha.fastAccessDx(k) = this->urand.number();
4517 }
4518 //ScalarType alpha = this->urand.number();
4519
4520 for (unsigned int j=0; j<n; j++) {
4521 for (unsigned int i=0; i<m; i++) {
4522 ScalarType val = this->urand.number();
4523 // B1[i+j*m] = val;
4524 // B2[i+j*m] = val;
4525 // B3[i+j*m] = val;
4526 B1[i+j*m] = FadType(ndot, val);
4527 B2[i+j*m] = FadType(ndot, val);
4528 B3[i+j*m] = FadType(ndot, val);
4529 for (unsigned int k=0; k<ndot; k++) {
4530 val = this->urand.number();
4531 B1[i+j*m].fastAccessDx(k) = val;
4532 B2[i+j*m].fastAccessDx(k) = val;
4533 B3[i+j*m].fastAccessDx(k) = val;
4534 }
4535 }
4536 }
4537
4538 Teuchos::BLAS<int,FadType> teuchos_blas;
4539 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4540 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4541
4542 Teuchos::BLAS<int,FadType> sacado_blas(false);
4543 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4544 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4545
4546 COMPARE_FAD_VECTORS(B1, B2, m*n);
4547
4548 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4549 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4550 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4551 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4552
4553 COMPARE_FAD_VECTORS(B1, B3, m*n);
4554
4555 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4556 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4557 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4558 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4559 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4560 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4561 COMPARE_FAD_VECTORS(B1, B2, m*n);
4562 COMPARE_FAD_VECTORS(B1, B3, m*n);
4563
4564 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4565 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4566 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4567 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4568 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4569 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4570 COMPARE_FAD_VECTORS(B1, B2, m*n);
4571 COMPARE_FAD_VECTORS(B1, B3, m*n);
4572
4573 for (unsigned int i=0; i<m; i++) {
4574 A[i*m+i].val() = 1.0;
4575 for (unsigned int k=0; k<ndot; k++)
4576 A[i*m+i].fastAccessDx(k) = 0.0;
4577 }
4578 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4579 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4580 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4581 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4582 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4583 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4584 COMPARE_FAD_VECTORS(B1, B2, m*n);
4585 COMPARE_FAD_VECTORS(B1, B3, m*n);
4586}
4587
4588// Tests all arguments, right side
4590 typedef decltype(this->fad) FadType;
4591 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4593 auto m = this->m_;
4594 auto n = this->n_;
4595 auto ndot = this->ndot_;
4596
4597 VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4598 for (unsigned int j=0; j<n; j++) {
4599 for (unsigned int i=0; i<n; i++) {
4600 A[i+j*n] = FadType(ndot, this->urand.number());
4601 for (unsigned int k=0; k<ndot; k++)
4602 A[i+j*n].fastAccessDx(k) = this->urand.number();
4603 }
4604 }
4605 FadType alpha(ndot, this->urand.number());
4606 for (unsigned int k=0; k<ndot; k++) {
4607 alpha.fastAccessDx(k) = this->urand.number();
4608 }
4609
4610 for (unsigned int j=0; j<n; j++) {
4611 for (unsigned int i=0; i<m; i++) {
4612 ScalarType val = this->urand.number();
4613 B1[i+j*m] = FadType(ndot, val);
4614 B2[i+j*m] = FadType(ndot, val);
4615 B3[i+j*m] = FadType(ndot, val);
4616 for (unsigned int k=0; k<ndot; k++) {
4617 val = this->urand.number();
4618 B1[i+j*m].fastAccessDx(k) = val;
4619 B2[i+j*m].fastAccessDx(k) = val;
4620 B3[i+j*m].fastAccessDx(k) = val;
4621 }
4622 }
4623 }
4624
4625 Teuchos::BLAS<int,FadType> teuchos_blas;
4626 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4627 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4628
4629 Teuchos::BLAS<int,FadType> sacado_blas(false);
4630 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4631 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4632
4633 COMPARE_FAD_VECTORS(B1, B2, m*n);
4634
4635 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4636 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4637 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4638 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4639
4640 COMPARE_FAD_VECTORS(B1, B3, m*n);
4641
4642 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4643 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4644 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4645 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4646 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4647 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4648 COMPARE_FAD_VECTORS(B1, B2, m*n);
4649 COMPARE_FAD_VECTORS(B1, B3, m*n);
4650
4651 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4652 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4653 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4654 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4655 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4656 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4657 COMPARE_FAD_VECTORS(B1, B2, m*n);
4658 COMPARE_FAD_VECTORS(B1, B3, m*n);
4659
4660 for (unsigned int i=0; i<n; i++) {
4661 A[i*n+i].val() = 1.0;
4662 for (unsigned int k=0; k<ndot; k++)
4663 A[i*n+i].fastAccessDx(k) = 0.0;
4664 }
4665 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4666 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
4667 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4668 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
4669 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4670 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
4671 COMPARE_FAD_VECTORS(B1, B2, m*n);
4672 COMPARE_FAD_VECTORS(B1, B3, m*n);
4673}
4674
4675// Tests all arguments, left side, different lda, ldb
4677 typedef decltype(this->fad) FadType;
4678 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4680 auto m = this->m_;
4681 auto n = this->n_;
4682 auto ndot = this->ndot_;
4683
4684 unsigned int lda = m+4;
4685 unsigned int ldb = m+5;
4686 VectorType A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4687 for (unsigned int j=0; j<m; j++) {
4688 for (unsigned int i=0; i<lda; i++) {
4689 A[i+j*lda] = FadType(ndot, this->urand.number());
4690 for (unsigned int k=0; k<ndot; k++)
4691 A[i+j*lda].fastAccessDx(k) = this->urand.number();
4692 }
4693 }
4694 FadType alpha(ndot, this->urand.number());
4695 for (unsigned int k=0; k<ndot; k++) {
4696 alpha.fastAccessDx(k) = this->urand.number();
4697 }
4698
4699 for (unsigned int j=0; j<n; j++) {
4700 for (unsigned int i=0; i<ldb; i++) {
4701 ScalarType val = this->urand.number();
4702 B1[i+j*ldb] = FadType(ndot, val);
4703 B2[i+j*ldb] = FadType(ndot, val);
4704 B3[i+j*ldb] = FadType(ndot, val);
4705 for (unsigned int k=0; k<ndot; k++) {
4706 val = this->urand.number();
4707 B1[i+j*ldb].fastAccessDx(k) = val;
4708 B2[i+j*ldb].fastAccessDx(k) = val;
4709 B3[i+j*ldb].fastAccessDx(k) = val;
4710 }
4711 }
4712 }
4713
4714 Teuchos::BLAS<int,FadType> teuchos_blas;
4715 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4716 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4717
4718 Teuchos::BLAS<int,FadType> sacado_blas(false);
4719 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4720 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4721
4722 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4723
4724 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4725 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4726 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4727 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4728
4729 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4730
4731 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4732 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4733 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4734 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4735 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4736 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4737 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4738 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4739
4740 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4741 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4742 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4743 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4744 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4745 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4746 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4747 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4748
4749 for (unsigned int i=0; i<m; i++) {
4750 A[i*lda+i].val() = 1.0;
4751 for (unsigned int k=0; k<ndot; k++)
4752 A[i*lda+i].fastAccessDx(k) = 0.0;
4753 }
4754 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4755 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4756 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4757 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4758 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4759 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4760 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4761 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4762}
4763
4764// Tests all arguments, right side, different lda, ldb
4766 typedef decltype(this->fad) FadType;
4767 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4769 auto m = this->m_;
4770 auto n = this->n_;
4771 auto ndot = this->ndot_;
4772
4773 unsigned int lda = n+4;
4774 unsigned int ldb = m+5;
4775 VectorType A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
4776 for (unsigned int j=0; j<n; j++) {
4777 for (unsigned int i=0; i<lda; i++) {
4778 A[i+j*lda] = FadType(ndot, this->urand.number());
4779 for (unsigned int k=0; k<ndot; k++)
4780 A[i+j*lda].fastAccessDx(k) = this->urand.number();
4781 }
4782 }
4783 FadType alpha(ndot, this->urand.number());
4784 for (unsigned int k=0; k<ndot; k++) {
4785 alpha.fastAccessDx(k) = this->urand.number();
4786 }
4787
4788 for (unsigned int j=0; j<n; j++) {
4789 for (unsigned int i=0; i<ldb; i++) {
4790 ScalarType val = this->urand.number();
4791 B1[i+j*ldb] = FadType(ndot, val);
4792 B2[i+j*ldb] = FadType(ndot, val);
4793 B3[i+j*ldb] = FadType(ndot, val);
4794 for (unsigned int k=0; k<ndot; k++) {
4795 val = this->urand.number();
4796 B1[i+j*ldb].fastAccessDx(k) = val;
4797 B2[i+j*ldb].fastAccessDx(k) = val;
4798 B3[i+j*ldb].fastAccessDx(k) = val;
4799 }
4800 }
4801 }
4802
4803 Teuchos::BLAS<int,FadType> teuchos_blas;
4804 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4805 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4806
4807 Teuchos::BLAS<int,FadType> sacado_blas(false);
4808 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4809 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4810
4811 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4812
4813 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4814 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4815 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4816 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4817
4818 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4819
4820 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4821 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4822 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4823 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4824 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4825 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4826 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4827 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4828
4829 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4830 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4831 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4832 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4833 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4834 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4835 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4836 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4837
4838 for (unsigned int i=0; i<n; i++) {
4839 A[i*lda+i].val() = 1.0;
4840 for (unsigned int k=0; k<ndot; k++)
4841 A[i*lda+i].fastAccessDx(k) = 0.0;
4842 }
4843 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4844 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B1[0], ldb);
4845 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4846 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B2[0], ldb);
4847 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4848 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], lda, &B3[0], ldb);
4849 COMPARE_FAD_VECTORS(B1, B2, ldb*n);
4850 COMPARE_FAD_VECTORS(B1, B3, ldb*n);
4851}
4852
4853// Tests constant alpha
4855 typedef decltype(this->fad) FadType;
4856 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4858 auto m = this->m_;
4859 auto n = this->n_;
4860 auto ndot = this->ndot_;
4861
4862 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4863 for (unsigned int j=0; j<m; j++) {
4864 for (unsigned int i=0; i<m; i++) {
4865 A[i+j*m] = FadType(ndot, this->urand.number());
4866 for (unsigned int k=0; k<ndot; k++)
4867 A[i+j*m].fastAccessDx(k) = this->urand.number();
4868 }
4869 }
4870 ScalarType alpha = this->urand.number();
4871
4872 for (unsigned int j=0; j<n; j++) {
4873 for (unsigned int i=0; i<m; i++) {
4874 ScalarType val = this->urand.number();
4875 B1[i+j*m] = FadType(ndot, val);
4876 B2[i+j*m] = FadType(ndot, val);
4877 B3[i+j*m] = FadType(ndot, val);
4878 for (unsigned int k=0; k<ndot; k++) {
4879 val = this->urand.number();
4880 B1[i+j*m].fastAccessDx(k) = val;
4881 B2[i+j*m].fastAccessDx(k) = val;
4882 B3[i+j*m].fastAccessDx(k) = val;
4883 }
4884 }
4885 }
4886
4887 Teuchos::BLAS<int,FadType> teuchos_blas;
4888 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4889 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4890
4891 Teuchos::BLAS<int,FadType> sacado_blas(false);
4892 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4893 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4894
4895 COMPARE_FAD_VECTORS(B1, B2, m*n);
4896
4897 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4898 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4899 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4900 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4901
4902 COMPARE_FAD_VECTORS(B1, B3, m*n);
4903
4904 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4905 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4906 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4907 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4908 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4909 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4910 COMPARE_FAD_VECTORS(B1, B2, m*n);
4911 COMPARE_FAD_VECTORS(B1, B3, m*n);
4912
4913 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4914 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4915 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4916 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4917 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4918 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4919 COMPARE_FAD_VECTORS(B1, B2, m*n);
4920 COMPARE_FAD_VECTORS(B1, B3, m*n);
4921
4922 for (unsigned int i=0; i<m; i++) {
4923 A[i*m+i].val() = 1.0;
4924 for (unsigned int k=0; k<ndot; k++)
4925 A[i*m+i].fastAccessDx(k) = 0.0;
4926 }
4927 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4928 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4929 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4930 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4931 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4932 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4933 COMPARE_FAD_VECTORS(B1, B2, m*n);
4934 COMPARE_FAD_VECTORS(B1, B3, m*n);
4935}
4936
4937// Tests constant B
4939 typedef decltype(this->fad) FadType;
4940 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
4942 auto m = this->m_;
4943 auto n = this->n_;
4944 auto ndot = this->ndot_;
4945
4946 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
4947 for (unsigned int j=0; j<m; j++) {
4948 for (unsigned int i=0; i<m; i++) {
4949 A[i+j*m] = FadType(ndot, this->urand.number());
4950 for (unsigned int k=0; k<ndot; k++)
4951 A[i+j*m].fastAccessDx(k) = this->urand.number();
4952 }
4953 }
4954 FadType alpha(ndot, this->urand.number());
4955 for (unsigned int k=0; k<ndot; k++) {
4956 alpha.fastAccessDx(k) = this->urand.number();
4957 }
4958
4959 for (unsigned int j=0; j<n; j++) {
4960 for (unsigned int i=0; i<m; i++) {
4961 ScalarType val = this->urand.number();
4962 B1[i+j*m] = val;
4963 B2[i+j*m] = val;
4964 B3[i+j*m] = val;
4965 }
4966 }
4967
4968 Teuchos::BLAS<int,FadType> teuchos_blas;
4969 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4970 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4971
4972 Teuchos::BLAS<int,FadType> sacado_blas(false);
4973 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4974 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4975
4976 COMPARE_FAD_VECTORS(B1, B2, m*n);
4977
4978 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4979 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
4980 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4981 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4982
4983 COMPARE_FAD_VECTORS(B1, B3, m*n);
4984
4985 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4986 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4987 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4988 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4989 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4990 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
4991 COMPARE_FAD_VECTORS(B1, B2, m*n);
4992 COMPARE_FAD_VECTORS(B1, B3, m*n);
4993
4994 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4995 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
4996 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4997 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
4998 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4999 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5000 COMPARE_FAD_VECTORS(B1, B2, m*n);
5001 COMPARE_FAD_VECTORS(B1, B3, m*n);
5002
5003 for (unsigned int i=0; i<m; i++) {
5004 A[i*m+i].val() = 1.0;
5005 for (unsigned int k=0; k<ndot; k++)
5006 A[i*m+i].fastAccessDx(k) = 0.0;
5007 }
5008 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5009 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5010 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5011 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5012 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5013 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5014 COMPARE_FAD_VECTORS(B1, B2, m*n);
5015 COMPARE_FAD_VECTORS(B1, B3, m*n);
5016}
5017
5018// Tests constant A
5020 typedef decltype(this->fad) FadType;
5021 typedef typename Sacado::ScalarType<FadType>::type ScalarType;
5023 auto m = this->m_;
5024 auto n = this->n_;
5025 auto ndot = this->ndot_;
5026
5027 VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
5028 B4(m*n,ndot), B5(m*n,ndot);
5029 std::vector<ScalarType> a(m*m);
5030 for (unsigned int j=0; j<m; j++) {
5031 for (unsigned int i=0; i<m; i++) {
5032 a[i+j*m] = this->urand.number();
5033 A[i+j*m] = a[i+j*m];
5034 }
5035 }
5036 FadType alpha(ndot, this->urand.number());
5037 for (unsigned int k=0; k<ndot; k++) {
5038 alpha.fastAccessDx(k) = this->urand.number();
5039 }
5040
5041 for (unsigned int j=0; j<n; j++) {
5042 for (unsigned int i=0; i<m; i++) {
5043 ScalarType val = this->urand.number();
5044 B1[i+j*m] = FadType(ndot, val);
5045 B2[i+j*m] = FadType(ndot, val);
5046 B3[i+j*m] = FadType(ndot, val);
5047 B4[i+j*m] = FadType(ndot, val);
5048 B5[i+j*m] = FadType(ndot, val);
5049 for (unsigned int k=0; k<ndot; k++) {
5050 val = this->urand.number();
5051 B1[i+j*m].fastAccessDx(k) = val;
5052 B2[i+j*m].fastAccessDx(k) = val;
5053 B3[i+j*m].fastAccessDx(k) = val;
5054 B4[i+j*m].fastAccessDx(k) = val;
5055 B5[i+j*m].fastAccessDx(k) = val;
5056 }
5057 }
5058 }
5059
5060 Teuchos::BLAS<int,FadType> teuchos_blas;
5061 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5062 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5063
5064 Teuchos::BLAS<int,FadType> sacado_blas(false);
5065 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5066 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5067
5068 COMPARE_FAD_VECTORS(B1, B2, m*n);
5069
5070 unsigned int sz = m*m + m*n*(1+ndot);
5071 Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
5072 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5073 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5074
5075 COMPARE_FAD_VECTORS(B1, B3, m*n);
5076
5077 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5078 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5079
5080 COMPARE_FAD_VECTORS(B1, B4, m*n);
5081
5082 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5083 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5084
5085 COMPARE_FAD_VECTORS(B1, B5, m*n);
5086
5087 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5088 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5089 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5090 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5091 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5092 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5093 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5094 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5095 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5096 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5097 COMPARE_FAD_VECTORS(B1, B2, m*n);
5098 COMPARE_FAD_VECTORS(B1, B3, m*n);
5099 COMPARE_FAD_VECTORS(B1, B4, m*n);
5100 COMPARE_FAD_VECTORS(B1, B5, m*n);
5101
5102 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5103 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5104 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5105 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5106 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5107 Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5108 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5109 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5110 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5111 Teuchos::NON_UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5112 COMPARE_FAD_VECTORS(B1, B2, m*n);
5113 COMPARE_FAD_VECTORS(B1, B3, m*n);
5114 COMPARE_FAD_VECTORS(B1, B4, m*n);
5115 COMPARE_FAD_VECTORS(B1, B5, m*n);
5116
5117 for (unsigned int i=0; i<m; i++) {
5118 A[i*m+i].val() = 1.0;
5119 for (unsigned int k=0; k<ndot; k++)
5120 A[i*m+i].fastAccessDx(k) = 0.0;
5121 }
5122 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5123 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
5124 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5125 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
5126 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5127 Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
5128 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5129 Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B4[0], m);
5130 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5131 Teuchos::UNIT_DIAG, m, n, alpha, &a[0], m, &B5[0], m);
5132 COMPARE_FAD_VECTORS(B1, B2, m*n);
5133 COMPARE_FAD_VECTORS(B1, B3, m*n);
5134 COMPARE_FAD_VECTORS(B1, B4, m*n);
5135 COMPARE_FAD_VECTORS(B1, B5, m*n);
5136}
5137
5140 testSCAL1,
5141 testSCAL2,
5142 testSCAL3,
5143 testSCAL4,
5144
5145 testCOPY1,
5146 testCOPY2,
5147 testCOPY3,
5148 testCOPY4,
5149
5150 testAXPY1,
5151 testAXPY2,
5152 testAXPY3,
5153 testAXPY4,
5154
5155 testDOT1,
5156 testDOT2,
5157 testDOT3,
5158 testDOT4,
5159
5160 testNRM21,
5161 testNRM22,
5162
5163 testGEMV1,
5164 testGEMV2,
5165 testGEMV3,
5166 testGEMV4,
5167 testGEMV5,
5168 testGEMV6,
5169 testGEMV7,
5170 testGEMV8,
5171 testGEMV9,
5172
5173 testTRMV1,
5174 testTRMV2,
5175 testTRMV3,
5176 testTRMV4,
5177
5178 testGER1,
5179 testGER2,
5180 testGER3,
5181 testGER4,
5182 testGER5,
5183 testGER6,
5184 testGER7,
5185
5186 testGEMM1,
5187 testGEMM2,
5188 testGEMM3,
5189 testGEMM4,
5190 testGEMM5,
5191 testGEMM6,
5192 testGEMM7,
5193 testGEMM8,
5194 testGEMM9,
5195 testGEMM10,
5196
5197 testSYMM1,
5198 testSYMM2,
5199 testSYMM3,
5200 testSYMM4,
5201 testSYMM5,
5202 testSYMM6,
5203 testSYMM7,
5204 testSYMM8,
5205 testSYMM9,
5206
5207 testTRMM1,
5208 testTRMM2,
5209 testTRMM3,
5210 testTRMM4,
5211 testTRMM5,
5212 testTRMM6,
5213 testTRMM7,
5214
5215 testTRSM1,
5216 testTRSM2,
5217 testTRSM3,
5218 testTRSM4,
5219 testTRSM5,
5220 testTRSM6,
5221 testTRSM7);
5222
5223
5224#endif // FADBLASUNITTESTS_HPP
#define COMPARE_FAD_VECTORS(X1, X2, n)
TYPED_TEST_P(FadBLASUnitTests, testSCAL1)
TYPED_TEST_SUITE_P(FadBLASUnitTests)
REGISTER_TYPED_TEST_SUITE_P(FadBLASUnitTests, testSCAL1, testSCAL2, testSCAL3, testSCAL4, testCOPY1, testCOPY2, testCOPY3, testCOPY4, testAXPY1, testAXPY2, testAXPY3, testAXPY4, testDOT1, testDOT2, testDOT3, testDOT4, testNRM21, testNRM22, testGEMV1, testGEMV2, testGEMV3, testGEMV4, testGEMV5, testGEMV6, testGEMV7, testGEMV8, testGEMV9, testTRMV1, testTRMV2, testTRMV3, testTRMV4, testGER1, testGER2, testGER3, testGER4, testGER5, testGER6, testGER7, testGEMM1, testGEMM2, testGEMM3, testGEMM4, testGEMM5, testGEMM6, testGEMM7, testGEMM8, testGEMM9, testGEMM10, testSYMM1, testSYMM2, testSYMM3, testSYMM4, testSYMM5, testSYMM6, testSYMM7, testSYMM8, testSYMM9, testTRMM1, testTRMM2, testTRMM3, testTRMM4, testTRMM5, testTRMM6, testTRMM7, testTRSM1, testTRSM2, testTRSM3, testTRSM4, testTRSM5, testTRSM6, testTRSM7)
#define COMPARE_FADS(a, b)
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
expr val()
#define A
Sacado::Fad::DFad< double > FadType
Sacado::Fad::Vector< unsigned int, FadType > VectorType
Sacado::Random< double > real_urand
Sacado::Random< ScalarType > urand
Sacado::ScalarType< FadType >::type ScalarType
Fad specializations for Teuchos::BLAS wrappers.
A random number generator that generates random numbers uniformly distributed in the interval (a,...
const double y