Teuchos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Teuchos_BLAS.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Teuchos: Common Tools Package
5// Copyright (2004) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40// @HEADER
41
42#include "Teuchos_BLAS.hpp"
44
45/* for INTEL_CXML, the second arg may need to be changed to 'one'. If so
46the appropriate declaration of one will need to be added back into
47functions that include the macro:
48*/
49
50namespace {
51#if defined (INTEL_CXML)
52 unsigned int one=1;
53#endif
54} // namespace
55
56#ifdef CHAR_MACRO
57#undef CHAR_MACRO
58#endif
59#if defined (INTEL_CXML)
60#define CHAR_MACRO(char_var) &char_var, one
61#else
62#define CHAR_MACRO(char_var) &char_var
63#endif
64
65
66const char Teuchos::ESideChar[] = {'L' , 'R' };
67const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' };
68const char Teuchos::EUploChar[] = {'U' , 'L' };
69const char Teuchos::EDiagChar[] = {'U' , 'N' };
70const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' };
71//const char Teuchos::EFactChar[] = {'F', 'N' };
72//const char Teuchos::ENormChar[] = {'O', 'I' };
73//const char Teuchos::ECompQChar[] = {'N', 'I', 'V' };
74//const char Teuchos::EJobChar[] = {'E', 'V', 'B' };
75//const char Teuchos::EJobSChar[] = {'E', 'S' };
76//const char Teuchos::EJobVSChar[] = {'V', 'N' };
77//const char Teuchos::EHowmnyChar[] = {'A', 'S' };
78//const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' };
79//const char Teuchos::ESortChar[] = {'N', 'S'};
80
81
82namespace {
83
84
85template<typename Scalar>
86Scalar generic_dot(const int& n, const Scalar* x, const int& incx,
87 const Scalar* y, const int& incy)
88{
90 Scalar dot = 0.0;
91 if (incx==1 && incy==1) {
92 for (int i = 0; i < n; ++i)
93 dot += (*x++)*ST::conjugate(*y++);
94 }
95 else {
96 if (incx < 0)
97 x = x - incx*(n-1);
98 if (incy < 0)
99 y = y - incy*(n-1);
100 for (int i = 0; i < n; ++i, x+=incx, y+=incy)
101 dot += (*x)*ST::conjugate(*y);
102 }
103 return dot;
104}
105
106
107} // namespace
108
109
110namespace Teuchos {
111
112//Explicitly instantiating these templates for windows due to an issue with
113//resolving them when linking dlls.
114#ifdef _MSC_VER
115# ifdef HAVE_TEUCHOS_COMPLEX
116 template class BLAS<long int, std::complex<float> >;
117 template class BLAS<long int, std::complex<double> >;
118# endif
119 template class BLAS<long int, float>;
120 template class BLAS<long int, double>;
121#endif
122
123 // *************************** BLAS<int,float> DEFINITIONS ******************************
124
125 void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const
126 { SROTG_F77(da, db, c, s ); }
127
128 void BLAS<int, float>::ROT(const int& n, float* dx, const int& incx, float* dy, const int& incy, float* c, float* s) const
129 { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
130
131
132 float BLAS<int, float>::ASUM(const int& n, const float* x, const int& incx) const
133 {
134#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
135 return cblas_sasum(n, x, incx);
136#elif defined(HAVE_TEUCHOS_BLASFLOAT)
137 float tmp = SASUM_F77(&n, x, &incx);
138 return tmp;
139#else
140 typedef ScalarTraits<float> ST;
141 float sum = 0.0;
142 if (incx == 1) {
143 for (int i = 0; i < n; ++i)
144 sum += ST::magnitude(*x++);
145 }
146 else {
147 for (int i = 0; i < n; ++i, x+=incx)
148 sum += ST::magnitude(*x);
149 }
150 return sum;
151#endif
152 }
153
154 void BLAS<int, float>::AXPY(const int& n, const float& alpha, const float* x, const int& incx, float* y, const int& incy) const
155 { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
156
157 void BLAS<int, float>::COPY(const int& n, const float* x, const int& incx, float* y, const int& incy) const
158 { SCOPY_F77(&n, x, &incx, y, &incy); }
159
160 float BLAS<int, float>::DOT(const int& n, const float* x, const int& incx, const float* y, const int& incy) const
161 {
162#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
163 return cblas_sdot(n, x, incx, y, incy);
164#elif defined(HAVE_TEUCHOS_BLASFLOAT)
165 return SDOT_F77(&n, x, &incx, y, &incy);
166#else
167 return generic_dot(n, x, incx, y, incy);
168#endif
169 }
170
171 int BLAS<int, float>::IAMAX(const int& n, const float* x, const int& incx) const
172 { return ISAMAX_F77(&n, x, &incx); }
173
174 float BLAS<int, float>::NRM2(const int& n, const float* x, const int& incx) const
175 {
176#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
177 return cblas_snrm2(n, x, incx);
178#elif defined(HAVE_TEUCHOS_BLASFLOAT)
179 return SNRM2_F77(&n, x, &incx);
180#else
182#endif
183 }
184
185 void BLAS<int, float>::SCAL(const int& n, const float& alpha, float* x, const int& incx) const
186 { SSCAL_F77(&n, &alpha, x, &incx); }
187
188 void BLAS<int, float>::GEMV(ETransp trans, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* x, const int& incx, const float& beta, float* y, const int& incy) const
189 { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
190
191 void BLAS<int, float>::GER(const int& m, const int& n, const float& alpha, const float* x, const int& incx, const float* y, const int& incy, float* A, const int& lda) const
192 { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
193
194 void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const float* A, const int& lda, float* x, const int& incx) const
196
197 void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
199
200 void BLAS<int, float>::SWAP(const int& n, float* const x, const int& incx, float* const y, const int& incy) const
201 {
202 SSWAP_F77 (&n, x, &incx, y, &incy);
203 }
204
205 void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const float& alpha, const float* A, const int& lda, const float* B, const int& ldb, const float& beta, float* C, const int& ldc) const
207
208 void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
210
211 void BLAS<int, float>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const float& alpha, const float* A, const int& lda, const float& beta, float* C, const int& ldc) const
213
214 void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
216
217 void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const float& alpha, const float* A, const int& lda, float* B, const int& ldb) const
219
220 // *************************** BLAS<int,double> DEFINITIONS ******************************
221
222 void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const
223 { DROTG_F77(da, db, c, s); }
224
225 void BLAS<int, double>::ROT(const int& n, double* dx, const int& incx, double* dy, const int& incy, double* c, double* s) const
226 { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
227
228 double BLAS<int, double>::ASUM(const int& n, const double* x, const int& incx) const
229 { return DASUM_F77(&n, x, &incx); }
230
231 void BLAS<int, double>::AXPY(const int& n, const double& alpha, const double* x, const int& incx, double* y, const int& incy) const
232 { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
233
234 void BLAS<int, double>::COPY(const int& n, const double* x, const int& incx, double* y, const int& incy) const
235 { DCOPY_F77(&n, x, &incx, y, &incy); }
236
237 double BLAS<int, double>::DOT(const int& n, const double* x, const int& incx, const double* y, const int& incy) const
238 {
239 return DDOT_F77(&n, x, &incx, y, &incy);
240 }
241
242 int BLAS<int, double>::IAMAX(const int& n, const double* x, const int& incx) const
243 { return IDAMAX_F77(&n, x, &incx); }
244
245 double BLAS<int, double>::NRM2(const int& n, const double* x, const int& incx) const
246 { return DNRM2_F77(&n, x, &incx); }
247
248 void BLAS<int, double>::SCAL(const int& n, const double& alpha, double* x, const int& incx) const
249 { DSCAL_F77(&n, &alpha, x, &incx); }
250
251 void BLAS<int, double>::GEMV(ETransp trans, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* x, const int& incx, const double& beta, double* y, const int& incy) const
252 { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
253
254 void BLAS<int, double>::GER(const int& m, const int& n, const double& alpha, const double* x, const int& incx, const double* y, const int& incy, double* A, const int& lda) const
255 { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
256
257 void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const double* A, const int& lda, double* x, const int& incx) const
259
260 void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
262
263 void BLAS<int, double>::SWAP(const int& n, double* const x, const int& incx, double* const y, const int& incy) const
264 {
265 DSWAP_F77 (&n, x, &incx, y, &incy);
266 }
267
268 void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const double& alpha, const double* A, const int& lda, const double* B, const int& ldb, const double& beta, double* C, const int& ldc) const
270
271 void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
273
274 void BLAS<int, double>::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const double& alpha, const double* A, const int& lda, const double& beta, double* C, const int& ldc) const
276
277 void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
279
280 void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const double& alpha, const double* A, const int& lda, double* B, const int& ldb) const
282
283#ifdef HAVE_TEUCHOS_COMPLEX
284
285 // *************************** BLAS<int,std::complex<float> > DEFINITIONS ******************************
286
287 void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
288 { CROTG_F77(da, db, c, s ); }
289
290 void BLAS<int, std::complex<float> >::ROT(const int& n, std::complex<float>* dx, const int& incx, std::complex<float>* dy, const int& incy, float* c, std::complex<float>* s) const
291 { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
292
293 float BLAS<int, std::complex<float> >::ASUM(const int& n, const std::complex<float>* x, const int& incx) const
294 {
295#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
296 return cblas_scasum(n, x, incx);
297#elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
298 return (float) SCASUM_F77(&n, x, &incx);
299#elif defined(HAVE_TEUCHOS_BLASFLOAT)
300 return SCASUM_F77(&n, x, &incx);
301#else // Wow, you just plain don't have this routine.
302 // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f.
303 // I've enhanced this by accumulating in double precision.
304 double result = 0;
305 if (incx == 1) {
306 for (int i = 0; i < n; ++i) {
307 result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
308 }
309 } else {
310 const int nincx = n * incx;
311 for (int i = 0; i < nincx; i += incx) {
312 result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
313 }
314 }
315 return static_cast<float> (result);
316#endif
317 }
318
319 void BLAS<int, std::complex<float> >::AXPY(const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
320 { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
321
322 void BLAS<int, std::complex<float> >::COPY(const int& n, const std::complex<float>* x, const int& incx, std::complex<float>* y, const int& incy) const
323 { CCOPY_F77(&n, x, &incx, y, &incy); }
324
325 std::complex<float> BLAS<int, std::complex<float> >::DOT(const int& n, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy) const
326 {
327#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
328 std::complex<float> z;
329 cblas_cdotc_sub(n,x,incx,y,incy,&z);
330 return z;
331#elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
332 std::complex<float> z;
333 CDOT_F77(&z, &n, x, &incx, y, &incy);
334 return z;
335#elif defined(HAVE_TEUCHOS_BLASFLOAT)
336 Teuchos_Complex_float_type_name z = CDOT_F77(&n, x, &incx, y, &incy);
337 return TEUCHOS_BLAS_CONVERT_COMPLEX_FORTRAN_TO_CXX(float, z);
338#else // Wow, you just plain don't have this routine.
339 // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f.
340 // I've enhanced this by accumulating in double precision.
341 std::complex<double> result (0, 0);
342 if (n >= 0) {
343 if (incx == 1 && incy == 1) {
344 for (int i = 0; i < n; ++i) {
345 result += std::conj (x[i]) * y[i];
346 }
347 } else {
348 int ix = 0;
349 int iy = 0;
350 if (incx < 0) {
351 ix = (1-n) * incx;
352 }
353 if (incy < 0) {
354 iy = (1-n) * incy;
355 }
356 for (int i = 0; i < n; ++i) {
357 result += std::conj (x[ix]) * y[iy];
358 ix += incx;
359 iy += incy;
360 }
361 }
362 }
363 return static_cast<std::complex<float> > (result);
364#endif
365 }
366
367 int BLAS<int, std::complex<float> >::IAMAX(const int& n, const std::complex<float>* x, const int& incx) const
368 { return ICAMAX_F77(&n, x, &incx); }
369
370 float BLAS<int, std::complex<float> >::NRM2(const int& n, const std::complex<float>* x, const int& incx) const
371 {
372#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
373 return cblas_scnrm2(n, x, incx);
374#elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
375 return (float) SCNRM2_F77(&n, x, &incx);
376#elif defined(HAVE_TEUCHOS_BLASFLOAT)
377 return SCNRM2_F77(&n, x, &incx);
378#else // Wow, you just plain don't have this routine.
379 // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f.
380 // I've enhanced this by accumulating in double precision.
381 if (n < 1 || incx < 1) {
382 return 0;
383 } else {
384 double scale = 0;
385 double ssq = 1;
386
387 const int upper = 1 + (n-1)*incx;
388 for (int ix = 0; ix < upper; ix += incx) {
389 // The reference BLAS implementation cleverly scales the
390 // intermediate result. so that even if the square of the norm
391 // would overflow, computing the norm itself does not. Hence,
392 // "ssq" for "scaled square root."
393 if (std::real (x[ix]) != 0) {
394 const double temp = std::abs (std::real (x[ix]));
395 if (scale < temp) {
396 const double scale_over_temp = scale / temp;
397 ssq = 1 + ssq * scale_over_temp*scale_over_temp;
398 // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
399 scale = temp;
400 } else {
401 const double temp_over_scale = temp / scale;
402 ssq = ssq + temp_over_scale*temp_over_scale;
403 }
404 }
405 if (std::imag (x[ix]) != 0) {
406 const double temp = std::abs (std::imag (x[ix]));
407 if (scale < temp) {
408 const double scale_over_temp = scale / temp;
409 ssq = 1 + ssq * scale_over_temp*scale_over_temp;
410 // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
411 scale = temp;
412 } else {
413 const double temp_over_scale = temp / scale;
414 ssq = ssq + temp_over_scale*temp_over_scale;
415 }
416 }
417 }
418 return static_cast<float> (scale * std::sqrt (ssq));
419 }
420#endif
421 }
422
423 void BLAS<int, std::complex<float> >::SCAL(const int& n, const std::complex<float> alpha, std::complex<float>* x, const int& incx) const
424 { CSCAL_F77(&n, &alpha, x, &incx); }
425
426 void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* x, const int& incx, const std::complex<float> beta, std::complex<float>* y, const int& incy) const
427 { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
428
429 void BLAS<int, std::complex<float> >::GER(const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* x, const int& incx, const std::complex<float>* y, const int& incy, std::complex<float>* A, const int& lda) const
430 { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
431
432 void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<float>* A, const int& lda, std::complex<float>* x, const int& incx) const
433 { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
434
435 void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
436 { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
437
438 void BLAS<int, std::complex<float> >::SWAP(const int& n, std::complex<float>* const x, const int& incx, std::complex<float>* const y, const int& incy) const
439 {
440 CSWAP_F77 (&n, x, &incx, y, &incy);
441 }
442
443 void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float>* B, const int& ldb, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
444 { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
445
446 void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
447 { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
448
449 void BLAS<int, std::complex<float> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, const std::complex<float> beta, std::complex<float>* C, const int& ldc) const
450 { CHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
451
452 void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
453 { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
454
455 void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<float> alpha, const std::complex<float>* A, const int& lda, std::complex<float>* B, const int& ldb) const
456 { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
457
458 // *************************** BLAS<int,std::complex<double> > DEFINITIONS ******************************
459
460 void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
461 { ZROTG_F77(da, db, c, s); }
462
463 void BLAS<int, std::complex<double> >::ROT(const int& n, std::complex<double>* dx, const int& incx, std::complex<double>* dy, const int& incy, double* c, std::complex<double>* s) const
464 { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
465
466 double BLAS<int, std::complex<double> >::ASUM(const int& n, const std::complex<double>* x, const int& incx) const
467 { return ZASUM_F77(&n, x, &incx); }
468
469 void BLAS<int, std::complex<double> >::AXPY(const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
470 { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
471
472 void BLAS<int, std::complex<double> >::COPY(const int& n, const std::complex<double>* x, const int& incx, std::complex<double>* y, const int& incy) const
473 { ZCOPY_F77(&n, x, &incx, y, &incy); }
474
475 std::complex<double> BLAS<int, std::complex<double> >::DOT(const int& n, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy) const
476 {
477#if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
478 std::complex<double> z;
479 cblas_zdotc_sub(n,x,incx,y,incy,&z);
480 return z;
481#elif defined(HAVE_COMPLEX_BLAS_PROBLEM)
482# if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
483 std::complex<double> z;
484 ZDOT_F77(&z, &n, x, &incx, y, &incy);
485 return z;
486# else
487 // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem
488 // doesn't have the easy workaround. I'll just reimplement the
489 // missing routine here. See www.netlib.org/blas/zdotc.f.
490 std::complex<double> ztemp (0, 0);
491 if (n > 0) {
492 if (incx == 1 && incy == 1) {
493 for (int i = 0; i < n; ++i) {
494 ztemp += std::conj (x[i]) * y[i];
495 }
496 } else {
497 int ix = 0;
498 int iy = 0;
499 if (incx < 0) {
500 ix = (1-n)*incx;
501 }
502 if (incy < 0) {
503 iy = (1-n)*incy;
504 }
505 for (int i = 0; i < n; ++i) {
506 ztemp += std::conj (x[ix]) * y[iy];
507 ix += incx;
508 iy += incy;
509 }
510 }
511 }
512 return ztemp;
513
514# endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
515#else
516 Teuchos_Complex_double_type_name z = ZDOT_F77(&n, x, &incx, y, &incy);
517 return TEUCHOS_BLAS_CONVERT_COMPLEX_FORTRAN_TO_CXX(double, z);
518#endif
519 }
520
521 int BLAS<int, std::complex<double> >::IAMAX(const int& n, const std::complex<double>* x, const int& incx) const
522 { return IZAMAX_F77(&n, x, &incx); }
523
524 double BLAS<int, std::complex<double> >::NRM2(const int& n, const std::complex<double>* x, const int& incx) const
525 { return ZNRM2_F77(&n, x, &incx); }
526
527 void BLAS<int, std::complex<double> >::SCAL(const int& n, const std::complex<double> alpha, std::complex<double>* x, const int& incx) const
528 { ZSCAL_F77(&n, &alpha, x, &incx); }
529
530 void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* x, const int& incx, const std::complex<double> beta, std::complex<double>* y, const int& incy) const
531 { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
532
533 void BLAS<int, std::complex<double> >::GER(const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* x, const int& incx, const std::complex<double>* y, const int& incy, std::complex<double>* A, const int& lda) const
534 { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
535
536 void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int& n, const std::complex<double>* A, const int& lda, std::complex<double>* x, const int& incx) const
537 { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
538
539 void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int& m, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double>* B, const int& ldb, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
540 { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
541
542 void BLAS<int, std::complex<double> >::SWAP(const int& n, std::complex<double>* const x, const int& incx, std::complex<double>* const y, const int& incy) const
543 {
544 ZSWAP_F77 (&n, x, &incx, y, &incy);
545 }
546
547 void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> *B, const int& ldb, const std::complex<double> beta, std::complex<double> *C, const int& ldc) const
548 { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
549
550 void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
551 { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
552
553 void BLAS<int, std::complex<double> >::HERK(EUplo uplo, ETransp trans, const int& n, const int& k, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, const std::complex<double> beta, std::complex<double>* C, const int& ldc) const
554 { ZHERK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
555
556 void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
557 { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
558
559 void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int& m, const int& n, const std::complex<double> alpha, const std::complex<double>* A, const int& lda, std::complex<double>* B, const int& ldb) const
560 { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
561
562#endif // HAVE_TEUCHOS_COMPLEX
563
564}
#define CHAR_MACRO(char_var)
Templated interface class to BLAS routines.
The Templated BLAS wrappers.
#define DSYMM_F77
#define DASUM_F77
#define DAXPY_F77
#define SROTG_F77
#define SROT_F77
#define SDOT_F77
#define IDAMAX_F77
#define DNRM2_F77
#define STRMV_F77
#define DGEMM_F77
#define SGEMM_F77
#define DSWAP_F77
#define DDOT_F77
#define STRMM_F77
#define SSYRK_F77
#define DSYRK_F77
#define DTRSM_F77
#define SNRM2_F77
#define SCOPY_F77
#define SAXPY_F77
#define DROTG_F77
#define ISAMAX_F77
#define STRSM_F77
#define SGEMV_F77
#define SSCAL_F77
#define Teuchos_Complex_double_type_name
#define SASUM_F77
#define SGER_F77
#define DGEMV_F77
#define DTRMM_F77
#define DTRMV_F77
#define Teuchos_Complex_float_type_name
#define DROT_F77
#define DSCAL_F77
#define DGER_F77
#define DCOPY_F77
#define SSWAP_F77
#define SSYMM_F77
Templated BLAS wrapper.
OrdinalType IAMAX(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Return the index of the element of x with the maximum magnitude.
void ROTG(ScalarType *da, ScalarType *db, rotg_c_type *c, ScalarType *s) const
Computes a Givens plane rotation.
ScalarType DOT(const OrdinalType &n, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy) const
Form the dot product of the vectors x and y.
void SYMM(ESide side, EUplo uplo, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the matrix-matrix operation: C <- alpha*A*B+beta*C or C <- alpha*B*A+beta*C where A is an m ...
void AXPY(const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Perform the operation: y <- y+alpha*x.
void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Solves the matrix equations: op(A)*X=alpha*B or X*op(A)=alpha*B where X and B are m by n matrices,...
void SYRK(EUplo uplo, ETransp trans, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
Performs the symmetric rank k operation: C <- alpha*A*A'+beta*C or C <- alpha*A'*A+beta*C,...
void GEMM(ETransp transa, ETransp transb, const OrdinalType &m, const OrdinalType &n, const OrdinalType &k, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const B_type *B, const OrdinalType &ldb, const beta_type beta, ScalarType *C, const OrdinalType &ldc) const
General matrix-matrix multiply.
ScalarTraits< ScalarType >::magnitudeType NRM2(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Compute the 2-norm of the vector x.
ScalarTraits< ScalarType >::magnitudeType ASUM(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx) const
Sum the absolute values of the entries of x.
void COPY(const OrdinalType &n, const ScalarType *x, const OrdinalType &incx, ScalarType *y, const OrdinalType &incy) const
Copy the vector x to the vector y.
void ROT(const OrdinalType &n, ScalarType *dx, const OrdinalType &incx, ScalarType *dy, const OrdinalType &incy, MagnitudeType *c, ScalarType *s) const
Applies a Givens plane rotation.
void GER(const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const x_type *x, const OrdinalType &incx, const y_type *y, const OrdinalType &incy, ScalarType *A, const OrdinalType &lda) const
Performs the rank 1 operation: A <- alpha*x*y'+A.
void SCAL(const OrdinalType &n, const ScalarType &alpha, ScalarType *x, const OrdinalType &incx) const
Scale the vector x by the constant alpha.
void SWAP(const OrdinalType &n, ScalarType *const x, const OrdinalType &incx, ScalarType *const y, const OrdinalType &incy) const
Swap the entries of x and y.
void GEMV(ETransp trans, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, const x_type *x, const OrdinalType &incx, const beta_type beta, ScalarType *y, const OrdinalType &incy) const
Performs the matrix-vector operation: y <- alpha*A*x+beta*y or y <- alpha*A'*x+beta*y where A is a ge...
void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType &n, const A_type *A, const OrdinalType &lda, ScalarType *x, const OrdinalType &incx) const
Performs the matrix-vector operation: x <- A*x or x <- A'*x where A is a unit/non-unit n by n upper/l...
void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType &m, const OrdinalType &n, const alpha_type alpha, const A_type *A, const OrdinalType &lda, ScalarType *B, const OrdinalType &ldb) const
Performs the matrix-matrix operation: B <- alpha*op(A)*B or B <- alpha*B*op(A) where op(A) is an unit...
Concrete serial communicator subclass.
Definition PackageA.cpp:3
Definition PackageB.cpp:3
Definition PackageC.cpp:3
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[]
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[]
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETypeChar[]
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[]
TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[]
static T squareroot(T x)
Returns a number of magnitudeType that is the square root of this scalar type x.