ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00043 #ifndef GBLAS 00044 #define GBLAS 00045 #include <ctime> 00046 #include "Failure.h" 00047 00048 // We need to include config.h to get USE_LINALG_TEMPLATES flag 00049 #include "config.h" 00050 00051 #include "template_lapack_common.h" 00052 00053 /* LEVEL 3 */ 00054 extern "C" void dgemm_(const char *ta,const char *tb, 00055 const int *n, const int *k, const int *l, 00056 const double *alpha,const double *A,const int *lda, 00057 const double *B, const int *ldb, 00058 const double *beta, double *C, const int *ldc); 00059 extern "C" void dpptrf_(const char *uplo,const int *n, double* ap, int *info); 00060 extern "C" void dspgst_(const int *itype, const char *uplo,const int *n, 00061 double* ap,const double *bp,int *info); 00062 extern "C" void dtptri_(const char *uplo,const char *diag,const int *n, 00063 double* ap,int *info); 00064 /* unit triangular means that a value of 1.0 is assumed */ 00065 /* for the diagonal elements (hence diagonal not stored in packed format) */ 00066 extern "C" void dtrmm_(const char *side,const char *uplo,const char *transa, 00067 const char *diag,const int *m,const int *n, 00068 const double *alpha,const double *A,const int *lda, 00069 double *B,const int *ldb); 00070 extern "C" void dsygv_(const int *itype,const char *jobz, 00071 const char *uplo,const int *n, 00072 double *A,const int *lda,double *B,const int *ldb, 00073 double* w,double* work,const int *lwork,int *info); 00074 extern "C" void dggev_(const char *jobbl, const char *jobvr, const int *n, 00075 double *A, const int *lda, double *B, const int *ldb, 00076 double *alphar, double *alphai, double *beta, 00077 double *vl, const int *ldvl, 00078 double *vr, const int *ldvr, 00079 double *work, const int *lwork, int *info); 00080 extern "C" void dpotrf_(const char *uplo, const int *n, double *A, 00081 const int *lda, int *info); 00082 extern "C" void dtrtri_(const char *uplo,const char *diag,const int *n, 00083 double *A, const int *lda, int *info); 00084 extern "C" void dsyrk_(const char *uplo, const char *trans, const int *n, 00085 const int *k, const double *alpha, const double *A, 00086 const int *lda, const double *beta, 00087 double *C, const int *ldc); 00088 extern "C" void dsymm_(const char *side,const char *uplo, 00089 const int *m,const int *n, 00090 const double *alpha,const double *A,const int *lda, 00091 const double *B,const int *ldb, const double* beta, 00092 double *C,const int *ldc); 00093 extern "C" void dpocon_(const char *uplo, const int *n, const double *A, 00094 const int *lda, const double *anorm, double *rcond, 00095 double *work, int *iwork, int *info); 00096 extern "C" void dstevx_(const char *jobz, const char *range, const int *n, 00097 double *d, double *e, const double *vl, 00098 const double *vu, const int *il, const int *iu, 00099 const double *abstol, int *m, double *w, double *z, 00100 const int *ldz, double *work, int *iwork, int *ifail, 00101 int *info); 00102 extern "C" void dstevr_(const char *jobz, const char *range, const int *n, 00103 double *d, double *e, const double *vl, 00104 const double *vu, const int *il, const int *iu, 00105 const double *abstol, int *m, double *w, double *z, 00106 const int *ldz, int* isuppz, double *work, int* lwork, 00107 int *iwork, int* liwork, int *info); 00108 extern "C" void dsyev_(const char *jobz, const char *uplo, const int *n, 00109 double *a, const int *lda, double *w, double *work, 00110 const int *lwork, int *info); 00111 00112 /* LEVEL 2 */ 00113 extern "C" void dgemv_(const char *ta, const int *m, const int *n, 00114 const double *alpha, const double *A, const int *lda, 00115 const double *x, const int *incx, const double *beta, 00116 double *y, const int *incy); 00117 extern "C" void dsymv_(const char *uplo, const int *n, 00118 const double *alpha, const double *A, const int *lda, 00119 const double *x, const int *incx, const double *beta, 00120 double *y, const int *incy); 00121 extern "C" void dtrmv_(const char *uplo, const char *trans, const char *diag, 00122 const int *n, const double *A, const int *lda, 00123 double *x, const int *incx); 00124 /* LEVEL 1 */ 00125 extern "C" void dscal_(const int* n,const double* da, double* dx, 00126 const int* incx); 00127 extern "C" double ddot_(const int* n, const double* dx, const int* incx, 00128 const double* dy, const int* incy); 00129 extern "C" void daxpy_(const int* n, const double* da, const double* dx, 00130 const int* incx, double* dy,const int* incy); 00131 00132 /* Single precision */ 00133 /* LEVEL 3 */ 00134 extern "C" void sgemm_(const char *ta,const char *tb, 00135 const int *n, const int *k, const int *l, 00136 const float *alpha,const float *A,const int *lda, 00137 const float *B, const int *ldb, 00138 const float *beta, float *C, const int *ldc); 00139 extern "C" void spptrf_(const char *uplo,const int *n, float* ap, int *info); 00140 extern "C" void sspgst_(const int *itype, const char *uplo,const int *n, 00141 float* ap,const float *bp,int *info); 00142 extern "C" void stptri_(const char *uplo,const char *diag,const int *n, 00143 float* ap,int *info); 00144 /* unit triangular means that a value of 1.0 is assumed */ 00145 /* for the diagonal elements (hence diagonal not stored in packed format) */ 00146 extern "C" void strmm_(const char *side,const char *uplo,const char *transa, 00147 const char *diag,const int *m,const int *n, 00148 const float *alpha,const float *A,const int *lda, 00149 float *B,const int *ldb); 00150 extern "C" void ssygv_(const int *itype,const char *jobz, 00151 const char *uplo,const int *n, 00152 float *A,const int *lda,float *B,const int *ldb, 00153 float* w,float* work,const int *lwork,int *info); 00154 extern "C" void sggev_(const char *jobbl, const char *jobvr, const int *n, 00155 float *A, const int *lda, float *B, const int *ldb, 00156 float *alphar, float *alphai, float *beta, 00157 float *vl, const int *ldvl, 00158 float *vr, const int *ldvr, 00159 float *work, const int *lwork, int *info); 00160 extern "C" void spotrf_(const char *uplo, const int *n, float *A, 00161 const int *lda, int *info); 00162 extern "C" void strtri_(const char *uplo,const char *diag,const int *n, 00163 float *A, const int *lda, int *info); 00164 extern "C" void ssyrk_(const char *uplo, const char *trans, const int *n, 00165 const int *k, const float *alpha, const float *A, 00166 const int *lda, const float *beta, 00167 float *C, const int *ldc); 00168 extern "C" void ssymm_(const char *side,const char *uplo, 00169 const int *m,const int *n, 00170 const float *alpha,const float *A,const int *lda, 00171 const float *B,const int *ldb, const float* beta, 00172 float *C,const int *ldc); 00173 extern "C" void spocon_(const char *uplo, const int *n, const float *A, 00174 const int *lda, const float *anorm, float *rcond, 00175 float *work, int *iwork, int *info); 00176 extern "C" void sstevx_(const char *jobz, const char *range, const int *n, 00177 float *d, float *e, const float *vl, 00178 const float *vu, const int *il, const int *iu, 00179 const float *abstol, int *m, float *w, float *z, 00180 const int *ldz, float *work, int *iwork, int *ifail, 00181 int *info); 00182 extern "C" void sstevr_(const char *jobz, const char *range, const int *n, 00183 float *d, float *e, const float *vl, 00184 const float *vu, const int *il, const int *iu, 00185 const float *abstol, int *m, float *w, float *z, 00186 const int *ldz, int* isuppz, float *work, int* lwork, 00187 int *iwork, int* liwork, int *info); 00188 extern "C" void ssyev_(const char *jobz, const char *uplo, const int *n, 00189 float *a, const int *lda, float *w, float *work, 00190 const int *lwork, int *info); 00191 00192 /* LEVEL 2 */ 00193 extern "C" void sgemv_(const char *ta, const int *m, const int *n, 00194 const float *alpha, const float *A, const int *lda, 00195 const float *x, const int *incx, const float *beta, 00196 float *y, const int *incy); 00197 extern "C" void ssymv_(const char *uplo, const int *n, 00198 const float *alpha, const float *A, const int *lda, 00199 const float *x, const int *incx, const float *beta, 00200 float *y, const int *incy); 00201 extern "C" void strmv_(const char *uplo, const char *trans, const char *diag, 00202 const int *n, const float *A, const int *lda, 00203 float *x, const int *incx); 00204 /* LEVEL 1 */ 00205 extern "C" void sscal_(const int* n,const float* da, float* dx, 00206 const int* incx); 00207 #if 0 00208 // sdot_ is unreliable because of varying return type in different 00209 // implementations. We therefore always use template dot for single precision 00210 extern "C" double sdot_(const int* n, const float* dx, const int* incx, 00211 const float* dy, const int* incy); 00212 #endif 00213 extern "C" void saxpy_(const int* n, const float* da, const float* dx, 00214 const int* incx, float* dy,const int* incy); 00215 00216 namespace mat 00217 { 00218 struct Gblas { 00219 static float time; 00220 static bool timekeeping; 00221 }; 00222 00223 /*************** Default version throws exception */ 00224 template<class T> 00225 inline static void gemm(const char *ta,const char *tb, 00226 const int *n, const int *k, const int *l, 00227 const T *alpha,const T *A,const int *lda, 00228 const T *B, const int *ldb, 00229 const T *beta,T *C, const int *ldc) { 00230 template_blas_gemm(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc); 00231 } 00232 00233 00234 /* Computes the Cholesky factorization of a symmetric * 00235 * positive definite matrix in packed storage. */ 00236 template<class T> 00237 inline static void pptrf(const char *uplo,const int *n, T* ap, int *info) { 00238 template_lapack_pptrf(uplo,n,ap,info); 00239 } 00240 00241 template<class T> 00242 inline static void spgst(const int *itype, const char *uplo,const int *n, 00243 T* ap,const T *bp,int *info) { 00244 template_lapack_spgst(itype,uplo,n,ap,bp,info); 00245 } 00246 00247 /* Computes the inverse of a triangular matrix in packed storage. */ 00248 template<class T> 00249 inline static void tptri(const char *uplo,const char *diag,const int *n, 00250 T* ap,int *info) { 00251 template_lapack_tptri(uplo,diag,n,ap,info); 00252 } 00253 00254 template<class T> 00255 inline static void trmm(const char *side,const char *uplo, 00256 const char *transa, const char *diag, 00257 const int *m,const int *n, 00258 const T *alpha,const T *A,const int *lda, 00259 T *B,const int *ldb) { 00260 template_blas_trmm(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb); 00261 } 00262 00263 /* Computes all eigenvalues and the eigenvectors of a generalized * 00264 * symmetric-definite generalized eigenproblem, * 00265 * Ax= lambda Bx, ABx= lambda x, or BAx= lambda x. */ 00266 template<class T> 00267 inline static void sygv(const int *itype,const char *jobz, 00268 const char *uplo,const int *n, 00269 T *A,const int *lda,T *B,const int *ldb, 00270 T* w,T* work,const int *lwork,int *info) { 00271 template_lapack_sygv(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info); 00272 } 00273 00274 template<class T> 00275 inline static void ggev(const char *jobbl, const char *jobvr, 00276 const int *n, T *A, const int *lda, 00277 T *B, const int *ldb, T *alphar, 00278 T *alphai, T *beta, T *vl, 00279 const int *ldvl, T *vr, const int *ldvr, 00280 T *work, const int *lwork, int *info) { 00281 template_lapack_ggev(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 00282 ldvl, vr, ldvr, work, lwork, info); 00283 } 00284 00285 /* Computes the Cholesky factorization of a symmetric * 00286 * positive definite matrix in packed storage. */ 00287 template<class T> 00288 inline static void potrf(const char *uplo, const int *n, T *A, 00289 const int *lda, int *info) { 00290 template_lapack_potrf(uplo, n, A, lda, info); 00291 } 00292 00293 /* Computes the inverse of a triangular matrix. */ 00294 template<class T> 00295 inline static void trtri(const char *uplo,const char *diag,const int *n, 00296 T *A, const int *lda, int *info) { 00297 // Create copies of strings because they cannot be const inside trtri. 00298 char uploCopy[2]; 00299 char diagCopy[2]; 00300 uploCopy[0] = uplo[0]; 00301 uploCopy[1] = '\0'; 00302 diagCopy[0] = diag[0]; 00303 diagCopy[1] = '\0'; 00304 template_lapack_trtri(uploCopy, diagCopy, n, A, lda, info); 00305 } 00306 00307 template<class T> 00308 inline static void syrk(const char *uplo, const char *trans, const int *n, 00309 const int *k, const T *alpha, const T *A, 00310 const int *lda, const T *beta, 00311 T *C, const int *ldc) { 00312 template_blas_syrk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); 00313 } 00314 00315 template<class T> 00316 inline static void symm(const char *side,const char *uplo, 00317 const int *m,const int *n, 00318 const T *alpha,const T *A,const int *lda, 00319 const T *B,const int *ldb, const T* beta, 00320 T *C,const int *ldc) { 00321 template_blas_symm(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); 00322 } 00323 00324 template<class T> 00325 inline static void pocon(const char *uplo, const int *n, const T *A, 00326 const int *lda, const T *anorm, T *rcond, 00327 T *work, int *iwork, int *info) { 00328 template_lapack_pocon(uplo, n, A, lda, anorm, rcond, work, iwork, info); 00329 } 00330 00331 template<class T> 00332 inline static void stevx(const char *jobz, const char *range, 00333 const int *n, T *d, T *e, const T *vl, 00334 const T *vu, const int *il, const int *iu, 00335 const T *abstol, int *m, T *w, T *z, 00336 const int *ldz, T *work, int *iwork, int *ifail, 00337 int *info) { 00338 template_lapack_stevx(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 00339 work, iwork, ifail, info); 00340 } 00341 00342 template<class T> 00343 inline static void stevr(const char *jobz, const char *range, const int *n, 00344 T *d, T *e, const T *vl, 00345 const T *vu, const int *il, const int *iu, 00346 const T *abstol, int *m, T *w, T *z, 00347 const int *ldz, int* isuppz, T *work, int* lwork, 00348 int *iwork, int* liwork, int *info) { 00349 template_lapack_stevr(jobz, range, n, d, e, vl, vu, il, iu, abstol, 00350 m, w, z, ldz, isuppz, 00351 work, lwork, iwork, liwork, info); 00352 } 00353 00354 00355 template<class T> 00356 inline static void syev(const char *jobz, const char *uplo, const int *n, 00357 T *a, const int *lda, T *w, T *work, 00358 const int *lwork, int *info) { 00359 template_lapack_syev(jobz, uplo, n, a, lda, w, work, lwork, info); 00360 } 00361 00362 00363 /* LEVEL 2 */ 00364 template<class T> 00365 inline static void gemv(const char *ta, const int *m, const int *n, 00366 const T *alpha, const T *A, 00367 const int *lda, 00368 const T *x, const int *incx, 00369 const T *beta, T *y, const int *incy) { 00370 template_blas_gemv(ta, m, n, alpha, A, lda, x, incx, beta, y, incy); 00371 } 00372 00373 template<class T> 00374 inline static void symv(const char *uplo, const int *n, 00375 const T *alpha, const T *A, 00376 const int *lda, const T *x, 00377 const int *incx, const T *beta, 00378 T *y, const int *incy) { 00379 template_blas_symv(uplo, n, alpha, A, lda, x, incx, beta, y, incy); 00380 } 00381 00382 template<class T> 00383 inline static void trmv(const char *uplo, const char *trans, 00384 const char *diag, const int *n, 00385 const T *A, const int *lda, 00386 T *x, const int *incx) { 00387 template_blas_trmv(uplo, trans, diag, n, A, lda, x, incx); 00388 } 00389 00390 00391 /* LEVEL 1 */ 00392 template<class T> 00393 inline static void scal(const int* n,const T* da, T* dx, 00394 const int* incx) { 00395 template_blas_scal(n, da, dx, incx); 00396 } 00397 00398 template<class T> 00399 inline static T dot(const int* n, const T* dx, const int* incx, 00400 const T* dy, const int* incy) { 00401 return template_blas_dot(n, dx, incx, dy, incy); 00402 } 00403 00404 template<class T> 00405 inline static void axpy(const int* n, const T* da, const T* dx, 00406 const int* incx, T* dy,const int* incy) { 00407 template_blas_axpy(n, da, dx, incx, dy, incy); 00408 } 00409 00410 00411 00412 00413 /* Below follows specializations for double, single, etc. 00414 These specializations are not needed if template_blas and template_lapack are used, 00415 so in that case we skip this entire section. */ 00416 #ifndef USE_LINALG_TEMPLATES 00417 00418 00419 /*************** Double specialization */ 00420 template<> 00421 inline void gemm<double>(const char *ta,const char *tb, 00422 const int *n, const int *k, const int *l, 00423 const double *alpha, 00424 const double *A,const int *lda, 00425 const double *B, const int *ldb, 00426 const double *beta, 00427 double *C, const int *ldc) { 00428 if (Gblas::timekeeping) { 00429 clock_t start = clock(); 00430 dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc); 00431 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00432 } 00433 else { 00434 dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc); 00435 } 00436 } 00437 00438 template<> 00439 inline void pptrf<double>(const char *uplo,const int *n, 00440 double* ap, int *info) { 00441 if (Gblas::timekeeping) { 00442 clock_t start = clock(); 00443 dpptrf_(uplo,n,ap,info); 00444 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00445 } 00446 else { 00447 dpptrf_(uplo,n,ap,info); 00448 } 00449 } 00450 00451 template<> 00452 inline void spgst<double>(const int *itype, const char *uplo, 00453 const int *n, 00454 double* ap,const double *bp,int *info) { 00455 if (Gblas::timekeeping) { 00456 clock_t start = clock(); 00457 dspgst_(itype,uplo,n,ap,bp,info); 00458 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00459 } 00460 else { 00461 dspgst_(itype,uplo,n,ap,bp,info); 00462 } 00463 } 00464 00465 template<> 00466 inline void tptri<double>(const char *uplo,const char *diag,const int *n, 00467 double* ap,int *info) { 00468 if (Gblas::timekeeping) { 00469 clock_t start = clock(); 00470 dtptri_(uplo,diag,n,ap,info); 00471 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00472 } 00473 else { 00474 dtptri_(uplo,diag,n,ap,info); 00475 } 00476 } 00477 00478 template<> 00479 inline void trmm<double>(const char *side,const char *uplo, 00480 const char *transa, 00481 const char *diag,const int *m,const int *n, 00482 const double *alpha, 00483 const double *A,const int *lda, 00484 double *B,const int *ldb) { 00485 if (Gblas::timekeeping) { 00486 clock_t start = clock(); 00487 dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb); 00488 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00489 } 00490 else { 00491 dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb); 00492 } 00493 } 00494 00495 template<> 00496 inline void sygv<double>(const int *itype,const char *jobz, 00497 const char *uplo,const int *n, 00498 double *A,const int *lda, 00499 double *B,const int *ldb, 00500 double* w,double* work, 00501 const int *lwork,int *info) { 00502 if (Gblas::timekeeping) { 00503 clock_t start = clock(); 00504 dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info); 00505 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00506 } 00507 else { 00508 dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info); 00509 } 00510 } 00511 00512 template<> 00513 inline void ggev<double>(const char *jobbl, const char *jobvr, 00514 const int *n, double *A, const int *lda, 00515 double *B, const int *ldb, double *alphar, 00516 double *alphai, double *beta, double *vl, 00517 const int *ldvl, double *vr, const int *ldvr, 00518 double *work, const int *lwork, int *info) { 00519 if (Gblas::timekeeping) { 00520 clock_t start = clock(); 00521 dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 00522 ldvl, vr, ldvr, work, lwork, info); 00523 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00524 } 00525 else { 00526 dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 00527 ldvl, vr, ldvr, work, lwork, info); 00528 } 00529 } 00530 00531 00532 template<> 00533 inline void potrf<double>(const char *uplo, const int *n, double *A, 00534 const int *lda, int *info) { 00535 if (Gblas::timekeeping) { 00536 clock_t start = clock(); 00537 dpotrf_(uplo, n, A, lda, info); 00538 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00539 } 00540 else { 00541 dpotrf_(uplo, n, A, lda, info); 00542 } 00543 } 00544 00545 template<> 00546 inline void trtri<double>(const char *uplo,const char *diag,const int *n, 00547 double *A, const int *lda, int *info) { 00548 if (Gblas::timekeeping) { 00549 clock_t start = clock(); 00550 dtrtri_(uplo, diag, n, A, lda, info); 00551 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00552 } 00553 else { 00554 dtrtri_(uplo, diag, n, A, lda, info); 00555 } 00556 } 00557 00558 template<> 00559 inline void syrk<double>(const char *uplo, const char *trans, 00560 const int *n, const int *k, const double *alpha, 00561 const double *A, const int *lda, 00562 const double *beta, double *C, const int *ldc) { 00563 if (Gblas::timekeeping) { 00564 clock_t start = clock(); 00565 dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); 00566 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00567 } 00568 else { 00569 dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); 00570 } 00571 } 00572 00573 template<> 00574 inline void symm<double>(const char *side,const char *uplo, 00575 const int *m,const int *n, const double *alpha, 00576 const double *A,const int *lda, 00577 const double *B,const int *ldb, 00578 const double* beta, 00579 double *C,const int *ldc) { 00580 if (Gblas::timekeeping) { 00581 clock_t start = clock(); 00582 dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); 00583 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00584 } 00585 else { 00586 dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); 00587 } 00588 } 00589 00590 template<> 00591 inline void pocon<double>(const char *uplo, const int *n, 00592 const double *A, const int *lda, 00593 const double *anorm, double *rcond, 00594 double *work, int *iwork, int *info) { 00595 if (Gblas::timekeeping) { 00596 clock_t start = clock(); 00597 dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info); 00598 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00599 } 00600 else { 00601 dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info); 00602 } 00603 } 00604 00605 template<> 00606 inline void stevx<double>(const char *jobz, const char *range, 00607 const int *n, double *d, double *e, 00608 const double *vl, 00609 const double *vu, const int *il, const int *iu, 00610 const double *abstol, int *m, double *w, 00611 double *z, 00612 const int *ldz, double *work, int *iwork, 00613 int *ifail, int *info) { 00614 if (Gblas::timekeeping) { 00615 clock_t start = clock(); 00616 dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 00617 work, iwork, ifail, info); 00618 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00619 } 00620 else { 00621 dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 00622 work, iwork, ifail, info); 00623 } 00624 } 00625 00626 template<> 00627 inline void stevr<double>(const char *jobz, const char *range, 00628 const int *n, double *d, double *e, 00629 const double *vl, const double *vu, 00630 const int *il, const int *iu, 00631 const double *abstol, 00632 int *m, double *w, 00633 double *z, const int *ldz, int* isuppz, 00634 double *work, int* lwork, 00635 int *iwork, int* liwork, int *info) { 00636 if (Gblas::timekeeping) { 00637 clock_t start = clock(); 00638 dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol, 00639 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info); 00640 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00641 } 00642 else { 00643 dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol, 00644 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info); 00645 } 00646 } 00647 00648 00649 00650 template<> 00651 inline void syev<double>(const char *jobz, const char *uplo, const int *n, 00652 double *a, const int *lda, double *w, 00653 double *work, const int *lwork, int *info) { 00654 if (Gblas::timekeeping) { 00655 clock_t start = clock(); 00656 dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info); 00657 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00658 } 00659 else { 00660 dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info); 00661 } 00662 } 00663 00664 00665 /* LEVEL 2 */ 00666 template<> 00667 inline void gemv<double>(const char *ta, const int *m, const int *n, 00668 const double *alpha, const double *A, 00669 const int *lda, 00670 const double *x, const int *incx, 00671 const double *beta, double *y, const int *incy) { 00672 if (Gblas::timekeeping) { 00673 clock_t start = clock(); 00674 dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy); 00675 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00676 } 00677 else { 00678 dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy); 00679 } 00680 } 00681 00682 template<> 00683 inline void symv<double>(const char *uplo, const int *n, 00684 const double *alpha, const double *A, 00685 const int *lda, const double *x, 00686 const int *incx, const double *beta, 00687 double *y, const int *incy) { 00688 if (Gblas::timekeeping) { 00689 clock_t start = clock(); 00690 dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy); 00691 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00692 } 00693 else { 00694 dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy); 00695 } 00696 } 00697 00698 template<> 00699 inline void trmv<double>(const char *uplo, const char *trans, 00700 const char *diag, const int *n, 00701 const double *A, const int *lda, 00702 double *x, const int *incx) { 00703 if (Gblas::timekeeping) { 00704 clock_t start = clock(); 00705 dtrmv_(uplo, trans, diag, n, A, lda, x, incx); 00706 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00707 } 00708 else { 00709 dtrmv_(uplo, trans, diag, n, A, lda, x, incx); 00710 } 00711 } 00712 00713 00714 /* LEVEL 1 */ 00715 template<> 00716 inline void scal<double>(const int* n,const double* da, double* dx, 00717 const int* incx) { 00718 if (Gblas::timekeeping) { 00719 clock_t start = clock(); 00720 dscal_(n, da, dx, incx); 00721 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00722 } 00723 else { 00724 dscal_(n, da, dx, incx); 00725 } 00726 } 00727 00728 template<> 00729 inline double dot<double>(const int* n, const double* dx, const int* incx, 00730 const double* dy, const int* incy) { 00731 double tmp = 0; 00732 if (Gblas::timekeeping) { 00733 clock_t start = clock(); 00734 tmp = ddot_(n, dx, incx, dy, incy); 00735 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00736 } 00737 else { 00738 tmp = ddot_(n, dx, incx, dy, incy); 00739 } 00740 return tmp; 00741 } 00742 00743 template<> 00744 inline void axpy<double>(const int* n, const double* da, const double* dx, 00745 const int* incx, double* dy,const int* incy) { 00746 if (Gblas::timekeeping) { 00747 clock_t start = clock(); 00748 daxpy_(n, da, dx, incx, dy, incy); 00749 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00750 } 00751 else { 00752 daxpy_(n, da, dx, incx, dy, incy); 00753 } 00754 } 00755 00756 00757 /*************** Single specialization */ 00758 template<> 00759 inline void gemm<float>(const char *ta,const char *tb, 00760 const int *n, const int *k, const int *l, 00761 const float *alpha, 00762 const float *A,const int *lda, 00763 const float *B, const int *ldb, 00764 const float *beta, 00765 float *C, const int *ldc) { 00766 if (Gblas::timekeeping) { 00767 clock_t start = clock(); 00768 sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc); 00769 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00770 } 00771 else { 00772 sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc); 00773 } 00774 } 00775 00776 template<> 00777 inline void pptrf<float>(const char *uplo,const int *n, 00778 float* ap, int *info) { 00779 if (Gblas::timekeeping) { 00780 clock_t start = clock(); 00781 spptrf_(uplo,n,ap,info); 00782 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00783 } 00784 else { 00785 spptrf_(uplo,n,ap,info); 00786 } 00787 } 00788 00789 template<> 00790 inline void spgst<float>(const int *itype, const char *uplo, 00791 const int *n, 00792 float* ap,const float *bp,int *info) { 00793 if (Gblas::timekeeping) { 00794 clock_t start = clock(); 00795 sspgst_(itype,uplo,n,ap,bp,info); 00796 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00797 } 00798 else { 00799 sspgst_(itype,uplo,n,ap,bp,info); 00800 } 00801 } 00802 00803 template<> 00804 inline void tptri<float>(const char *uplo,const char *diag, 00805 const int *n, 00806 float* ap,int *info) { 00807 if (Gblas::timekeeping) { 00808 clock_t start = clock(); 00809 stptri_(uplo,diag,n,ap,info); 00810 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00811 } 00812 else { 00813 stptri_(uplo,diag,n,ap,info); 00814 } 00815 } 00816 00817 template<> 00818 inline void trmm<float>(const char *side,const char *uplo, 00819 const char *transa, 00820 const char *diag,const int *m,const int *n, 00821 const float *alpha, 00822 const float *A,const int *lda, 00823 float *B,const int *ldb) { 00824 if (Gblas::timekeeping) { 00825 clock_t start = clock(); 00826 strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb); 00827 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00828 } 00829 else { 00830 strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb); 00831 } 00832 } 00833 00834 template<> 00835 inline void sygv<float>(const int *itype,const char *jobz, 00836 const char *uplo,const int *n, 00837 float *A,const int *lda, 00838 float *B,const int *ldb, 00839 float* w,float* work, 00840 const int *lwork,int *info) { 00841 if (Gblas::timekeeping) { 00842 clock_t start = clock(); 00843 ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info); 00844 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00845 } 00846 else { 00847 ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info); 00848 } 00849 } 00850 00851 template<> 00852 inline void ggev<float>(const char *jobbl, const char *jobvr, 00853 const int *n, float *A, const int *lda, 00854 float *B, const int *ldb, float *alphar, 00855 float *alphai, float *beta, float *vl, 00856 const int *ldvl, float *vr, const int *ldvr, 00857 float *work, const int *lwork, int *info) { 00858 if (Gblas::timekeeping) { 00859 clock_t start = clock(); 00860 sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 00861 ldvl, vr, ldvr, work, lwork, info); 00862 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00863 } 00864 else { 00865 sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 00866 ldvl, vr, ldvr, work, lwork, info); 00867 } 00868 } 00869 00870 00871 template<> 00872 inline void potrf<float>(const char *uplo, const int *n, float *A, 00873 const int *lda, int *info) { 00874 if (Gblas::timekeeping) { 00875 clock_t start = clock(); 00876 spotrf_(uplo, n, A, lda, info); 00877 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00878 } 00879 else { 00880 spotrf_(uplo, n, A, lda, info); 00881 } 00882 } 00883 00884 template<> 00885 inline void trtri<float>(const char *uplo,const char *diag,const int *n, 00886 float *A, const int *lda, int *info) { 00887 if (Gblas::timekeeping) { 00888 clock_t start = clock(); 00889 strtri_(uplo, diag, n, A, lda, info); 00890 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00891 } 00892 else { 00893 strtri_(uplo, diag, n, A, lda, info); 00894 } 00895 } 00896 00897 template<> 00898 inline void syrk<float>(const char *uplo, const char *trans, 00899 const int *n, const int *k, const float *alpha, 00900 const float *A, const int *lda, 00901 const float *beta, float *C, const int *ldc) { 00902 if (Gblas::timekeeping) { 00903 clock_t start = clock(); 00904 ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); 00905 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00906 } 00907 else { 00908 ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); 00909 } 00910 } 00911 00912 template<> 00913 inline void symm<float>(const char *side,const char *uplo, 00914 const int *m,const int *n, const float *alpha, 00915 const float *A,const int *lda, 00916 const float *B,const int *ldb, 00917 const float* beta, 00918 float *C,const int *ldc) { 00919 if (Gblas::timekeeping) { 00920 clock_t start = clock(); 00921 ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); 00922 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00923 } 00924 else { 00925 ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); 00926 } 00927 } 00928 00929 template<> 00930 inline void pocon<float>(const char *uplo, const int *n, 00931 const float *A, const int *lda, 00932 const float *anorm, float *rcond, 00933 float *work, int *iwork, int *info) { 00934 if (Gblas::timekeeping) { 00935 clock_t start = clock(); 00936 spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info); 00937 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00938 } 00939 else { 00940 spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info); 00941 } 00942 } 00943 00944 template<> 00945 inline void stevx<float>(const char *jobz, const char *range, 00946 const int *n, float *d, float *e, 00947 const float *vl, 00948 const float *vu, const int *il, const int *iu, 00949 const float *abstol, int *m, float *w, 00950 float *z, 00951 const int *ldz, float *work, int *iwork, 00952 int *ifail, int *info) { 00953 if (Gblas::timekeeping) { 00954 clock_t start = clock(); 00955 sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 00956 work, iwork, ifail, info); 00957 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00958 } 00959 else { 00960 sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 00961 work, iwork, ifail, info); 00962 } 00963 } 00964 00965 template<> 00966 inline void stevr<float>(const char *jobz, const char *range, 00967 const int *n, float *d, float *e, 00968 const float *vl, const float *vu, 00969 const int *il, const int *iu, 00970 const float *abstol, 00971 int *m, float *w, 00972 float *z, const int *ldz, int* isuppz, 00973 float *work, int* lwork, 00974 int *iwork, int* liwork, int *info) { 00975 if (Gblas::timekeeping) { 00976 clock_t start = clock(); 00977 sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol, 00978 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info); 00979 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00980 } 00981 else { 00982 sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol, 00983 m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info); 00984 } 00985 } 00986 00987 template<> 00988 inline void syev<float>(const char *jobz, const char *uplo, const int *n, 00989 float *a, const int *lda, float *w, 00990 float *work, const int *lwork, int *info) { 00991 if (Gblas::timekeeping) { 00992 clock_t start = clock(); 00993 ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info); 00994 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 00995 } 00996 else { 00997 ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info); 00998 } 00999 } 01000 01001 01002 /* LEVEL 2 */ 01003 template<> 01004 inline void gemv<float>(const char *ta, const int *m, const int *n, 01005 const float *alpha, const float *A, 01006 const int *lda, 01007 const float *x, const int *incx, 01008 const float *beta, float *y, const int *incy) { 01009 if (Gblas::timekeeping) { 01010 clock_t start = clock(); 01011 sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy); 01012 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 01013 } 01014 else { 01015 sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy); 01016 } 01017 } 01018 01019 template<> 01020 inline void symv<float>(const char *uplo, const int *n, 01021 const float *alpha, const float *A, 01022 const int *lda, const float *x, 01023 const int *incx, const float *beta, 01024 float *y, const int *incy) { 01025 if (Gblas::timekeeping) { 01026 clock_t start = clock(); 01027 ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy); 01028 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 01029 } 01030 else { 01031 ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy); 01032 } 01033 } 01034 01035 template<> 01036 inline void trmv<float>(const char *uplo, const char *trans, 01037 const char *diag, const int *n, 01038 const float *A, const int *lda, 01039 float *x, const int *incx) { 01040 if (Gblas::timekeeping) { 01041 clock_t start = clock(); 01042 strmv_(uplo, trans, diag, n, A, lda, x, incx); 01043 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 01044 } 01045 else { 01046 strmv_(uplo, trans, diag, n, A, lda, x, incx); 01047 } 01048 } 01049 01050 /* LEVEL 1 */ 01051 template<> 01052 inline void scal<float>(const int* n,const float* da, float* dx, 01053 const int* incx) { 01054 if (Gblas::timekeeping) { 01055 clock_t start = clock(); 01056 sscal_(n, da, dx, incx); 01057 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 01058 } 01059 else { 01060 sscal_(n, da, dx, incx); 01061 } 01062 } 01063 #if 0 01064 // Sdot has different return type in different BLAS implementations 01065 // Therefore the specialization for single precision is removed 01066 template<> 01067 inline float dot<float>(const int* n, const float* dx, const int* incx, 01068 const float* dy, const int* incy) { 01069 float tmp; 01070 if (Gblas::timekeeping) { 01071 clock_t start = clock(); 01072 sdot_(n, dx, incx, dy, incy); 01073 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 01074 } 01075 else { 01076 sdot_(n, dx, incx, dy, incy); 01077 } 01078 return tmp; 01079 } 01080 #endif 01081 01082 template<> 01083 inline void axpy<float>(const int* n, const float* da, const float* dx, 01084 const int* incx, float* dy,const int* incy) { 01085 if (Gblas::timekeeping) { 01086 clock_t start = clock(); 01087 saxpy_(n, da, dx, incx, dy, incy); 01088 Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC); 01089 } 01090 else { 01091 saxpy_(n, da, dx, incx, dy, incy); 01092 } 01093 } 01094 01095 /* END OF SPECIALIZATIONS */ 01096 #endif 01097 01098 01099 01100 01101 01102 01103 01104 01105 01106 /* Other */ 01107 01108 template<class Treal> 01109 static void fulltopacked(const Treal* full, Treal* packed, const int size){ 01110 int pind=0; 01111 for (int col=0;col<size;col++) 01112 { 01113 for(int row=0;row<=col;row++) 01114 { 01115 packed[pind]=full[col*size+row]; 01116 pind++; 01117 } 01118 } 01119 } 01120 01121 template<class Treal> 01122 static void packedtofull(const Treal* packed, Treal* full, const int size){ 01123 int psize=(size+1)*size/2; 01124 int col=0; 01125 int row=0; 01126 for(int pind=0;pind<psize;pind++) 01127 { 01128 if (col==row) 01129 { 01130 full[col*size+row]=packed[pind]; 01131 col++; 01132 row=0; 01133 } 01134 else 01135 { 01136 full[col*size+row]=packed[pind]; 01137 full[row*size+col]=packed[pind]; 01138 row++; 01139 } 01140 } 01141 } 01142 01143 template<class Treal> 01144 static void tripackedtofull(const Treal* packed,Treal* full, 01145 const int size) { 01146 int psize=(size+1)*size/2; 01147 int col=0; 01148 int row=0; 01149 for(int pind=0;pind<psize;pind++) 01150 { 01151 if (col==row) 01152 { 01153 full[col*size+row]=packed[pind]; 01154 col++; 01155 row=0; 01156 } 01157 else 01158 { 01159 full[col*size+row]=packed[pind]; 01160 full[row*size+col]=0; 01161 row++; 01162 } 01163 } 01164 } 01165 01166 template<class Treal> 01167 static void trifulltofull(Treal* trifull, const int size) { 01168 for(int col = 0; col < size - 1; col++) 01169 for(int row = col + 1; row < size; row++) 01170 trifull[col * size + row] = 0; 01171 } 01172 01173 01174 } /* namespace mat */ 01175 01176 #endif /* GBLAS */