ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_BLAS_SYR2K_HEADER 00036 #define TEMPLATE_BLAS_SYR2K_HEADER 00037 00038 00039 template<class Treal> 00040 int template_blas_syr2k(const char *uplo, const char *trans, const integer *n, 00041 const integer *k, const Treal *alpha, const Treal *a, 00042 const integer *lda, const Treal *b, const integer *ldb, 00043 const Treal *beta, Treal *c__, const integer *ldc) 00044 { 00045 /* System generated locals */ 00046 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 00047 i__3; 00048 /* Local variables */ 00049 integer info; 00050 Treal temp1, temp2; 00051 integer i__, j, l; 00052 integer nrowa; 00053 logical upper; 00054 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00055 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00056 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 00057 /* Purpose 00058 ======= 00059 DSYR2K performs one of the symmetric rank 2k operations 00060 C := alpha*A*B' + alpha*B*A' + beta*C, 00061 or 00062 C := alpha*A'*B + alpha*B'*A + beta*C, 00063 where alpha and beta are scalars, C is an n by n symmetric matrix 00064 and A and B are n by k matrices in the first case and k by n 00065 matrices in the second case. 00066 Parameters 00067 ========== 00068 UPLO - CHARACTER*1. 00069 On entry, UPLO specifies whether the upper or lower 00070 triangular part of the array C is to be referenced as 00071 follows: 00072 UPLO = 'U' or 'u' Only the upper triangular part of C 00073 is to be referenced. 00074 UPLO = 'L' or 'l' Only the lower triangular part of C 00075 is to be referenced. 00076 Unchanged on exit. 00077 TRANS - CHARACTER*1. 00078 On entry, TRANS specifies the operation to be performed as 00079 follows: 00080 TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + 00081 beta*C. 00082 TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + 00083 beta*C. 00084 TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + 00085 beta*C. 00086 Unchanged on exit. 00087 N - INTEGER. 00088 On entry, N specifies the order of the matrix C. N must be 00089 at least zero. 00090 Unchanged on exit. 00091 K - INTEGER. 00092 On entry with TRANS = 'N' or 'n', K specifies the number 00093 of columns of the matrices A and B, and on entry with 00094 TRANS = 'T' or 't' or 'C' or 'c', K specifies the number 00095 of rows of the matrices A and B. K must be at least zero. 00096 Unchanged on exit. 00097 ALPHA - DOUBLE PRECISION. 00098 On entry, ALPHA specifies the scalar alpha. 00099 Unchanged on exit. 00100 A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is 00101 k when TRANS = 'N' or 'n', and is n otherwise. 00102 Before entry with TRANS = 'N' or 'n', the leading n by k 00103 part of the array A must contain the matrix A, otherwise 00104 the leading k by n part of the array A must contain the 00105 matrix A. 00106 Unchanged on exit. 00107 LDA - INTEGER. 00108 On entry, LDA specifies the first dimension of A as declared 00109 in the calling (sub) program. When TRANS = 'N' or 'n' 00110 then LDA must be at least max( 1, n ), otherwise LDA must 00111 be at least max( 1, k ). 00112 Unchanged on exit. 00113 B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 00114 k when TRANS = 'N' or 'n', and is n otherwise. 00115 Before entry with TRANS = 'N' or 'n', the leading n by k 00116 part of the array B must contain the matrix B, otherwise 00117 the leading k by n part of the array B must contain the 00118 matrix B. 00119 Unchanged on exit. 00120 LDB - INTEGER. 00121 On entry, LDB specifies the first dimension of B as declared 00122 in the calling (sub) program. When TRANS = 'N' or 'n' 00123 then LDB must be at least max( 1, n ), otherwise LDB must 00124 be at least max( 1, k ). 00125 Unchanged on exit. 00126 BETA - DOUBLE PRECISION. 00127 On entry, BETA specifies the scalar beta. 00128 Unchanged on exit. 00129 C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). 00130 Before entry with UPLO = 'U' or 'u', the leading n by n 00131 upper triangular part of the array C must contain the upper 00132 triangular part of the symmetric matrix and the strictly 00133 lower triangular part of C is not referenced. On exit, the 00134 upper triangular part of the array C is overwritten by the 00135 upper triangular part of the updated matrix. 00136 Before entry with UPLO = 'L' or 'l', the leading n by n 00137 lower triangular part of the array C must contain the lower 00138 triangular part of the symmetric matrix and the strictly 00139 upper triangular part of C is not referenced. On exit, the 00140 lower triangular part of the array C is overwritten by the 00141 lower triangular part of the updated matrix. 00142 LDC - INTEGER. 00143 On entry, LDC specifies the first dimension of C as declared 00144 in the calling (sub) program. LDC must be at least 00145 max( 1, n ). 00146 Unchanged on exit. 00147 Level 3 Blas routine. 00148 -- Written on 8-February-1989. 00149 Jack Dongarra, Argonne National Laboratory. 00150 Iain Duff, AERE Harwell. 00151 Jeremy Du Croz, Numerical Algorithms Group Ltd. 00152 Sven Hammarling, Numerical Algorithms Group Ltd. 00153 Test the input parameters. 00154 Parameter adjustments */ 00155 a_dim1 = *lda; 00156 a_offset = 1 + a_dim1 * 1; 00157 a -= a_offset; 00158 b_dim1 = *ldb; 00159 b_offset = 1 + b_dim1 * 1; 00160 b -= b_offset; 00161 c_dim1 = *ldc; 00162 c_offset = 1 + c_dim1 * 1; 00163 c__ -= c_offset; 00164 /* Function Body */ 00165 if (template_blas_lsame(trans, "N")) { 00166 nrowa = *n; 00167 } else { 00168 nrowa = *k; 00169 } 00170 upper = template_blas_lsame(uplo, "U"); 00171 info = 0; 00172 if (! upper && ! template_blas_lsame(uplo, "L")) { 00173 info = 1; 00174 } else if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans, 00175 "T") && ! template_blas_lsame(trans, "C")) { 00176 info = 2; 00177 } else if (*n < 0) { 00178 info = 3; 00179 } else if (*k < 0) { 00180 info = 4; 00181 } else if (*lda < maxMACRO(1,nrowa)) { 00182 info = 7; 00183 } else if (*ldb < maxMACRO(1,nrowa)) { 00184 info = 9; 00185 } else if (*ldc < maxMACRO(1,*n)) { 00186 info = 12; 00187 } 00188 if (info != 0) { 00189 template_blas_erbla("SYR2K ", &info); 00190 return 0; 00191 } 00192 /* Quick return if possible. */ 00193 if (*n == 0 || ( (*alpha == 0. || *k == 0) && *beta == 1. ) ) { 00194 return 0; 00195 } 00196 /* And when alpha.eq.zero. */ 00197 if (*alpha == 0.) { 00198 if (upper) { 00199 if (*beta == 0.) { 00200 i__1 = *n; 00201 for (j = 1; j <= i__1; ++j) { 00202 i__2 = j; 00203 for (i__ = 1; i__ <= i__2; ++i__) { 00204 c___ref(i__, j) = 0.; 00205 /* L10: */ 00206 } 00207 /* L20: */ 00208 } 00209 } else { 00210 i__1 = *n; 00211 for (j = 1; j <= i__1; ++j) { 00212 i__2 = j; 00213 for (i__ = 1; i__ <= i__2; ++i__) { 00214 c___ref(i__, j) = *beta * c___ref(i__, j); 00215 /* L30: */ 00216 } 00217 /* L40: */ 00218 } 00219 } 00220 } else { 00221 if (*beta == 0.) { 00222 i__1 = *n; 00223 for (j = 1; j <= i__1; ++j) { 00224 i__2 = *n; 00225 for (i__ = j; i__ <= i__2; ++i__) { 00226 c___ref(i__, j) = 0.; 00227 /* L50: */ 00228 } 00229 /* L60: */ 00230 } 00231 } else { 00232 i__1 = *n; 00233 for (j = 1; j <= i__1; ++j) { 00234 i__2 = *n; 00235 for (i__ = j; i__ <= i__2; ++i__) { 00236 c___ref(i__, j) = *beta * c___ref(i__, j); 00237 /* L70: */ 00238 } 00239 /* L80: */ 00240 } 00241 } 00242 } 00243 return 0; 00244 } 00245 /* Start the operations. */ 00246 if (template_blas_lsame(trans, "N")) { 00247 /* Form C := alpha*A*B' + alpha*B*A' + C. */ 00248 if (upper) { 00249 i__1 = *n; 00250 for (j = 1; j <= i__1; ++j) { 00251 if (*beta == 0.) { 00252 i__2 = j; 00253 for (i__ = 1; i__ <= i__2; ++i__) { 00254 c___ref(i__, j) = 0.; 00255 /* L90: */ 00256 } 00257 } else if (*beta != 1.) { 00258 i__2 = j; 00259 for (i__ = 1; i__ <= i__2; ++i__) { 00260 c___ref(i__, j) = *beta * c___ref(i__, j); 00261 /* L100: */ 00262 } 00263 } 00264 i__2 = *k; 00265 for (l = 1; l <= i__2; ++l) { 00266 if (a_ref(j, l) != 0. || b_ref(j, l) != 0.) { 00267 temp1 = *alpha * b_ref(j, l); 00268 temp2 = *alpha * a_ref(j, l); 00269 i__3 = j; 00270 for (i__ = 1; i__ <= i__3; ++i__) { 00271 c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l) 00272 * temp1 + b_ref(i__, l) * temp2; 00273 /* L110: */ 00274 } 00275 } 00276 /* L120: */ 00277 } 00278 /* L130: */ 00279 } 00280 } else { 00281 i__1 = *n; 00282 for (j = 1; j <= i__1; ++j) { 00283 if (*beta == 0.) { 00284 i__2 = *n; 00285 for (i__ = j; i__ <= i__2; ++i__) { 00286 c___ref(i__, j) = 0.; 00287 /* L140: */ 00288 } 00289 } else if (*beta != 1.) { 00290 i__2 = *n; 00291 for (i__ = j; i__ <= i__2; ++i__) { 00292 c___ref(i__, j) = *beta * c___ref(i__, j); 00293 /* L150: */ 00294 } 00295 } 00296 i__2 = *k; 00297 for (l = 1; l <= i__2; ++l) { 00298 if (a_ref(j, l) != 0. || b_ref(j, l) != 0.) { 00299 temp1 = *alpha * b_ref(j, l); 00300 temp2 = *alpha * a_ref(j, l); 00301 i__3 = *n; 00302 for (i__ = j; i__ <= i__3; ++i__) { 00303 c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l) 00304 * temp1 + b_ref(i__, l) * temp2; 00305 /* L160: */ 00306 } 00307 } 00308 /* L170: */ 00309 } 00310 /* L180: */ 00311 } 00312 } 00313 } else { 00314 /* Form C := alpha*A'*B + alpha*B'*A + C. */ 00315 if (upper) { 00316 i__1 = *n; 00317 for (j = 1; j <= i__1; ++j) { 00318 i__2 = j; 00319 for (i__ = 1; i__ <= i__2; ++i__) { 00320 temp1 = 0.; 00321 temp2 = 0.; 00322 i__3 = *k; 00323 for (l = 1; l <= i__3; ++l) { 00324 temp1 += a_ref(l, i__) * b_ref(l, j); 00325 temp2 += b_ref(l, i__) * a_ref(l, j); 00326 /* L190: */ 00327 } 00328 if (*beta == 0.) { 00329 c___ref(i__, j) = *alpha * temp1 + *alpha * temp2; 00330 } else { 00331 c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha * 00332 temp1 + *alpha * temp2; 00333 } 00334 /* L200: */ 00335 } 00336 /* L210: */ 00337 } 00338 } else { 00339 i__1 = *n; 00340 for (j = 1; j <= i__1; ++j) { 00341 i__2 = *n; 00342 for (i__ = j; i__ <= i__2; ++i__) { 00343 temp1 = 0.; 00344 temp2 = 0.; 00345 i__3 = *k; 00346 for (l = 1; l <= i__3; ++l) { 00347 temp1 += a_ref(l, i__) * b_ref(l, j); 00348 temp2 += b_ref(l, i__) * a_ref(l, j); 00349 /* L220: */ 00350 } 00351 if (*beta == 0.) { 00352 c___ref(i__, j) = *alpha * temp1 + *alpha * temp2; 00353 } else { 00354 c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha * 00355 temp1 + *alpha * temp2; 00356 } 00357 /* L230: */ 00358 } 00359 /* L240: */ 00360 } 00361 } 00362 } 00363 return 0; 00364 /* End of DSYR2K. */ 00365 } /* dsyr2k_ */ 00366 #undef c___ref 00367 #undef b_ref 00368 #undef a_ref 00369 00370 #endif