ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_BLAS_GEMM_HEADER 00036 #define TEMPLATE_BLAS_GEMM_HEADER 00037 00038 #include "template_blas_common.h" 00039 00040 template<class Treal> 00041 int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer * 00042 n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, 00043 const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, 00044 const integer *ldc) 00045 { 00046 /* System generated locals */ 00047 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 00048 i__3; 00049 /* Local variables */ 00050 integer info; 00051 logical nota, notb; 00052 Treal temp; 00053 integer i__, j, l; 00054 integer nrowa, nrowb; 00055 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00056 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00057 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 00058 /* Purpose 00059 ======= 00060 DGEMM performs one of the matrix-matrix operations 00061 C := alpha*op( A )*op( B ) + beta*C, 00062 where op( X ) is one of 00063 op( X ) = X or op( X ) = X', 00064 alpha and beta are scalars, and A, B and C are matrices, with op( A ) 00065 an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. 00066 Parameters 00067 ========== 00068 TRANSA - CHARACTER*1. 00069 On entry, TRANSA specifies the form of op( A ) to be used in 00070 the matrix multiplication as follows: 00071 TRANSA = 'N' or 'n', op( A ) = A. 00072 TRANSA = 'T' or 't', op( A ) = A'. 00073 TRANSA = 'C' or 'c', op( A ) = A'. 00074 Unchanged on exit. 00075 TRANSB - CHARACTER*1. 00076 On entry, TRANSB specifies the form of op( B ) to be used in 00077 the matrix multiplication as follows: 00078 TRANSB = 'N' or 'n', op( B ) = B. 00079 TRANSB = 'T' or 't', op( B ) = B'. 00080 TRANSB = 'C' or 'c', op( B ) = B'. 00081 Unchanged on exit. 00082 M - INTEGER. 00083 On entry, M specifies the number of rows of the matrix 00084 op( A ) and of the matrix C. M must be at least zero. 00085 Unchanged on exit. 00086 N - INTEGER. 00087 On entry, N specifies the number of columns of the matrix 00088 op( B ) and the number of columns of the matrix C. N must be 00089 at least zero. 00090 Unchanged on exit. 00091 K - INTEGER. 00092 On entry, K specifies the number of columns of the matrix 00093 op( A ) and the number of rows of the matrix op( B ). K must 00094 be at least zero. 00095 Unchanged on exit. 00096 ALPHA - DOUBLE PRECISION. 00097 On entry, ALPHA specifies the scalar alpha. 00098 Unchanged on exit. 00099 A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is 00100 k when TRANSA = 'N' or 'n', and is m otherwise. 00101 Before entry with TRANSA = 'N' or 'n', the leading m by k 00102 part of the array A must contain the matrix A, otherwise 00103 the leading k by m part of the array A must contain the 00104 matrix A. 00105 Unchanged on exit. 00106 LDA - INTEGER. 00107 On entry, LDA specifies the first dimension of A as declared 00108 in the calling (sub) program. When TRANSA = 'N' or 'n' then 00109 LDA must be at least max( 1, m ), otherwise LDA must be at 00110 least max( 1, k ). 00111 Unchanged on exit. 00112 B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 00113 n when TRANSB = 'N' or 'n', and is k otherwise. 00114 Before entry with TRANSB = 'N' or 'n', the leading k by n 00115 part of the array B must contain the matrix B, otherwise 00116 the leading n by k part of the array B must contain the 00117 matrix B. 00118 Unchanged on exit. 00119 LDB - INTEGER. 00120 On entry, LDB specifies the first dimension of B as declared 00121 in the calling (sub) program. When TRANSB = 'N' or 'n' then 00122 LDB must be at least max( 1, k ), otherwise LDB must be at 00123 least max( 1, n ). 00124 Unchanged on exit. 00125 BETA - DOUBLE PRECISION. 00126 On entry, BETA specifies the scalar beta. When BETA is 00127 supplied as zero then C need not be set on input. 00128 Unchanged on exit. 00129 C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). 00130 Before entry, the leading m by n part of the array C must 00131 contain the matrix C, except when beta is zero, in which 00132 case C need not be set on entry. 00133 On exit, the array C is overwritten by the m by n matrix 00134 ( alpha*op( A )*op( B ) + beta*C ). 00135 LDC - INTEGER. 00136 On entry, LDC specifies the first dimension of C as declared 00137 in the calling (sub) program. LDC must be at least 00138 max( 1, m ). 00139 Unchanged on exit. 00140 Level 3 Blas routine. 00141 -- Written on 8-February-1989. 00142 Jack Dongarra, Argonne National Laboratory. 00143 Iain Duff, AERE Harwell. 00144 Jeremy Du Croz, Numerical Algorithms Group Ltd. 00145 Sven Hammarling, Numerical Algorithms Group Ltd. 00146 Set NOTA and NOTB as true if A and B respectively are not 00147 transposed and set NROWA, NCOLA and NROWB as the number of rows 00148 and columns of A and the number of rows of B respectively. 00149 Parameter adjustments */ 00150 a_dim1 = *lda; 00151 a_offset = 1 + a_dim1 * 1; 00152 a -= a_offset; 00153 b_dim1 = *ldb; 00154 b_offset = 1 + b_dim1 * 1; 00155 b -= b_offset; 00156 c_dim1 = *ldc; 00157 c_offset = 1 + c_dim1 * 1; 00158 c__ -= c_offset; 00159 /* Function Body */ 00160 nota = template_blas_lsame(transa, "N"); 00161 notb = template_blas_lsame(transb, "N"); 00162 if (nota) { 00163 nrowa = *m; 00164 } else { 00165 nrowa = *k; 00166 } 00167 if (notb) { 00168 nrowb = *k; 00169 } else { 00170 nrowb = *n; 00171 } 00172 /* Test the input parameters. */ 00173 info = 0; 00174 if (! nota && ! template_blas_lsame(transa, "C") && ! template_blas_lsame( 00175 transa, "T")) { 00176 info = 1; 00177 } else if (! notb && ! template_blas_lsame(transb, "C") && ! 00178 template_blas_lsame(transb, "T")) { 00179 info = 2; 00180 } else if (*m < 0) { 00181 info = 3; 00182 } else if (*n < 0) { 00183 info = 4; 00184 } else if (*k < 0) { 00185 info = 5; 00186 } else if (*lda < maxMACRO(1,nrowa)) { 00187 info = 8; 00188 } else if (*ldb < maxMACRO(1,nrowb)) { 00189 info = 10; 00190 } else if (*ldc < maxMACRO(1,*m)) { 00191 info = 13; 00192 } 00193 if (info != 0) { 00194 template_blas_erbla("DGEMM ", &info); 00195 return 0; 00196 } 00197 /* Quick return if possible. */ 00198 if (*m == 0 || *n == 0 || ( (*alpha == 0. || *k == 0) && *beta == 1.) ) { 00199 return 0; 00200 } 00201 /* And if alpha.eq.zero. */ 00202 if (*alpha == 0.) { 00203 if (*beta == 0.) { 00204 i__1 = *n; 00205 for (j = 1; j <= i__1; ++j) { 00206 i__2 = *m; 00207 for (i__ = 1; i__ <= i__2; ++i__) { 00208 c___ref(i__, j) = 0.; 00209 /* L10: */ 00210 } 00211 /* L20: */ 00212 } 00213 } else { 00214 i__1 = *n; 00215 for (j = 1; j <= i__1; ++j) { 00216 i__2 = *m; 00217 for (i__ = 1; i__ <= i__2; ++i__) { 00218 c___ref(i__, j) = *beta * c___ref(i__, j); 00219 /* L30: */ 00220 } 00221 /* L40: */ 00222 } 00223 } 00224 return 0; 00225 } 00226 /* Start the operations. */ 00227 if (notb) { 00228 if (nota) { 00229 /* Form C := alpha*A*B + beta*C. */ 00230 i__1 = *n; 00231 for (j = 1; j <= i__1; ++j) { 00232 if (*beta == 0.) { 00233 i__2 = *m; 00234 for (i__ = 1; i__ <= i__2; ++i__) { 00235 c___ref(i__, j) = 0.; 00236 /* L50: */ 00237 } 00238 } else if (*beta != 1.) { 00239 i__2 = *m; 00240 for (i__ = 1; i__ <= i__2; ++i__) { 00241 c___ref(i__, j) = *beta * c___ref(i__, j); 00242 /* L60: */ 00243 } 00244 } 00245 i__2 = *k; 00246 for (l = 1; l <= i__2; ++l) { 00247 if (b_ref(l, j) != 0.) { 00248 temp = *alpha * b_ref(l, j); 00249 i__3 = *m; 00250 for (i__ = 1; i__ <= i__3; ++i__) { 00251 c___ref(i__, j) = c___ref(i__, j) + temp * a_ref( 00252 i__, l); 00253 /* L70: */ 00254 } 00255 } 00256 /* L80: */ 00257 } 00258 /* L90: */ 00259 } 00260 } else { 00261 /* Form C := alpha*A'*B + beta*C */ 00262 i__1 = *n; 00263 for (j = 1; j <= i__1; ++j) { 00264 i__2 = *m; 00265 for (i__ = 1; i__ <= i__2; ++i__) { 00266 temp = 0.; 00267 i__3 = *k; 00268 for (l = 1; l <= i__3; ++l) { 00269 temp += a_ref(l, i__) * b_ref(l, j); 00270 /* L100: */ 00271 } 00272 if (*beta == 0.) { 00273 c___ref(i__, j) = *alpha * temp; 00274 } else { 00275 c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__, 00276 j); 00277 } 00278 /* L110: */ 00279 } 00280 /* L120: */ 00281 } 00282 } 00283 } else { 00284 if (nota) { 00285 /* Form C := alpha*A*B' + beta*C */ 00286 i__1 = *n; 00287 for (j = 1; j <= i__1; ++j) { 00288 if (*beta == 0.) { 00289 i__2 = *m; 00290 for (i__ = 1; i__ <= i__2; ++i__) { 00291 c___ref(i__, j) = 0.; 00292 /* L130: */ 00293 } 00294 } else if (*beta != 1.) { 00295 i__2 = *m; 00296 for (i__ = 1; i__ <= i__2; ++i__) { 00297 c___ref(i__, j) = *beta * c___ref(i__, j); 00298 /* L140: */ 00299 } 00300 } 00301 i__2 = *k; 00302 for (l = 1; l <= i__2; ++l) { 00303 if (b_ref(j, l) != 0.) { 00304 temp = *alpha * b_ref(j, l); 00305 i__3 = *m; 00306 for (i__ = 1; i__ <= i__3; ++i__) { 00307 c___ref(i__, j) = c___ref(i__, j) + temp * a_ref( 00308 i__, l); 00309 /* L150: */ 00310 } 00311 } 00312 /* L160: */ 00313 } 00314 /* L170: */ 00315 } 00316 } else { 00317 /* Form C := alpha*A'*B' + beta*C */ 00318 i__1 = *n; 00319 for (j = 1; j <= i__1; ++j) { 00320 i__2 = *m; 00321 for (i__ = 1; i__ <= i__2; ++i__) { 00322 temp = 0.; 00323 i__3 = *k; 00324 for (l = 1; l <= i__3; ++l) { 00325 temp += a_ref(l, i__) * b_ref(j, l); 00326 /* L180: */ 00327 } 00328 if (*beta == 0.) { 00329 c___ref(i__, j) = *alpha * temp; 00330 } else { 00331 c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__, 00332 j); 00333 } 00334 /* L190: */ 00335 } 00336 /* L200: */ 00337 } 00338 } 00339 } 00340 return 0; 00341 /* End of DGEMM . */ 00342 } /* dgemm_ */ 00343 #undef c___ref 00344 #undef b_ref 00345 #undef a_ref 00346 00347 #endif