ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_BLAS_TRMM_HEADER 00036 #define TEMPLATE_BLAS_TRMM_HEADER 00037 00038 #include "template_blas_common.h" 00039 00040 template<class Treal> 00041 int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag, 00042 const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer * 00043 lda, Treal *b, const integer *ldb) 00044 { 00045 /* System generated locals */ 00046 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; 00047 /* Local variables */ 00048 integer info; 00049 Treal temp; 00050 integer i__, j, k; 00051 logical lside; 00052 integer nrowa; 00053 logical upper; 00054 logical nounit; 00055 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00056 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00057 /* Purpose 00058 ======= 00059 DTRMM performs one of the matrix-matrix operations 00060 B := alpha*op( A )*B, or B := alpha*B*op( A ), 00061 where alpha is a scalar, B is an m by n matrix, A is a unit, or 00062 non-unit, upper or lower triangular matrix and op( A ) is one of 00063 op( A ) = A or op( A ) = A'. 00064 Parameters 00065 ========== 00066 SIDE - CHARACTER*1. 00067 On entry, SIDE specifies whether op( A ) multiplies B from 00068 the left or right as follows: 00069 SIDE = 'L' or 'l' B := alpha*op( A )*B. 00070 SIDE = 'R' or 'r' B := alpha*B*op( A ). 00071 Unchanged on exit. 00072 UPLO - CHARACTER*1. 00073 On entry, UPLO specifies whether the matrix A is an upper or 00074 lower triangular matrix as follows: 00075 UPLO = 'U' or 'u' A is an upper triangular matrix. 00076 UPLO = 'L' or 'l' A is a lower triangular matrix. 00077 Unchanged on exit. 00078 TRANSA - CHARACTER*1. 00079 On entry, TRANSA specifies the form of op( A ) to be used in 00080 the matrix multiplication as follows: 00081 TRANSA = 'N' or 'n' op( A ) = A. 00082 TRANSA = 'T' or 't' op( A ) = A'. 00083 TRANSA = 'C' or 'c' op( A ) = A'. 00084 Unchanged on exit. 00085 DIAG - CHARACTER*1. 00086 On entry, DIAG specifies whether or not A is unit triangular 00087 as follows: 00088 DIAG = 'U' or 'u' A is assumed to be unit triangular. 00089 DIAG = 'N' or 'n' A is not assumed to be unit 00090 triangular. 00091 Unchanged on exit. 00092 M - INTEGER. 00093 On entry, M specifies the number of rows of B. M must be at 00094 least zero. 00095 Unchanged on exit. 00096 N - INTEGER. 00097 On entry, N specifies the number of columns of B. N must be 00098 at least zero. 00099 Unchanged on exit. 00100 ALPHA - DOUBLE PRECISION. 00101 On entry, ALPHA specifies the scalar alpha. When alpha is 00102 zero then A is not referenced and B need not be set before 00103 entry. 00104 Unchanged on exit. 00105 A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m 00106 when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. 00107 Before entry with UPLO = 'U' or 'u', the leading k by k 00108 upper triangular part of the array A must contain the upper 00109 triangular matrix and the strictly lower triangular part of 00110 A is not referenced. 00111 Before entry with UPLO = 'L' or 'l', the leading k by k 00112 lower triangular part of the array A must contain the lower 00113 triangular matrix and the strictly upper triangular part of 00114 A is not referenced. 00115 Note that when DIAG = 'U' or 'u', the diagonal elements of 00116 A are not referenced either, but are assumed to be unity. 00117 Unchanged on exit. 00118 LDA - INTEGER. 00119 On entry, LDA specifies the first dimension of A as declared 00120 in the calling (sub) program. When SIDE = 'L' or 'l' then 00121 LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' 00122 then LDA must be at least max( 1, n ). 00123 Unchanged on exit. 00124 B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). 00125 Before entry, the leading m by n part of the array B must 00126 contain the matrix B, and on exit is overwritten by the 00127 transformed matrix. 00128 LDB - INTEGER. 00129 On entry, LDB specifies the first dimension of B as declared 00130 in the calling (sub) program. LDB must be at least 00131 max( 1, m ). 00132 Unchanged on exit. 00133 Level 3 Blas routine. 00134 -- Written on 8-February-1989. 00135 Jack Dongarra, Argonne National Laboratory. 00136 Iain Duff, AERE Harwell. 00137 Jeremy Du Croz, Numerical Algorithms Group Ltd. 00138 Sven Hammarling, Numerical Algorithms Group Ltd. 00139 Test the input parameters. 00140 Parameter adjustments */ 00141 a_dim1 = *lda; 00142 a_offset = 1 + a_dim1 * 1; 00143 a -= a_offset; 00144 b_dim1 = *ldb; 00145 b_offset = 1 + b_dim1 * 1; 00146 b -= b_offset; 00147 /* Function Body */ 00148 lside = template_blas_lsame(side, "L"); 00149 if (lside) { 00150 nrowa = *m; 00151 } else { 00152 nrowa = *n; 00153 } 00154 nounit = template_blas_lsame(diag, "N"); 00155 upper = template_blas_lsame(uplo, "U"); 00156 info = 0; 00157 if (! lside && ! template_blas_lsame(side, "R")) { 00158 info = 1; 00159 } else if (! upper && ! template_blas_lsame(uplo, "L")) { 00160 info = 2; 00161 } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa, 00162 "T") && ! template_blas_lsame(transa, "C")) { 00163 info = 3; 00164 } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag, 00165 "N")) { 00166 info = 4; 00167 } else if (*m < 0) { 00168 info = 5; 00169 } else if (*n < 0) { 00170 info = 6; 00171 } else if (*lda < maxMACRO(1,nrowa)) { 00172 info = 9; 00173 } else if (*ldb < maxMACRO(1,*m)) { 00174 info = 11; 00175 } 00176 if (info != 0) { 00177 template_blas_erbla("TRMM ", &info); 00178 return 0; 00179 } 00180 /* Quick return if possible. */ 00181 if (*n == 0) { 00182 return 0; 00183 } 00184 /* And when alpha.eq.zero. */ 00185 if (*alpha == 0.) { 00186 i__1 = *n; 00187 for (j = 1; j <= i__1; ++j) { 00188 i__2 = *m; 00189 for (i__ = 1; i__ <= i__2; ++i__) { 00190 b_ref(i__, j) = 0.; 00191 /* L10: */ 00192 } 00193 /* L20: */ 00194 } 00195 return 0; 00196 } 00197 /* Start the operations. */ 00198 if (lside) { 00199 if (template_blas_lsame(transa, "N")) { 00200 /* Form B := alpha*A*B. */ 00201 if (upper) { 00202 i__1 = *n; 00203 for (j = 1; j <= i__1; ++j) { 00204 i__2 = *m; 00205 for (k = 1; k <= i__2; ++k) { 00206 if (b_ref(k, j) != 0.) { 00207 temp = *alpha * b_ref(k, j); 00208 i__3 = k - 1; 00209 for (i__ = 1; i__ <= i__3; ++i__) { 00210 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref( 00211 i__, k); 00212 /* L30: */ 00213 } 00214 if (nounit) { 00215 temp *= a_ref(k, k); 00216 } 00217 b_ref(k, j) = temp; 00218 } 00219 /* L40: */ 00220 } 00221 /* L50: */ 00222 } 00223 } else { 00224 i__1 = *n; 00225 for (j = 1; j <= i__1; ++j) { 00226 for (k = *m; k >= 1; --k) { 00227 if (b_ref(k, j) != 0.) { 00228 temp = *alpha * b_ref(k, j); 00229 b_ref(k, j) = temp; 00230 if (nounit) { 00231 b_ref(k, j) = b_ref(k, j) * a_ref(k, k); 00232 } 00233 i__2 = *m; 00234 for (i__ = k + 1; i__ <= i__2; ++i__) { 00235 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref( 00236 i__, k); 00237 /* L60: */ 00238 } 00239 } 00240 /* L70: */ 00241 } 00242 /* L80: */ 00243 } 00244 } 00245 } else { 00246 /* Form B := alpha*A'*B. */ 00247 if (upper) { 00248 i__1 = *n; 00249 for (j = 1; j <= i__1; ++j) { 00250 for (i__ = *m; i__ >= 1; --i__) { 00251 temp = b_ref(i__, j); 00252 if (nounit) { 00253 temp *= a_ref(i__, i__); 00254 } 00255 i__2 = i__ - 1; 00256 for (k = 1; k <= i__2; ++k) { 00257 temp += a_ref(k, i__) * b_ref(k, j); 00258 /* L90: */ 00259 } 00260 b_ref(i__, j) = *alpha * temp; 00261 /* L100: */ 00262 } 00263 /* L110: */ 00264 } 00265 } else { 00266 i__1 = *n; 00267 for (j = 1; j <= i__1; ++j) { 00268 i__2 = *m; 00269 for (i__ = 1; i__ <= i__2; ++i__) { 00270 temp = b_ref(i__, j); 00271 if (nounit) { 00272 temp *= a_ref(i__, i__); 00273 } 00274 i__3 = *m; 00275 for (k = i__ + 1; k <= i__3; ++k) { 00276 temp += a_ref(k, i__) * b_ref(k, j); 00277 /* L120: */ 00278 } 00279 b_ref(i__, j) = *alpha * temp; 00280 /* L130: */ 00281 } 00282 /* L140: */ 00283 } 00284 } 00285 } 00286 } else { 00287 if (template_blas_lsame(transa, "N")) { 00288 /* Form B := alpha*B*A. */ 00289 if (upper) { 00290 for (j = *n; j >= 1; --j) { 00291 temp = *alpha; 00292 if (nounit) { 00293 temp *= a_ref(j, j); 00294 } 00295 i__1 = *m; 00296 for (i__ = 1; i__ <= i__1; ++i__) { 00297 b_ref(i__, j) = temp * b_ref(i__, j); 00298 /* L150: */ 00299 } 00300 i__1 = j - 1; 00301 for (k = 1; k <= i__1; ++k) { 00302 if (a_ref(k, j) != 0.) { 00303 temp = *alpha * a_ref(k, j); 00304 i__2 = *m; 00305 for (i__ = 1; i__ <= i__2; ++i__) { 00306 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( 00307 i__, k); 00308 /* L160: */ 00309 } 00310 } 00311 /* L170: */ 00312 } 00313 /* L180: */ 00314 } 00315 } else { 00316 i__1 = *n; 00317 for (j = 1; j <= i__1; ++j) { 00318 temp = *alpha; 00319 if (nounit) { 00320 temp *= a_ref(j, j); 00321 } 00322 i__2 = *m; 00323 for (i__ = 1; i__ <= i__2; ++i__) { 00324 b_ref(i__, j) = temp * b_ref(i__, j); 00325 /* L190: */ 00326 } 00327 i__2 = *n; 00328 for (k = j + 1; k <= i__2; ++k) { 00329 if (a_ref(k, j) != 0.) { 00330 temp = *alpha * a_ref(k, j); 00331 i__3 = *m; 00332 for (i__ = 1; i__ <= i__3; ++i__) { 00333 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( 00334 i__, k); 00335 /* L200: */ 00336 } 00337 } 00338 /* L210: */ 00339 } 00340 /* L220: */ 00341 } 00342 } 00343 } else { 00344 /* Form B := alpha*B*A'. */ 00345 if (upper) { 00346 i__1 = *n; 00347 for (k = 1; k <= i__1; ++k) { 00348 i__2 = k - 1; 00349 for (j = 1; j <= i__2; ++j) { 00350 if (a_ref(j, k) != 0.) { 00351 temp = *alpha * a_ref(j, k); 00352 i__3 = *m; 00353 for (i__ = 1; i__ <= i__3; ++i__) { 00354 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( 00355 i__, k); 00356 /* L230: */ 00357 } 00358 } 00359 /* L240: */ 00360 } 00361 temp = *alpha; 00362 if (nounit) { 00363 temp *= a_ref(k, k); 00364 } 00365 if (temp != 1.) { 00366 i__2 = *m; 00367 for (i__ = 1; i__ <= i__2; ++i__) { 00368 b_ref(i__, k) = temp * b_ref(i__, k); 00369 /* L250: */ 00370 } 00371 } 00372 /* L260: */ 00373 } 00374 } else { 00375 for (k = *n; k >= 1; --k) { 00376 i__1 = *n; 00377 for (j = k + 1; j <= i__1; ++j) { 00378 if (a_ref(j, k) != 0.) { 00379 temp = *alpha * a_ref(j, k); 00380 i__2 = *m; 00381 for (i__ = 1; i__ <= i__2; ++i__) { 00382 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( 00383 i__, k); 00384 /* L270: */ 00385 } 00386 } 00387 /* L280: */ 00388 } 00389 temp = *alpha; 00390 if (nounit) { 00391 temp *= a_ref(k, k); 00392 } 00393 if (temp != 1.) { 00394 i__1 = *m; 00395 for (i__ = 1; i__ <= i__1; ++i__) { 00396 b_ref(i__, k) = temp * b_ref(i__, k); 00397 /* L290: */ 00398 } 00399 } 00400 /* L300: */ 00401 } 00402 } 00403 } 00404 } 00405 return 0; 00406 /* End of DTRMM . */ 00407 } /* dtrmm_ */ 00408 #undef b_ref 00409 #undef a_ref 00410 00411 #endif