ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_ORMQR_HEADER 00036 #define TEMPLATE_LAPACK_ORMQR_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_ormqr(char *side, char *trans, const integer *m, const integer *n, 00041 const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal * 00042 c__, const integer *ldc, Treal *work, const integer *lwork, integer *info) 00043 { 00044 /* -- LAPACK routine (version 3.0) -- 00045 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00046 Courant Institute, Argonne National Lab, and Rice University 00047 June 30, 1999 00048 00049 00050 Purpose 00051 ======= 00052 00053 DORMQR overwrites the general real M-by-N matrix C with 00054 00055 SIDE = 'L' SIDE = 'R' 00056 TRANS = 'N': Q * C C * Q 00057 TRANS = 'T': Q**T * C C * Q**T 00058 00059 where Q is a real orthogonal matrix defined as the product of k 00060 elementary reflectors 00061 00062 Q = H(1) H(2) . . . H(k) 00063 00064 as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N 00065 if SIDE = 'R'. 00066 00067 Arguments 00068 ========= 00069 00070 SIDE (input) CHARACTER*1 00071 = 'L': apply Q or Q**T from the Left; 00072 = 'R': apply Q or Q**T from the Right. 00073 00074 TRANS (input) CHARACTER*1 00075 = 'N': No transpose, apply Q; 00076 = 'T': Transpose, apply Q**T. 00077 00078 M (input) INTEGER 00079 The number of rows of the matrix C. M >= 0. 00080 00081 N (input) INTEGER 00082 The number of columns of the matrix C. N >= 0. 00083 00084 K (input) INTEGER 00085 The number of elementary reflectors whose product defines 00086 the matrix Q. 00087 If SIDE = 'L', M >= K >= 0; 00088 if SIDE = 'R', N >= K >= 0. 00089 00090 A (input) DOUBLE PRECISION array, dimension (LDA,K) 00091 The i-th column must contain the vector which defines the 00092 elementary reflector H(i), for i = 1,2,...,k, as returned by 00093 DGEQRF in the first k columns of its array argument A. 00094 A is modified by the routine but restored on exit. 00095 00096 LDA (input) INTEGER 00097 The leading dimension of the array A. 00098 If SIDE = 'L', LDA >= max(1,M); 00099 if SIDE = 'R', LDA >= max(1,N). 00100 00101 TAU (input) DOUBLE PRECISION array, dimension (K) 00102 TAU(i) must contain the scalar factor of the elementary 00103 reflector H(i), as returned by DGEQRF. 00104 00105 C (input/output) DOUBLE PRECISION array, dimension (LDC,N) 00106 On entry, the M-by-N matrix C. 00107 On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. 00108 00109 LDC (input) INTEGER 00110 The leading dimension of the array C. LDC >= max(1,M). 00111 00112 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) 00113 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 00114 00115 LWORK (input) INTEGER 00116 The dimension of the array WORK. 00117 If SIDE = 'L', LWORK >= max(1,N); 00118 if SIDE = 'R', LWORK >= max(1,M). 00119 For optimum performance LWORK >= N*NB if SIDE = 'L', and 00120 LWORK >= M*NB if SIDE = 'R', where NB is the optimal 00121 blocksize. 00122 00123 If LWORK = -1, then a workspace query is assumed; the routine 00124 only calculates the optimal size of the WORK array, returns 00125 this value as the first entry of the WORK array, and no error 00126 message related to LWORK is issued by XERBLA. 00127 00128 INFO (output) INTEGER 00129 = 0: successful exit 00130 < 0: if INFO = -i, the i-th argument had an illegal value 00131 00132 ===================================================================== 00133 00134 00135 Test the input arguments 00136 00137 Parameter adjustments */ 00138 /* Table of constant values */ 00139 integer c__1 = 1; 00140 integer c_n1 = -1; 00141 integer c__2 = 2; 00142 integer c__65 = 65; 00143 00144 /* System generated locals */ 00145 address a__1[2]; 00146 integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, 00147 i__5; 00148 char ch__1[2]; 00149 /* Local variables */ 00150 logical left; 00151 integer i__; 00152 Treal t[4160] /* was [65][64] */; 00153 integer nbmin, iinfo, i1, i2, i3; 00154 integer ib, ic, jc, nb, mi, ni; 00155 integer nq, nw; 00156 logical notran; 00157 integer ldwork, lwkopt; 00158 logical lquery; 00159 integer iws; 00160 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00161 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 00162 00163 00164 a_dim1 = *lda; 00165 a_offset = 1 + a_dim1 * 1; 00166 a -= a_offset; 00167 --tau; 00168 c_dim1 = *ldc; 00169 c_offset = 1 + c_dim1 * 1; 00170 c__ -= c_offset; 00171 --work; 00172 00173 /* Initialization added by Elias to get rid of compiler warnings. */ 00174 lwkopt = 0; 00175 nb = 0; 00176 /* Function Body */ 00177 *info = 0; 00178 left = template_blas_lsame(side, "L"); 00179 notran = template_blas_lsame(trans, "N"); 00180 lquery = *lwork == -1; 00181 00182 /* NQ is the order of Q and NW is the minimum dimension of WORK */ 00183 00184 if (left) { 00185 nq = *m; 00186 nw = *n; 00187 } else { 00188 nq = *n; 00189 nw = *m; 00190 } 00191 if (! left && ! template_blas_lsame(side, "R")) { 00192 *info = -1; 00193 } else if (! notran && ! template_blas_lsame(trans, "T")) { 00194 *info = -2; 00195 } else if (*m < 0) { 00196 *info = -3; 00197 } else if (*n < 0) { 00198 *info = -4; 00199 } else if (*k < 0 || *k > nq) { 00200 *info = -5; 00201 } else if (*lda < maxMACRO(1,nq)) { 00202 *info = -7; 00203 } else if (*ldc < maxMACRO(1,*m)) { 00204 *info = -10; 00205 } else if (*lwork < maxMACRO(1,nw) && ! lquery) { 00206 *info = -12; 00207 } 00208 00209 if (*info == 0) { 00210 00211 /* Determine the block size. NB may be at most NBMAX, where NBMAX 00212 is used to define the local array T. 00213 00214 Computing MIN 00215 Writing concatenation */ 00216 i__3[0] = 1, a__1[0] = side; 00217 i__3[1] = 1, a__1[1] = trans; 00218 template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 00219 i__1 = 64, i__2 = template_lapack_ilaenv(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, ( 00220 ftnlen)6, (ftnlen)2); 00221 nb = minMACRO(i__1,i__2); 00222 lwkopt = maxMACRO(1,nw) * nb; 00223 work[1] = (Treal) lwkopt; 00224 } 00225 00226 if (*info != 0) { 00227 i__1 = -(*info); 00228 template_blas_erbla("ORMQR ", &i__1); 00229 return 0; 00230 } else if (lquery) { 00231 return 0; 00232 } 00233 00234 /* Quick return if possible */ 00235 00236 if (*m == 0 || *n == 0 || *k == 0) { 00237 work[1] = 1.; 00238 return 0; 00239 } 00240 00241 nbmin = 2; 00242 ldwork = nw; 00243 if (nb > 1 && nb < *k) { 00244 iws = nw * nb; 00245 if (*lwork < iws) { 00246 nb = *lwork / ldwork; 00247 /* Computing MAX 00248 Writing concatenation */ 00249 i__3[0] = 1, a__1[0] = side; 00250 i__3[1] = 1, a__1[1] = trans; 00251 template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); 00252 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, ( 00253 ftnlen)6, (ftnlen)2); 00254 nbmin = maxMACRO(i__1,i__2); 00255 } 00256 } else { 00257 iws = nw; 00258 } 00259 00260 if (nb < nbmin || nb >= *k) { 00261 00262 /* Use unblocked code */ 00263 00264 template_lapack_orm2r(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ 00265 c_offset], ldc, &work[1], &iinfo); 00266 } else { 00267 00268 /* Use blocked code */ 00269 00270 if ( ( left && ! notran ) || ( ! left && notran ) ) { 00271 i1 = 1; 00272 i2 = *k; 00273 i3 = nb; 00274 } else { 00275 i1 = (*k - 1) / nb * nb + 1; 00276 i2 = 1; 00277 i3 = -nb; 00278 } 00279 00280 if (left) { 00281 ni = *n; 00282 jc = 1; 00283 } else { 00284 mi = *m; 00285 ic = 1; 00286 } 00287 00288 i__1 = i2; 00289 i__2 = i3; 00290 for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { 00291 /* Computing MIN */ 00292 i__4 = nb, i__5 = *k - i__ + 1; 00293 ib = minMACRO(i__4,i__5); 00294 00295 /* Form the triangular factor of the block reflector 00296 H = H(i) H(i+1) . . . H(i+ib-1) */ 00297 00298 i__4 = nq - i__ + 1; 00299 template_lapack_larft("Forward", "Columnwise", &i__4, &ib, &a_ref(i__, i__), 00300 lda, &tau[i__], t, &c__65); 00301 if (left) { 00302 00303 /* H or H' is applied to C(i:m,1:n) */ 00304 00305 mi = *m - i__ + 1; 00306 ic = i__; 00307 } else { 00308 00309 /* H or H' is applied to C(1:m,i:n) */ 00310 00311 ni = *n - i__ + 1; 00312 jc = i__; 00313 } 00314 00315 /* Apply H or H' */ 00316 00317 template_lapack_larfb(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, & 00318 a_ref(i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, & 00319 work[1], &ldwork); 00320 /* L10: */ 00321 } 00322 } 00323 work[1] = (Treal) lwkopt; 00324 return 0; 00325 00326 /* End of DORMQR */ 00327 00328 } /* dormqr_ */ 00329 00330 #undef c___ref 00331 #undef a_ref 00332 00333 00334 #endif