ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_ORGQL_HEADER 00036 #define TEMPLATE_LAPACK_ORGQL_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_orgql(const integer *m, const integer *n, const integer *k, Treal * 00041 a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork, 00042 integer *info) 00043 { 00044 /* -- LAPACK routine (version 3.0) -- 00045 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00046 Courant Institute, Argonne National Lab, and Rice University 00047 June 30, 1999 00048 00049 00050 Purpose 00051 ======= 00052 00053 DORGQL generates an M-by-N real matrix Q with orthonormal columns, 00054 which is defined as the last N columns of a product of K elementary 00055 reflectors of order M 00056 00057 Q = H(k) . . . H(2) H(1) 00058 00059 as returned by DGEQLF. 00060 00061 Arguments 00062 ========= 00063 00064 M (input) INTEGER 00065 The number of rows of the matrix Q. M >= 0. 00066 00067 N (input) INTEGER 00068 The number of columns of the matrix Q. M >= N >= 0. 00069 00070 K (input) INTEGER 00071 The number of elementary reflectors whose product defines the 00072 matrix Q. N >= K >= 0. 00073 00074 A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00075 On entry, the (n-k+i)-th column must contain the vector which 00076 defines the elementary reflector H(i), for i = 1,2,...,k, as 00077 returned by DGEQLF in the last k columns of its array 00078 argument A. 00079 On exit, the M-by-N matrix Q. 00080 00081 LDA (input) INTEGER 00082 The first dimension of the array A. LDA >= max(1,M). 00083 00084 TAU (input) DOUBLE PRECISION array, dimension (K) 00085 TAU(i) must contain the scalar factor of the elementary 00086 reflector H(i), as returned by DGEQLF. 00087 00088 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) 00089 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 00090 00091 LWORK (input) INTEGER 00092 The dimension of the array WORK. LWORK >= max(1,N). 00093 For optimum performance LWORK >= N*NB, where NB is the 00094 optimal blocksize. 00095 00096 If LWORK = -1, then a workspace query is assumed; the routine 00097 only calculates the optimal size of the WORK array, returns 00098 this value as the first entry of the WORK array, and no error 00099 message related to LWORK is issued by XERBLA. 00100 00101 INFO (output) INTEGER 00102 = 0: successful exit 00103 < 0: if INFO = -i, the i-th argument has an illegal value 00104 00105 ===================================================================== 00106 00107 00108 Test the input arguments 00109 00110 Parameter adjustments */ 00111 /* Table of constant values */ 00112 integer c__1 = 1; 00113 integer c_n1 = -1; 00114 integer c__3 = 3; 00115 integer c__2 = 2; 00116 00117 /* System generated locals */ 00118 integer a_dim1, a_offset, i__1, i__2, i__3, i__4; 00119 /* Local variables */ 00120 integer i__, j, l, nbmin, iinfo; 00121 integer ib, nb, kk; 00122 integer nx; 00123 integer ldwork, lwkopt; 00124 logical lquery; 00125 integer iws; 00126 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00127 00128 00129 a_dim1 = *lda; 00130 a_offset = 1 + a_dim1 * 1; 00131 a -= a_offset; 00132 --tau; 00133 --work; 00134 00135 /* Function Body */ 00136 *info = 0; 00137 nb = template_lapack_ilaenv(&c__1, "DORGQL", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); 00138 lwkopt = maxMACRO(1,*n) * nb; 00139 work[1] = (Treal) lwkopt; 00140 lquery = *lwork == -1; 00141 if (*m < 0) { 00142 *info = -1; 00143 } else if (*n < 0 || *n > *m) { 00144 *info = -2; 00145 } else if (*k < 0 || *k > *n) { 00146 *info = -3; 00147 } else if (*lda < maxMACRO(1,*m)) { 00148 *info = -5; 00149 } else if (*lwork < maxMACRO(1,*n) && ! lquery) { 00150 *info = -8; 00151 } 00152 if (*info != 0) { 00153 i__1 = -(*info); 00154 template_blas_erbla("ORGQL ", &i__1); 00155 return 0; 00156 } else if (lquery) { 00157 return 0; 00158 } 00159 00160 /* Quick return if possible */ 00161 00162 if (*n <= 0) { 00163 work[1] = 1.; 00164 return 0; 00165 } 00166 00167 nbmin = 2; 00168 nx = 0; 00169 iws = *n; 00170 if (nb > 1 && nb < *k) { 00171 00172 /* Determine when to cross over from blocked to unblocked code. 00173 00174 Computing MAX */ 00175 i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQL", " ", m, n, k, &c_n1, ( 00176 ftnlen)6, (ftnlen)1); 00177 nx = maxMACRO(i__1,i__2); 00178 if (nx < *k) { 00179 00180 /* Determine if workspace is large enough for blocked code. */ 00181 00182 ldwork = *n; 00183 iws = ldwork * nb; 00184 if (*lwork < iws) { 00185 00186 /* Not enough workspace to use optimal NB: reduce NB and 00187 determine the minimum value of NB. */ 00188 00189 nb = *lwork / ldwork; 00190 /* Computing MAX */ 00191 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQL", " ", m, n, k, &c_n1, 00192 (ftnlen)6, (ftnlen)1); 00193 nbmin = maxMACRO(i__1,i__2); 00194 } 00195 } 00196 } 00197 00198 if (nb >= nbmin && nb < *k && nx < *k) { 00199 00200 /* Use blocked code after the first block. 00201 The last kk columns are handled by the block method. 00202 00203 Computing MIN */ 00204 i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; 00205 kk = minMACRO(i__1,i__2); 00206 00207 /* Set A(m-kk+1:m,1:n-kk) to zero. */ 00208 00209 i__1 = *n - kk; 00210 for (j = 1; j <= i__1; ++j) { 00211 i__2 = *m; 00212 for (i__ = *m - kk + 1; i__ <= i__2; ++i__) { 00213 a_ref(i__, j) = 0.; 00214 /* L10: */ 00215 } 00216 /* L20: */ 00217 } 00218 } else { 00219 kk = 0; 00220 } 00221 00222 /* Use unblocked code for the first or only block. */ 00223 00224 i__1 = *m - kk; 00225 i__2 = *n - kk; 00226 i__3 = *k - kk; 00227 template_lapack_org2l(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo) 00228 ; 00229 00230 if (kk > 0) { 00231 00232 /* Use blocked code */ 00233 00234 i__1 = *k; 00235 i__2 = nb; 00236 for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 00237 i__2) { 00238 /* Computing MIN */ 00239 i__3 = nb, i__4 = *k - i__ + 1; 00240 ib = minMACRO(i__3,i__4); 00241 if (*n - *k + i__ > 1) { 00242 00243 /* Form the triangular factor of the block reflector 00244 H = H(i+ib-1) . . . H(i+1) H(i) */ 00245 00246 i__3 = *m - *k + i__ + ib - 1; 00247 template_lapack_larft("Backward", "Columnwise", &i__3, &ib, &a_ref(1, *n - * 00248 k + i__), lda, &tau[i__], &work[1], &ldwork); 00249 00250 /* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ 00251 00252 i__3 = *m - *k + i__ + ib - 1; 00253 i__4 = *n - *k + i__ - 1; 00254 template_lapack_larfb("Left", "No transpose", "Backward", "Columnwise", & 00255 i__3, &i__4, &ib, &a_ref(1, *n - *k + i__), lda, & 00256 work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], & 00257 ldwork); 00258 } 00259 00260 /* Apply H to rows 1:m-k+i+ib-1 of current block */ 00261 00262 i__3 = *m - *k + i__ + ib - 1; 00263 template_lapack_org2l(&i__3, &ib, &ib, &a_ref(1, *n - *k + i__), lda, &tau[i__], 00264 &work[1], &iinfo); 00265 00266 /* Set rows m-k+i+ib:m of current block to zero */ 00267 00268 i__3 = *n - *k + i__ + ib - 1; 00269 for (j = *n - *k + i__; j <= i__3; ++j) { 00270 i__4 = *m; 00271 for (l = *m - *k + i__ + ib; l <= i__4; ++l) { 00272 a_ref(l, j) = 0.; 00273 /* L30: */ 00274 } 00275 /* L40: */ 00276 } 00277 /* L50: */ 00278 } 00279 } 00280 00281 work[1] = (Treal) iws; 00282 return 0; 00283 00284 /* End of DORGQL */ 00285 00286 } /* dorgql_ */ 00287 00288 #undef a_ref 00289 00290 00291 #endif