ergo
template_lapack_geqrf.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_GEQRF_HEADER
36 #define TEMPLATE_LAPACK_GEQRF_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_geqrf(const integer *m, const integer *n, Treal *a, const integer *
41  lda, Treal *tau, Treal *work, const integer *lwork, integer *info)
42 {
43 /* -- LAPACK routine (version 3.0) --
44  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
45  Courant Institute, Argonne National Lab, and Rice University
46  June 30, 1999
47 
48 
49  Purpose
50  =======
51 
52  DGEQRF computes a QR factorization of a real M-by-N matrix A:
53  A = Q * R.
54 
55  Arguments
56  =========
57 
58  M (input) INTEGER
59  The number of rows of the matrix A. M >= 0.
60 
61  N (input) INTEGER
62  The number of columns of the matrix A. N >= 0.
63 
64  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
65  On entry, the M-by-N matrix A.
66  On exit, the elements on and above the diagonal of the array
67  contain the min(M,N)-by-N upper trapezoidal matrix R (R is
68  upper triangular if m >= n); the elements below the diagonal,
69  with the array TAU, represent the orthogonal matrix Q as a
70  product of min(m,n) elementary reflectors (see Further
71  Details).
72 
73  LDA (input) INTEGER
74  The leading dimension of the array A. LDA >= max(1,M).
75 
76  TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
77  The scalar factors of the elementary reflectors (see Further
78  Details).
79 
80  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
81  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
82 
83  LWORK (input) INTEGER
84  The dimension of the array WORK. LWORK >= max(1,N).
85  For optimum performance LWORK >= N*NB, where NB is
86  the optimal blocksize.
87 
88  If LWORK = -1, then a workspace query is assumed; the routine
89  only calculates the optimal size of the WORK array, returns
90  this value as the first entry of the WORK array, and no error
91  message related to LWORK is issued by XERBLA.
92 
93  INFO (output) INTEGER
94  = 0: successful exit
95  < 0: if INFO = -i, the i-th argument had an illegal value
96 
97  Further Details
98  ===============
99 
100  The matrix Q is represented as a product of elementary reflectors
101 
102  Q = H(1) H(2) . . . H(k), where k = min(m,n).
103 
104  Each H(i) has the form
105 
106  H(i) = I - tau * v * v'
107 
108  where tau is a real scalar, and v is a real vector with
109  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
110  and tau in TAU(i).
111 
112  =====================================================================
113 
114 
115  Test the input arguments
116 
117  Parameter adjustments */
118  /* Table of constant values */
119  integer c__1 = 1;
120  integer c_n1 = -1;
121  integer c__3 = 3;
122  integer c__2 = 2;
123 
124  /* System generated locals */
125  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
126  /* Local variables */
127  integer i__, k, nbmin, iinfo;
128  integer ib, nb;
129  integer nx;
130  integer ldwork, lwkopt;
131  logical lquery;
132  integer iws;
133 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
134 
135 
136  a_dim1 = *lda;
137  a_offset = 1 + a_dim1 * 1;
138  a -= a_offset;
139  --tau;
140  --work;
141 
142  /* Function Body */
143  *info = 0;
144  nb = template_lapack_ilaenv(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
145  1);
146  lwkopt = *n * nb;
147  work[1] = (Treal) lwkopt;
148  lquery = *lwork == -1;
149  if (*m < 0) {
150  *info = -1;
151  } else if (*n < 0) {
152  *info = -2;
153  } else if (*lda < maxMACRO(1,*m)) {
154  *info = -4;
155  } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
156  *info = -7;
157  }
158  if (*info != 0) {
159  i__1 = -(*info);
160  template_blas_erbla("GEQRF ", &i__1);
161  return 0;
162  } else if (lquery) {
163  return 0;
164  }
165 
166 /* Quick return if possible */
167 
168  k = minMACRO(*m,*n);
169  if (k == 0) {
170  work[1] = 1.;
171  return 0;
172  }
173 
174  nbmin = 2;
175  nx = 0;
176  iws = *n;
177  if (nb > 1 && nb < k) {
178 
179 /* Determine when to cross over from blocked to unblocked code.
180 
181  Computing MAX */
182  i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, (
183  ftnlen)6, (ftnlen)1);
184  nx = maxMACRO(i__1,i__2);
185  if (nx < k) {
186 
187 /* Determine if workspace is large enough for blocked code. */
188 
189  ldwork = *n;
190  iws = ldwork * nb;
191  if (*lwork < iws) {
192 
193 /* Not enough workspace to use optimal NB: reduce NB and
194  determine the minimum value of NB. */
195 
196  nb = *lwork / ldwork;
197 /* Computing MAX */
198  i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DGEQRF", " ", m, n, &c_n1, &
199  c_n1, (ftnlen)6, (ftnlen)1);
200  nbmin = maxMACRO(i__1,i__2);
201  }
202  }
203  }
204 
205  if (nb >= nbmin && nb < k && nx < k) {
206 
207 /* Use blocked code initially */
208 
209  i__1 = k - nx;
210  i__2 = nb;
211  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
212 /* Computing MIN */
213  i__3 = k - i__ + 1;
214  ib = minMACRO(i__3,nb);
215 
216 /* Compute the QR factorization of the current block
217  A(i:m,i:i+ib-1) */
218 
219  i__3 = *m - i__ + 1;
220  template_lapack_geqr2(&i__3, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
221  iinfo);
222  if (i__ + ib <= *n) {
223 
224 /* Form the triangular factor of the block reflector
225  H = H(i) H(i+1) . . . H(i+ib-1) */
226 
227  i__3 = *m - i__ + 1;
228  template_lapack_larft("Forward", "Columnwise", &i__3, &ib, &a_ref(i__, i__),
229  lda, &tau[i__], &work[1], &ldwork);
230 
231 /* Apply H' to A(i:m,i+ib:n) from the left */
232 
233  i__3 = *m - i__ + 1;
234  i__4 = *n - i__ - ib + 1;
235  template_lapack_larfb("Left", "Transpose", "Forward", "Columnwise", &i__3, &
236  i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, &
237  a_ref(i__, i__ + ib), lda, &work[ib + 1], &ldwork);
238  }
239 /* L10: */
240  }
241  } else {
242  i__ = 1;
243  }
244 
245 /* Use unblocked code to factor the last or only block. */
246 
247  if (i__ <= k) {
248  i__2 = *m - i__ + 1;
249  i__1 = *n - i__ + 1;
250  template_lapack_geqr2(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
251  iinfo);
252  }
253 
254  work[1] = (Treal) iws;
255  return 0;
256 
257 /* End of DGEQRF */
258 
259 } /* dgeqrf_ */
260 
261 #undef a_ref
262 
263 
264 #endif
#define a_ref(a_1, a_2)
int integer
Definition: template_blas_common.h:38
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:279
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
int template_lapack_geqrf(const integer *m, const integer *n, Treal *a, const integer *lda, Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_geqrf.h:40
int template_lapack_geqr2(const integer *m, const integer *n, Treal *a, const integer *lda, Treal *tau, Treal *work, integer *info)
Definition: template_lapack_geqr2.h:40
#define minMACRO(a, b)
Definition: template_blas_common.h:44
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:144
int template_lapack_larfb(const char *side, const char *trans, const char *direct, const char *storev, const integer *m, const integer *n, const integer *k, const Treal *v, const integer *ldv, const Treal *t, const integer *ldt, Treal *c__, const integer *ldc, Treal *work, const integer *ldwork)
Definition: template_lapack_larfb.h:40
bool logical
Definition: template_blas_common.h:39
int ftnlen
Definition: template_blas_common.h:40
int template_lapack_larft(const char *direct, const char *storev, const integer *n, const integer *k, Treal *v, const integer *ldv, const Treal *tau, Treal *t, const integer *ldt)
Definition: template_lapack_larft.h:40