ergo
template_lapack_stein.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_STEIN_HEADER
36 #define TEMPLATE_LAPACK_STEIN_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e,
41  const integer *m, const Treal *w, const integer *iblock, const integer *isplit,
42  Treal *z__, const integer *ldz, Treal *work, integer *iwork,
43  integer *ifail, integer *info)
44 {
45 /* -- LAPACK routine (version 3.0) --
46  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
47  Courant Institute, Argonne National Lab, and Rice University
48  September 30, 1994
49 
50 
51  Purpose
52  =======
53 
54  DSTEIN computes the eigenvectors of a real symmetric tridiagonal
55  matrix T corresponding to specified eigenvalues, using inverse
56  iteration.
57 
58  The maximum number of iterations allowed for each eigenvector is
59  specified by an internal parameter MAXITS (currently set to 5).
60 
61  Arguments
62  =========
63 
64  N (input) INTEGER
65  The order of the matrix. N >= 0.
66 
67  D (input) DOUBLE PRECISION array, dimension (N)
68  The n diagonal elements of the tridiagonal matrix T.
69 
70  E (input) DOUBLE PRECISION array, dimension (N)
71  The (n-1) subdiagonal elements of the tridiagonal matrix
72  T, in elements 1 to N-1. E(N) need not be set.
73 
74  M (input) INTEGER
75  The number of eigenvectors to be found. 0 <= M <= N.
76 
77  W (input) DOUBLE PRECISION array, dimension (N)
78  The first M elements of W contain the eigenvalues for
79  which eigenvectors are to be computed. The eigenvalues
80  should be grouped by split-off block and ordered from
81  smallest to largest within the block. ( The output array
82  W from DSTEBZ with ORDER = 'B' is expected here. )
83 
84  IBLOCK (input) INTEGER array, dimension (N)
85  The submatrix indices associated with the corresponding
86  eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
87  the first submatrix from the top, =2 if W(i) belongs to
88  the second submatrix, etc. ( The output array IBLOCK
89  from DSTEBZ is expected here. )
90 
91  ISPLIT (input) INTEGER array, dimension (N)
92  The splitting points, at which T breaks up into submatrices.
93  The first submatrix consists of rows/columns 1 to
94  ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
95  through ISPLIT( 2 ), etc.
96  ( The output array ISPLIT from DSTEBZ is expected here. )
97 
98  Z (output) DOUBLE PRECISION array, dimension (LDZ, M)
99  The computed eigenvectors. The eigenvector associated
100  with the eigenvalue W(i) is stored in the i-th column of
101  Z. Any vector which fails to converge is set to its current
102  iterate after MAXITS iterations.
103 
104  LDZ (input) INTEGER
105  The leading dimension of the array Z. LDZ >= max(1,N).
106 
107  WORK (workspace) DOUBLE PRECISION array, dimension (5*N)
108 
109  IWORK (workspace) INTEGER array, dimension (N)
110 
111  IFAIL (output) INTEGER array, dimension (M)
112  On normal exit, all elements of IFAIL are zero.
113  If one or more eigenvectors fail to converge after
114  MAXITS iterations, then their indices are stored in
115  array IFAIL.
116 
117  INFO (output) INTEGER
118  = 0: successful exit.
119  < 0: if INFO = -i, the i-th argument had an illegal value
120  > 0: if INFO = i, then i eigenvectors failed to converge
121  in MAXITS iterations. Their indices are stored in
122  array IFAIL.
123 
124  Internal Parameters
125  ===================
126 
127  MAXITS INTEGER, default = 5
128  The maximum number of iterations performed.
129 
130  EXTRA INTEGER, default = 2
131  The number of iterations performed after norm growth
132  criterion is satisfied, should be at least 1.
133 
134  =====================================================================
135 
136 
137  Test the input parameters.
138 
139  Parameter adjustments */
140  /* Table of constant values */
141  integer c__2 = 2;
142  integer c__1 = 1;
143  integer c_n1 = -1;
144 
145  /* System generated locals */
146  integer z_dim1, z_offset, i__1, i__2, i__3;
147  Treal d__1, d__2, d__3, d__4, d__5;
148  /* Local variables */
149  integer jblk, nblk;
150  integer jmax;
151  integer i__, j;
152  integer iseed[4], gpind, iinfo;
153  integer b1;
154  integer j1;
155  Treal ortol;
156  integer indrv1, indrv2, indrv3, indrv4, indrv5, bn;
157  Treal xj;
158  integer nrmchk;
159  integer blksiz;
160  Treal onenrm, dtpcrt, pertol, scl, eps, sep, nrm, tol;
161  integer its;
162  Treal xjm, ztr, eps1;
163 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
164 
165 
166  --d__;
167  --e;
168  --w;
169  --iblock;
170  --isplit;
171  z_dim1 = *ldz;
172  z_offset = 1 + z_dim1 * 1;
173  z__ -= z_offset;
174  --work;
175  --iwork;
176  --ifail;
177 
178  /* Initialization added by Elias to get rid of compiler warnings. */
179  ortol = dtpcrt = xjm = onenrm = gpind = 0;
180  /* Function Body */
181  *info = 0;
182  i__1 = *m;
183  for (i__ = 1; i__ <= i__1; ++i__) {
184  ifail[i__] = 0;
185 /* L10: */
186  }
187 
188  if (*n < 0) {
189  *info = -1;
190  } else if (*m < 0 || *m > *n) {
191  *info = -4;
192  } else if (*ldz < maxMACRO(1,*n)) {
193  *info = -9;
194  } else {
195  i__1 = *m;
196  for (j = 2; j <= i__1; ++j) {
197  if (iblock[j] < iblock[j - 1]) {
198  *info = -6;
199  goto L30;
200  }
201  if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) {
202  *info = -5;
203  goto L30;
204  }
205 /* L20: */
206  }
207 L30:
208  ;
209  }
210 
211  if (*info != 0) {
212  i__1 = -(*info);
213  template_blas_erbla("STEIN ", &i__1);
214  return 0;
215  }
216 
217 /* Quick return if possible */
218 
219  if (*n == 0 || *m == 0) {
220  return 0;
221  } else if (*n == 1) {
222  z___ref(1, 1) = 1.;
223  return 0;
224  }
225 
226 /* Get machine constants. */
227 
228  eps = template_lapack_lamch("Precision", (Treal)0);
229 
230 /* Initialize seed for random number generator DLARNV. */
231 
232  for (i__ = 1; i__ <= 4; ++i__) {
233  iseed[i__ - 1] = 1;
234 /* L40: */
235  }
236 
237 /* Initialize pointers. */
238 
239  indrv1 = 0;
240  indrv2 = indrv1 + *n;
241  indrv3 = indrv2 + *n;
242  indrv4 = indrv3 + *n;
243  indrv5 = indrv4 + *n;
244 
245 /* Compute eigenvectors of matrix blocks. */
246 
247  j1 = 1;
248  i__1 = iblock[*m];
249  for (nblk = 1; nblk <= i__1; ++nblk) {
250 
251 /* Find starting and ending indices of block nblk. */
252 
253  if (nblk == 1) {
254  b1 = 1;
255  } else {
256  b1 = isplit[nblk - 1] + 1;
257  }
258  bn = isplit[nblk];
259  blksiz = bn - b1 + 1;
260  if (blksiz == 1) {
261  goto L60;
262  }
263  gpind = b1;
264 
265 /* Compute reorthogonalization criterion and stopping criterion. */
266 
267  onenrm = (d__1 = d__[b1], absMACRO(d__1)) + (d__2 = e[b1], absMACRO(d__2));
268 /* Computing MAX */
269  d__3 = onenrm, d__4 = (d__1 = d__[bn], absMACRO(d__1)) + (d__2 = e[bn - 1],
270  absMACRO(d__2));
271  onenrm = maxMACRO(d__3,d__4);
272  i__2 = bn - 1;
273  for (i__ = b1 + 1; i__ <= i__2; ++i__) {
274 /* Computing MAX */
275  d__4 = onenrm, d__5 = (d__1 = d__[i__], absMACRO(d__1)) + (d__2 = e[
276  i__ - 1], absMACRO(d__2)) + (d__3 = e[i__], absMACRO(d__3));
277  onenrm = maxMACRO(d__4,d__5);
278 /* L50: */
279  }
280  ortol = onenrm * .001;
281 
282  dtpcrt = template_blas_sqrt(.1 / blksiz);
283 
284 /* Loop through eigenvalues of block nblk. */
285 
286 L60:
287  jblk = 0;
288  i__2 = *m;
289  for (j = j1; j <= i__2; ++j) {
290  if (iblock[j] != nblk) {
291  j1 = j;
292  goto L160;
293  }
294  ++jblk;
295  xj = w[j];
296 
297 /* Skip all the work if the block size is one. */
298 
299  if (blksiz == 1) {
300  work[indrv1 + 1] = 1.;
301  goto L120;
302  }
303 
304 /* If eigenvalues j and j-1 are too close, add a relatively
305  small perturbation. */
306 
307  if (jblk > 1) {
308  eps1 = (d__1 = eps * xj, absMACRO(d__1));
309  pertol = eps1 * 10.;
310  sep = xj - xjm;
311  if (sep < pertol) {
312  xj = xjm + pertol;
313  }
314  }
315 
316  its = 0;
317  nrmchk = 0;
318 
319 /* Get random starting vector. */
320 
321  template_lapack_larnv(&c__2, iseed, &blksiz, &work[indrv1 + 1]);
322 
323 /* Copy the matrix T so it won't be destroyed in factorization. */
324 
325  template_blas_copy(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1);
326  i__3 = blksiz - 1;
327  template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1);
328  i__3 = blksiz - 1;
329  template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1);
330 
331 /* Compute LU factors with partial pivoting ( PT = LU ) */
332 
333  tol = 0.;
334  template_lapack_lagtf(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[
335  indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo);
336 
337 /* Update iteration count. */
338 
339 L70:
340  ++its;
341  if (its > 5) {
342  goto L100;
343  }
344 
345 /* Normalize and scale the righthand side vector Pb.
346 
347  Computing MAX */
348  d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], absMACRO(d__1));
349  scl = blksiz * onenrm * maxMACRO(d__2,d__3) / template_blas_asum(&blksiz, &work[
350  indrv1 + 1], &c__1);
351  template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
352 
353 /* Solve the system LU = Pb. */
354 
355  template_lapack_lagts(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], &
356  work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[
357  indrv1 + 1], &tol, &iinfo);
358 
359 /* Reorthogonalize by modified Gram-Schmidt if eigenvalues are
360  close enough. */
361 
362  if (jblk == 1) {
363  goto L90;
364  }
365  if ((d__1 = xj - xjm, absMACRO(d__1)) > ortol) {
366  gpind = j;
367  }
368  if (gpind != j) {
369  i__3 = j - 1;
370  for (i__ = gpind; i__ <= i__3; ++i__) {
371  ztr = -template_blas_dot(&blksiz, &work[indrv1 + 1], &c__1, &z___ref(
372  b1, i__), &c__1);
373  template_blas_axpy(&blksiz, &ztr, &z___ref(b1, i__), &c__1, &work[
374  indrv1 + 1], &c__1);
375 /* L80: */
376  }
377  }
378 
379 /* Check the infinity norm of the iterate. */
380 
381 L90:
382  jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
383  nrm = (d__1 = work[indrv1 + jmax], absMACRO(d__1));
384 
385 /* Continue for additional iterations after norm reaches
386  stopping criterion. */
387 
388  if (nrm < dtpcrt) {
389  goto L70;
390  }
391  ++nrmchk;
392  if (nrmchk < 3) {
393  goto L70;
394  }
395 
396  goto L110;
397 
398 /* If stopping criterion was not satisfied, update info and
399  store eigenvector number in array ifail. */
400 
401 L100:
402  ++(*info);
403  ifail[*info] = j;
404 
405 /* Accept iterate as jth eigenvector. */
406 
407 L110:
408  scl = 1. / template_blas_nrm2(&blksiz, &work[indrv1 + 1], &c__1);
409  jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
410  if (work[indrv1 + jmax] < 0.) {
411  scl = -scl;
412  }
413  template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
414 L120:
415  i__3 = *n;
416  for (i__ = 1; i__ <= i__3; ++i__) {
417  z___ref(i__, j) = 0.;
418 /* L130: */
419  }
420  i__3 = blksiz;
421  for (i__ = 1; i__ <= i__3; ++i__) {
422  z___ref(b1 + i__ - 1, j) = work[indrv1 + i__];
423 /* L140: */
424  }
425 
426 /* Save the shift to check eigenvalue spacing at next
427  iteration. */
428 
429  xjm = xj;
430 
431 /* L150: */
432  }
433 L160:
434  ;
435  }
436 
437  return 0;
438 
439 /* End of DSTEIN */
440 
441 } /* dstein_ */
442 
443 #undef z___ref
444 
445 
446 #endif
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:41
Treal template_blas_nrm2(const integer *n, const Treal *x, const integer *incx)
Definition: template_blas_nrm2.h:40
#define absMACRO(x)
Definition: template_blas_common.h:45
int template_lapack_larnv(const integer *idist, integer *iseed, const integer *n, Treal *x)
Definition: template_lapack_larnv.h:40
#define z___ref(a_1, a_2)
integer template_blas_idamax(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_idamax.h:40
int integer
Definition: template_blas_common.h:38
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
int template_lapack_lagtf(const integer *n, Treal *a, const Treal *lambda, Treal *b, Treal *c__, const Treal *tol, Treal *d__, integer *in, integer *info)
Definition: template_lapack_lagtf.h:40
Treal template_blas_asum(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_asum.h:40
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:144
int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e, const integer *m, const Treal *w, const integer *iblock, const integer *isplit, Treal *z__, const integer *ldz, Treal *work, integer *iwork, integer *ifail, integer *info)
Definition: template_lapack_stein.h:40
int template_lapack_lagts(const integer *job, const integer *n, const Treal *a, const Treal *b, const Treal *c__, const Treal *d__, const integer *in, Treal *y, Treal *tol, integer *info)
Definition: template_lapack_lagts.h:40
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:40
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:41
Treal template_blas_sqrt(Treal x)
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:41