ergo
template_lapack_lasq4.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LASQ4_HEADER
36 #define TEMPLATE_LAPACK_LASQ4_HEADER
37 
38 template<class Treal>
39 int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__,
40  integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1,
41  Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2,
42  Treal *tau, integer *ttype, Treal *g)
43 {
44  /* System generated locals */
45  integer i__1;
46  Treal d__1, d__2;
47 
48 
49  /* Local variables */
50  Treal s = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
51  Treal a2, b1, b2;
52  integer i4, nn, np;
53  Treal gam, gap1, gap2;
54 
55 
56 /* -- LAPACK routine (version 3.2) -- */
57 
58 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
59 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
60 /* -- Berkeley -- */
61 /* -- November 2008 -- */
62 
63 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
64 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
65 
66 /* .. Scalar Arguments .. */
67 /* .. */
68 /* .. Array Arguments .. */
69 /* .. */
70 
71 /* Purpose */
72 /* ======= */
73 
74 /* DLASQ4 computes an approximation TAU to the smallest eigenvalue */
75 /* using values of d from the previous transform. */
76 
77 /* I0 (input) INTEGER */
78 /* First index. */
79 
80 /* N0 (input) INTEGER */
81 /* Last index. */
82 
83 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
84 /* Z holds the qd array. */
85 
86 /* PP (input) INTEGER */
87 /* PP=0 for ping, PP=1 for pong. */
88 
89 /* NOIN (input) INTEGER */
90 /* The value of N0 at start of EIGTEST. */
91 
92 /* DMIN (input) DOUBLE PRECISION */
93 /* Minimum value of d. */
94 
95 /* DMIN1 (input) DOUBLE PRECISION */
96 /* Minimum value of d, excluding D( N0 ). */
97 
98 /* DMIN2 (input) DOUBLE PRECISION */
99 /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
100 
101 /* DN (input) DOUBLE PRECISION */
102 /* d(N) */
103 
104 /* DN1 (input) DOUBLE PRECISION */
105 /* d(N-1) */
106 
107 /* DN2 (input) DOUBLE PRECISION */
108 /* d(N-2) */
109 
110 /* TAU (output) DOUBLE PRECISION */
111 /* This is the shift. */
112 
113 /* TTYPE (output) INTEGER */
114 /* Shift type. */
115 
116 /* G (input/output) REAL */
117 /* G is passed as an argument in order to save its value between */
118 /* calls to DLASQ4. */
119 
120 /* Further Details */
121 /* =============== */
122 /* CNST1 = 9/16 */
123 
124 /* ===================================================================== */
125 
126 /* .. Parameters .. */
127 /* .. */
128 /* .. Local Scalars .. */
129 /* .. */
130 /* .. Intrinsic Functions .. */
131 /* .. */
132 /* .. Executable Statements .. */
133 
134 /* A negative DMIN forces the shift to take that absolute value */
135 /* TTYPE records the type of shift. */
136 
137  /* Parameter adjustments */
138  --z__;
139 
140  /* Function Body */
141  if (*dmin__ <= 0.) {
142  *tau = -(*dmin__);
143  *ttype = -1;
144  return 0;
145  }
146 
147  nn = (*n0 << 2) + *pp;
148  if (*n0in == *n0) {
149 
150 /* No eigenvalues deflated. */
151 
152  if (*dmin__ == *dn || *dmin__ == *dn1) {
153 
154  b1 = template_blas_sqrt(z__[nn - 3]) * template_blas_sqrt(z__[nn - 5]);
155  b2 = template_blas_sqrt(z__[nn - 7]) * template_blas_sqrt(z__[nn - 9]);
156  a2 = z__[nn - 7] + z__[nn - 5];
157 
158 /* Cases 2 and 3. */
159 
160  if (*dmin__ == *dn && *dmin1 == *dn1) {
161  gap2 = *dmin2 - a2 - *dmin2 * .25;
162  if (gap2 > 0. && gap2 > b2) {
163  gap1 = a2 - *dn - b2 / gap2 * b2;
164  } else {
165  gap1 = a2 - *dn - (b1 + b2);
166  }
167  if (gap1 > 0. && gap1 > b1) {
168 /* Computing MAX */
169  d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
170  s = maxMACRO(d__1,d__2);
171  *ttype = -2;
172  } else {
173  s = 0.;
174  if (*dn > b1) {
175  s = *dn - b1;
176  }
177  if (a2 > b1 + b2) {
178 /* Computing MIN */
179  d__1 = s, d__2 = a2 - (b1 + b2);
180  s = minMACRO(d__1,d__2);
181  }
182 /* Computing MAX */
183  d__1 = s, d__2 = *dmin__ * .333;
184  s = maxMACRO(d__1,d__2);
185  *ttype = -3;
186  }
187  } else {
188 
189 /* Case 4. */
190 
191  *ttype = -4;
192  s = *dmin__ * .25;
193  if (*dmin__ == *dn) {
194  gam = *dn;
195  a2 = 0.;
196  if (z__[nn - 5] > z__[nn - 7]) {
197  return 0;
198  }
199  b2 = z__[nn - 5] / z__[nn - 7];
200  np = nn - 9;
201  } else {
202  np = nn - (*pp << 1);
203  b2 = z__[np - 2];
204  gam = *dn1;
205  if (z__[np - 4] > z__[np - 2]) {
206  return 0;
207  }
208  a2 = z__[np - 4] / z__[np - 2];
209  if (z__[nn - 9] > z__[nn - 11]) {
210  return 0;
211  }
212  b2 = z__[nn - 9] / z__[nn - 11];
213  np = nn - 13;
214  }
215 
216 /* Approximate contribution to norm squared from I < NN-1. */
217 
218  a2 += b2;
219  i__1 = (*i0 << 2) - 1 + *pp;
220  for (i4 = np; i4 >= i__1; i4 += -4) {
221  if (b2 == 0.) {
222  goto L20;
223  }
224  b1 = b2;
225  if (z__[i4] > z__[i4 - 2]) {
226  return 0;
227  }
228  b2 *= z__[i4] / z__[i4 - 2];
229  a2 += b2;
230  if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) {
231  goto L20;
232  }
233 /* L10: */
234  }
235 L20:
236  a2 *= 1.05;
237 
238 /* Rayleigh quotient residual bound. */
239 
240  if (a2 < .563) {
241  s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.);
242  }
243  }
244  } else if (*dmin__ == *dn2) {
245 
246 /* Case 5. */
247 
248  *ttype = -5;
249  s = *dmin__ * .25;
250 
251 /* Compute contribution to norm squared from I > NN-2. */
252 
253  np = nn - (*pp << 1);
254  b1 = z__[np - 2];
255  b2 = z__[np - 6];
256  gam = *dn2;
257  if (z__[np - 8] > b2 || z__[np - 4] > b1) {
258  return 0;
259  }
260  a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
261 
262 /* Approximate contribution to norm squared from I < NN-2. */
263 
264  if (*n0 - *i0 > 2) {
265  b2 = z__[nn - 13] / z__[nn - 15];
266  a2 += b2;
267  i__1 = (*i0 << 2) - 1 + *pp;
268  for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
269  if (b2 == 0.) {
270  goto L40;
271  }
272  b1 = b2;
273  if (z__[i4] > z__[i4 - 2]) {
274  return 0;
275  }
276  b2 *= z__[i4] / z__[i4 - 2];
277  a2 += b2;
278  if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) {
279  goto L40;
280  }
281 /* L30: */
282  }
283 L40:
284  a2 *= 1.05;
285  }
286 
287  if (a2 < .563) {
288  s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.);
289  }
290  } else {
291 
292 /* Case 6, no information to guide us. */
293 
294  if (*ttype == -6) {
295  *g += (1. - *g) * .333;
296  } else if (*ttype == -18) {
297  *g = .083250000000000005;
298  } else {
299  *g = .25;
300  }
301  s = *g * *dmin__;
302  *ttype = -6;
303  }
304 
305  } else if (*n0in == *n0 + 1) {
306 
307 /* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
308 
309  if (*dmin1 == *dn1 && *dmin2 == *dn2) {
310 
311 /* Cases 7 and 8. */
312 
313  *ttype = -7;
314  s = *dmin1 * .333;
315  if (z__[nn - 5] > z__[nn - 7]) {
316  return 0;
317  }
318  b1 = z__[nn - 5] / z__[nn - 7];
319  b2 = b1;
320  if (b2 == 0.) {
321  goto L60;
322  }
323  i__1 = (*i0 << 2) - 1 + *pp;
324  for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
325  a2 = b1;
326  if (z__[i4] > z__[i4 - 2]) {
327  return 0;
328  }
329  b1 *= z__[i4] / z__[i4 - 2];
330  b2 += b1;
331  if (maxMACRO(b1,a2) * 100. < b2) {
332  goto L60;
333  }
334 /* L50: */
335  }
336 L60:
337  b2 = template_blas_sqrt(b2 * 1.05);
338 /* Computing 2nd power */
339  d__1 = b2;
340  a2 = *dmin1 / (d__1 * d__1 + 1.);
341  gap2 = *dmin2 * .5 - a2;
342  if (gap2 > 0. && gap2 > b2 * a2) {
343 /* Computing MAX */
344  d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
345  s = maxMACRO(d__1,d__2);
346  } else {
347 /* Computing MAX */
348  d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
349  s = maxMACRO(d__1,d__2);
350  *ttype = -8;
351  }
352  } else {
353 
354 /* Case 9. */
355 
356  s = *dmin1 * .25;
357  if (*dmin1 == *dn1) {
358  s = *dmin1 * .5;
359  }
360  *ttype = -9;
361  }
362 
363  } else if (*n0in == *n0 + 2) {
364 
365 /* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */
366 
367 /* Cases 10 and 11. */
368 
369  if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
370  *ttype = -10;
371  s = *dmin2 * .333;
372  if (z__[nn - 5] > z__[nn - 7]) {
373  return 0;
374  }
375  b1 = z__[nn - 5] / z__[nn - 7];
376  b2 = b1;
377  if (b2 == 0.) {
378  goto L80;
379  }
380  i__1 = (*i0 << 2) - 1 + *pp;
381  for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
382  if (z__[i4] > z__[i4 - 2]) {
383  return 0;
384  }
385  b1 *= z__[i4] / z__[i4 - 2];
386  b2 += b1;
387  if (b1 * 100. < b2) {
388  goto L80;
389  }
390 /* L70: */
391  }
392 L80:
393  b2 = template_blas_sqrt(b2 * 1.05);
394 /* Computing 2nd power */
395  d__1 = b2;
396  a2 = *dmin2 / (d__1 * d__1 + 1.);
397  gap2 = z__[nn - 7] + z__[nn - 9] - template_blas_sqrt(z__[nn - 11]) * template_blas_sqrt(z__[
398  nn - 9]) - a2;
399  if (gap2 > 0. && gap2 > b2 * a2) {
400 /* Computing MAX */
401  d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
402  s = maxMACRO(d__1,d__2);
403  } else {
404 /* Computing MAX */
405  d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
406  s = maxMACRO(d__1,d__2);
407  }
408  } else {
409  s = *dmin2 * .25;
410  *ttype = -11;
411  }
412  } else if (*n0in > *n0 + 2) {
413 
414 /* Case 12, more than two eigenvalues deflated. No information. */
415 
416  s = 0.;
417  *ttype = -12;
418  }
419 
420  *tau = s;
421  return 0;
422 
423 /* End of DLASQ4 */
424 
425 } /* dlasq4_ */
426 
427 #endif
int integer
Definition: template_blas_common.h:38
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
#define minMACRO(a, b)
Definition: template_blas_common.h:44
Treal template_blas_sqrt(Treal x)
int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__, integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *tau, integer *ttype, Treal *g)
Definition: template_lapack_lasq4.h:39