851 lines
30 KiB
FortranFixed
851 lines
30 KiB
FortranFixed
|
c\BeginDoc
|
||
|
c
|
||
|
c\Name: cnaitr
|
||
|
c
|
||
|
c\Description:
|
||
|
c Reverse communication interface for applying NP additional steps to
|
||
|
c a K step nonsymmetric Arnoldi factorization.
|
||
|
c
|
||
|
c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T
|
||
|
c
|
||
|
c with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
|
||
|
c
|
||
|
c Output: OP*V_{k+p} - V_{k+p}*H = r_{k+p}*e_{k+p}^T
|
||
|
c
|
||
|
c with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
|
||
|
c
|
||
|
c where OP and B are as in cnaupd. The B-norm of r_{k+p} is also
|
||
|
c computed and returned.
|
||
|
c
|
||
|
c\Usage:
|
||
|
c call cnaitr
|
||
|
c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH,
|
||
|
c IPNTR, WORKD, INFO )
|
||
|
c
|
||
|
c\Arguments
|
||
|
c IDO Integer. (INPUT/OUTPUT)
|
||
|
c Reverse communication flag.
|
||
|
c -------------------------------------------------------------
|
||
|
c IDO = 0: first call to the reverse communication interface
|
||
|
c IDO = -1: compute Y = OP * X where
|
||
|
c IPNTR(1) is the pointer into WORK for X,
|
||
|
c IPNTR(2) is the pointer into WORK for Y.
|
||
|
c This is for the restart phase to force the new
|
||
|
c starting vector into the range of OP.
|
||
|
c IDO = 1: compute Y = OP * X where
|
||
|
c IPNTR(1) is the pointer into WORK for X,
|
||
|
c IPNTR(2) is the pointer into WORK for Y,
|
||
|
c IPNTR(3) is the pointer into WORK for B * X.
|
||
|
c IDO = 2: compute Y = B * X where
|
||
|
c IPNTR(1) is the pointer into WORK for X,
|
||
|
c IPNTR(2) is the pointer into WORK for Y.
|
||
|
c IDO = 99: done
|
||
|
c -------------------------------------------------------------
|
||
|
c When the routine is used in the "shift-and-invert" mode, the
|
||
|
c vector B * Q is already available and do not need to be
|
||
|
c recomputed in forming OP * Q.
|
||
|
c
|
||
|
c BMAT Character*1. (INPUT)
|
||
|
c BMAT specifies the type of the matrix B that defines the
|
||
|
c semi-inner product for the operator OP. See cnaupd.
|
||
|
c B = 'I' -> standard eigenvalue problem A*x = lambda*x
|
||
|
c B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
|
||
|
c
|
||
|
c N Integer. (INPUT)
|
||
|
c Dimension of the eigenproblem.
|
||
|
c
|
||
|
c K Integer. (INPUT)
|
||
|
c Current size of V and H.
|
||
|
c
|
||
|
c NP Integer. (INPUT)
|
||
|
c Number of additional Arnoldi steps to take.
|
||
|
c
|
||
|
c NB Integer. (INPUT)
|
||
|
c Blocksize to be used in the recurrence.
|
||
|
c Only work for NB = 1 right now. The goal is to have a
|
||
|
c program that implement both the block and non-block method.
|
||
|
c
|
||
|
c RESID Complex array of length N. (INPUT/OUTPUT)
|
||
|
c On INPUT: RESID contains the residual vector r_{k}.
|
||
|
c On OUTPUT: RESID contains the residual vector r_{k+p}.
|
||
|
c
|
||
|
c RNORM Real scalar. (INPUT/OUTPUT)
|
||
|
c B-norm of the starting residual on input.
|
||
|
c B-norm of the updated residual r_{k+p} on output.
|
||
|
c
|
||
|
c V Complex N by K+NP array. (INPUT/OUTPUT)
|
||
|
c On INPUT: V contains the Arnoldi vectors in the first K
|
||
|
c columns.
|
||
|
c On OUTPUT: V contains the new NP Arnoldi vectors in the next
|
||
|
c NP columns. The first K columns are unchanged.
|
||
|
c
|
||
|
c LDV Integer. (INPUT)
|
||
|
c Leading dimension of V exactly as declared in the calling
|
||
|
c program.
|
||
|
c
|
||
|
c H Complex (K+NP) by (K+NP) array. (INPUT/OUTPUT)
|
||
|
c H is used to store the generated upper Hessenberg matrix.
|
||
|
c
|
||
|
c LDH Integer. (INPUT)
|
||
|
c Leading dimension of H exactly as declared in the calling
|
||
|
c program.
|
||
|
c
|
||
|
c IPNTR Integer array of length 3. (OUTPUT)
|
||
|
c Pointer to mark the starting locations in the WORK for
|
||
|
c vectors used by the Arnoldi iteration.
|
||
|
c -------------------------------------------------------------
|
||
|
c IPNTR(1): pointer to the current operand vector X.
|
||
|
c IPNTR(2): pointer to the current result vector Y.
|
||
|
c IPNTR(3): pointer to the vector B * X when used in the
|
||
|
c shift-and-invert mode. X is the current operand.
|
||
|
c -------------------------------------------------------------
|
||
|
c
|
||
|
c WORKD Complex work array of length 3*N. (REVERSE COMMUNICATION)
|
||
|
c Distributed array to be used in the basic Arnoldi iteration
|
||
|
c for reverse communication. The calling program should not
|
||
|
c use WORKD as temporary workspace during the iteration !!!!!!
|
||
|
c On input, WORKD(1:N) = B*RESID and is used to save some
|
||
|
c computation at the first step.
|
||
|
c
|
||
|
c INFO Integer. (OUTPUT)
|
||
|
c = 0: Normal exit.
|
||
|
c > 0: Size of the spanning invariant subspace of OP found.
|
||
|
c
|
||
|
c\EndDoc
|
||
|
c
|
||
|
c-----------------------------------------------------------------------
|
||
|
c
|
||
|
c\BeginLib
|
||
|
c
|
||
|
c\Local variables:
|
||
|
c xxxxxx Complex
|
||
|
c
|
||
|
c\References:
|
||
|
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
|
||
|
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
|
||
|
c pp 357-385.
|
||
|
c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
|
||
|
c Restarted Arnoldi Iteration", Rice University Technical Report
|
||
|
c TR95-13, Department of Computational and Applied Mathematics.
|
||
|
c
|
||
|
c\Routines called:
|
||
|
c cgetv0 ARPACK routine to generate the initial vector.
|
||
|
c ivout ARPACK utility routine that prints integers.
|
||
|
c second ARPACK utility routine for timing.
|
||
|
c cmout ARPACK utility routine that prints matrices
|
||
|
c cvout ARPACK utility routine that prints vectors.
|
||
|
c clanhs LAPACK routine that computes various norms of a matrix.
|
||
|
c clascl LAPACK routine for careful scaling of a matrix.
|
||
|
c slabad LAPACK routine for defining the underflow and overflow
|
||
|
c limits.
|
||
|
c slamch LAPACK routine that determines machine constants.
|
||
|
c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully.
|
||
|
c cgemv Level 2 BLAS routine for matrix vector multiplication.
|
||
|
c caxpy Level 1 BLAS that computes a vector triad.
|
||
|
c ccopy Level 1 BLAS that copies one vector to another .
|
||
|
c cdotc Level 1 BLAS that computes the scalar product of two vectors.
|
||
|
c cscal Level 1 BLAS that scales a vector.
|
||
|
c csscal Level 1 BLAS that scales a complex vector by a real number.
|
||
|
c scnrm2 Level 1 BLAS that computes the norm of a vector.
|
||
|
c
|
||
|
c\Author
|
||
|
c Danny Sorensen Phuong Vu
|
||
|
c Richard Lehoucq CRPC / Rice University
|
||
|
c Dept. of Computational & Houston, Texas
|
||
|
c Applied Mathematics
|
||
|
c Rice University
|
||
|
c Houston, Texas
|
||
|
c
|
||
|
c\SCCS Information: @(#)
|
||
|
c FILE: naitr.F SID: 2.3 DATE OF SID: 8/27/96 RELEASE: 2
|
||
|
c
|
||
|
c\Remarks
|
||
|
c The algorithm implemented is:
|
||
|
c
|
||
|
c restart = .false.
|
||
|
c Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
|
||
|
c r_{k} contains the initial residual vector even for k = 0;
|
||
|
c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
|
||
|
c computed by the calling program.
|
||
|
c
|
||
|
c betaj = rnorm ; p_{k+1} = B*r_{k} ;
|
||
|
c For j = k+1, ..., k+np Do
|
||
|
c 1) if ( betaj < tol ) stop or restart depending on j.
|
||
|
c ( At present tol is zero )
|
||
|
c if ( restart ) generate a new starting vector.
|
||
|
c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}];
|
||
|
c p_{j} = p_{j}/betaj
|
||
|
c 3) r_{j} = OP*v_{j} where OP is defined as in cnaupd
|
||
|
c For shift-invert mode p_{j} = B*v_{j} is already available.
|
||
|
c wnorm = || OP*v_{j} ||
|
||
|
c 4) Compute the j-th step residual vector.
|
||
|
c w_{j} = V_{j}^T * B * OP * v_{j}
|
||
|
c r_{j} = OP*v_{j} - V_{j} * w_{j}
|
||
|
c H(:,j) = w_{j};
|
||
|
c H(j,j-1) = rnorm
|
||
|
c rnorm = || r_(j) ||
|
||
|
c If (rnorm > 0.717*wnorm) accept step and go back to 1)
|
||
|
c 5) Re-orthogonalization step:
|
||
|
c s = V_{j}'*B*r_{j}
|
||
|
c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} ||
|
||
|
c alphaj = alphaj + s_{j};
|
||
|
c 6) Iterative refinement step:
|
||
|
c If (rnorm1 > 0.717*rnorm) then
|
||
|
c rnorm = rnorm1
|
||
|
c accept step and go back to 1)
|
||
|
c Else
|
||
|
c rnorm = rnorm1
|
||
|
c If this is the first time in step 6), go to 5)
|
||
|
c Else r_{j} lies in the span of V_{j} numerically.
|
||
|
c Set r_{j} = 0 and rnorm = 0; go to 1)
|
||
|
c EndIf
|
||
|
c End Do
|
||
|
c
|
||
|
c\EndLib
|
||
|
c
|
||
|
c-----------------------------------------------------------------------
|
||
|
c
|
||
|
subroutine cnaitr
|
||
|
& (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh,
|
||
|
& ipntr, workd, info)
|
||
|
c
|
||
|
c %----------------------------------------------------%
|
||
|
c | Include files for debugging and timing information |
|
||
|
c %----------------------------------------------------%
|
||
|
c
|
||
|
include 'debug.h'
|
||
|
include 'stat.h'
|
||
|
c
|
||
|
c %------------------%
|
||
|
c | Scalar Arguments |
|
||
|
c %------------------%
|
||
|
c
|
||
|
character bmat*1
|
||
|
integer ido, info, k, ldh, ldv, n, nb, np
|
||
|
Real
|
||
|
& rnorm
|
||
|
c
|
||
|
c %-----------------%
|
||
|
c | Array Arguments |
|
||
|
c %-----------------%
|
||
|
c
|
||
|
integer ipntr(3)
|
||
|
Complex
|
||
|
& h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n)
|
||
|
c
|
||
|
c %------------%
|
||
|
c | Parameters |
|
||
|
c %------------%
|
||
|
c
|
||
|
Complex
|
||
|
& one, zero
|
||
|
Real
|
||
|
& rone, rzero
|
||
|
parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0),
|
||
|
& rone = 1.0E+0, rzero = 0.0E+0)
|
||
|
c
|
||
|
c %--------------%
|
||
|
c | Local Arrays |
|
||
|
c %--------------%
|
||
|
c
|
||
|
Real
|
||
|
& rtemp(2)
|
||
|
c
|
||
|
c %---------------%
|
||
|
c | Local Scalars |
|
||
|
c %---------------%
|
||
|
c
|
||
|
logical first, orth1, orth2, rstart, step3, step4
|
||
|
integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
|
||
|
& jj
|
||
|
Real
|
||
|
& ovfl, smlnum, tst1, ulp, unfl, betaj,
|
||
|
& temp1, rnorm1, wnorm
|
||
|
Complex
|
||
|
& cnorm
|
||
|
c
|
||
|
save first, orth1, orth2, rstart, step3, step4,
|
||
|
& ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
|
||
|
& betaj, rnorm1, smlnum, ulp, unfl, wnorm
|
||
|
c
|
||
|
c %----------------------%
|
||
|
c | External Subroutines |
|
||
|
c %----------------------%
|
||
|
c
|
||
|
external caxpy, ccopy, cscal, csscal, cgemv, cgetv0,
|
||
|
& slabad, cvout, cmout, ivout, second
|
||
|
c
|
||
|
c %--------------------%
|
||
|
c | External Functions |
|
||
|
c %--------------------%
|
||
|
c
|
||
|
Complex
|
||
|
& cdotc
|
||
|
Real
|
||
|
& slamch, scnrm2, clanhs, slapy2
|
||
|
external cdotc, scnrm2, clanhs, slamch, slapy2
|
||
|
c
|
||
|
c %---------------------%
|
||
|
c | Intrinsic Functions |
|
||
|
c %---------------------%
|
||
|
c
|
||
|
intrinsic aimag, real, max, sqrt
|
||
|
c
|
||
|
c %-----------------%
|
||
|
c | Data statements |
|
||
|
c %-----------------%
|
||
|
c
|
||
|
data first / .true. /
|
||
|
c
|
||
|
c %-----------------------%
|
||
|
c | Executable Statements |
|
||
|
c %-----------------------%
|
||
|
c
|
||
|
if (first) then
|
||
|
c
|
||
|
c %-----------------------------------------%
|
||
|
c | Set machine-dependent constants for the |
|
||
|
c | the splitting and deflation criterion. |
|
||
|
c | If norm(H) <= sqrt(OVFL), |
|
||
|
c | overflow should not occur. |
|
||
|
c | REFERENCE: LAPACK subroutine clahqr |
|
||
|
c %-----------------------------------------%
|
||
|
c
|
||
|
unfl = slamch( 'safe minimum' )
|
||
|
ovfl = real(one / unfl)
|
||
|
call slabad( unfl, ovfl )
|
||
|
ulp = slamch( 'precision' )
|
||
|
smlnum = unfl*( n / ulp )
|
||
|
first = .false.
|
||
|
end if
|
||
|
c
|
||
|
if (ido .eq. 0) then
|
||
|
c
|
||
|
c %-------------------------------%
|
||
|
c | Initialize timing statistics |
|
||
|
c | & message level for debugging |
|
||
|
c %-------------------------------%
|
||
|
c
|
||
|
call second (t0)
|
||
|
msglvl = mcaitr
|
||
|
c
|
||
|
c %------------------------------%
|
||
|
c | Initial call to this routine |
|
||
|
c %------------------------------%
|
||
|
c
|
||
|
info = 0
|
||
|
step3 = .false.
|
||
|
step4 = .false.
|
||
|
rstart = .false.
|
||
|
orth1 = .false.
|
||
|
orth2 = .false.
|
||
|
j = k + 1
|
||
|
ipj = 1
|
||
|
irj = ipj + n
|
||
|
ivj = irj + n
|
||
|
end if
|
||
|
c
|
||
|
c %-------------------------------------------------%
|
||
|
c | When in reverse communication mode one of: |
|
||
|
c | STEP3, STEP4, ORTH1, ORTH2, RSTART |
|
||
|
c | will be .true. when .... |
|
||
|
c | STEP3: return from computing OP*v_{j}. |
|
||
|
c | STEP4: return from computing B-norm of OP*v_{j} |
|
||
|
c | ORTH1: return from computing B-norm of r_{j+1} |
|
||
|
c | ORTH2: return from computing B-norm of |
|
||
|
c | correction to the residual vector. |
|
||
|
c | RSTART: return from OP computations needed by |
|
||
|
c | cgetv0. |
|
||
|
c %-------------------------------------------------%
|
||
|
c
|
||
|
if (step3) go to 50
|
||
|
if (step4) go to 60
|
||
|
if (orth1) go to 70
|
||
|
if (orth2) go to 90
|
||
|
if (rstart) go to 30
|
||
|
c
|
||
|
c %-----------------------------%
|
||
|
c | Else this is the first step |
|
||
|
c %-----------------------------%
|
||
|
c
|
||
|
c %--------------------------------------------------------------%
|
||
|
c | |
|
||
|
c | A R N O L D I I T E R A T I O N L O O P |
|
||
|
c | |
|
||
|
c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
|
||
|
c %--------------------------------------------------------------%
|
||
|
|
||
|
1000 continue
|
||
|
c
|
||
|
if (msglvl .gt. 1) then
|
||
|
call ivout (logfil, 1, j, ndigit,
|
||
|
& '_naitr: generating Arnoldi vector number')
|
||
|
call svout (logfil, 1, rnorm, ndigit,
|
||
|
& '_naitr: B-norm of the current residual is')
|
||
|
end if
|
||
|
c
|
||
|
c %---------------------------------------------------%
|
||
|
c | STEP 1: Check if the B norm of j-th residual |
|
||
|
c | vector is zero. Equivalent to determine whether |
|
||
|
c | an exact j-step Arnoldi factorization is present. |
|
||
|
c %---------------------------------------------------%
|
||
|
c
|
||
|
betaj = rnorm
|
||
|
if (rnorm .gt. rzero) go to 40
|
||
|
c
|
||
|
c %---------------------------------------------------%
|
||
|
c | Invariant subspace found, generate a new starting |
|
||
|
c | vector which is orthogonal to the current Arnoldi |
|
||
|
c | basis and continue the iteration. |
|
||
|
c %---------------------------------------------------%
|
||
|
c
|
||
|
if (msglvl .gt. 0) then
|
||
|
call ivout (logfil, 1, j, ndigit,
|
||
|
& '_naitr: ****** RESTART AT STEP ******')
|
||
|
end if
|
||
|
c
|
||
|
c %---------------------------------------------%
|
||
|
c | ITRY is the loop variable that controls the |
|
||
|
c | maximum amount of times that a restart is |
|
||
|
c | attempted. NRSTRT is used by stat.h |
|
||
|
c %---------------------------------------------%
|
||
|
c
|
||
|
betaj = rzero
|
||
|
nrstrt = nrstrt + 1
|
||
|
itry = 1
|
||
|
20 continue
|
||
|
rstart = .true.
|
||
|
ido = 0
|
||
|
30 continue
|
||
|
c
|
||
|
c %--------------------------------------%
|
||
|
c | If in reverse communication mode and |
|
||
|
c | RSTART = .true. flow returns here. |
|
||
|
c %--------------------------------------%
|
||
|
c
|
||
|
call cgetv0 (ido, bmat, itry, .false., n, j, v, ldv,
|
||
|
& resid, rnorm, ipntr, workd, ierr)
|
||
|
if (ido .ne. 99) go to 9000
|
||
|
if (ierr .lt. 0) then
|
||
|
itry = itry + 1
|
||
|
if (itry .le. 3) go to 20
|
||
|
c
|
||
|
c %------------------------------------------------%
|
||
|
c | Give up after several restart attempts. |
|
||
|
c | Set INFO to the size of the invariant subspace |
|
||
|
c | which spans OP and exit. |
|
||
|
c %------------------------------------------------%
|
||
|
c
|
||
|
info = j - 1
|
||
|
call second (t1)
|
||
|
tcaitr = tcaitr + (t1 - t0)
|
||
|
ido = 99
|
||
|
go to 9000
|
||
|
end if
|
||
|
c
|
||
|
40 continue
|
||
|
c
|
||
|
c %---------------------------------------------------------%
|
||
|
c | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm |
|
||
|
c | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
|
||
|
c | when reciprocating a small RNORM, test against lower |
|
||
|
c | machine bound. |
|
||
|
c %---------------------------------------------------------%
|
||
|
c
|
||
|
call ccopy (n, resid, 1, v(1,j), 1)
|
||
|
if ( rnorm .ge. unfl) then
|
||
|
temp1 = rone / rnorm
|
||
|
call csscal (n, temp1, v(1,j), 1)
|
||
|
call csscal (n, temp1, workd(ipj), 1)
|
||
|
else
|
||
|
c
|
||
|
c %-----------------------------------------%
|
||
|
c | To scale both v_{j} and p_{j} carefully |
|
||
|
c | use LAPACK routine clascl |
|
||
|
c %-----------------------------------------%
|
||
|
c
|
||
|
call clascl ('General', i, i, rnorm, rone,
|
||
|
& n, 1, v(1,j), n, infol)
|
||
|
call clascl ('General', i, i, rnorm, rone,
|
||
|
& n, 1, workd(ipj), n, infol)
|
||
|
end if
|
||
|
c
|
||
|
c %------------------------------------------------------%
|
||
|
c | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
|
||
|
c | Note that this is not quite yet r_{j}. See STEP 4 |
|
||
|
c %------------------------------------------------------%
|
||
|
c
|
||
|
step3 = .true.
|
||
|
nopx = nopx + 1
|
||
|
call second (t2)
|
||
|
call ccopy (n, v(1,j), 1, workd(ivj), 1)
|
||
|
ipntr(1) = ivj
|
||
|
ipntr(2) = irj
|
||
|
ipntr(3) = ipj
|
||
|
ido = 1
|
||
|
c
|
||
|
c %-----------------------------------%
|
||
|
c | Exit in order to compute OP*v_{j} |
|
||
|
c %-----------------------------------%
|
||
|
c
|
||
|
go to 9000
|
||
|
50 continue
|
||
|
c
|
||
|
c %----------------------------------%
|
||
|
c | Back from reverse communication; |
|
||
|
c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} |
|
||
|
c | if step3 = .true. |
|
||
|
c %----------------------------------%
|
||
|
c
|
||
|
call second (t3)
|
||
|
tmvopx = tmvopx + (t3 - t2)
|
||
|
|
||
|
step3 = .false.
|
||
|
c
|
||
|
c %------------------------------------------%
|
||
|
c | Put another copy of OP*v_{j} into RESID. |
|
||
|
c %------------------------------------------%
|
||
|
c
|
||
|
call ccopy (n, workd(irj), 1, resid, 1)
|
||
|
c
|
||
|
c %---------------------------------------%
|
||
|
c | STEP 4: Finish extending the Arnoldi |
|
||
|
c | factorization to length j. |
|
||
|
c %---------------------------------------%
|
||
|
c
|
||
|
call second (t2)
|
||
|
if (bmat .eq. 'G') then
|
||
|
nbx = nbx + 1
|
||
|
step4 = .true.
|
||
|
ipntr(1) = irj
|
||
|
ipntr(2) = ipj
|
||
|
ido = 2
|
||
|
c
|
||
|
c %-------------------------------------%
|
||
|
c | Exit in order to compute B*OP*v_{j} |
|
||
|
c %-------------------------------------%
|
||
|
c
|
||
|
go to 9000
|
||
|
else if (bmat .eq. 'I') then
|
||
|
call ccopy (n, resid, 1, workd(ipj), 1)
|
||
|
end if
|
||
|
60 continue
|
||
|
c
|
||
|
c %----------------------------------%
|
||
|
c | Back from reverse communication; |
|
||
|
c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
|
||
|
c | if step4 = .true. |
|
||
|
c %----------------------------------%
|
||
|
c
|
||
|
if (bmat .eq. 'G') then
|
||
|
call second (t3)
|
||
|
tmvbx = tmvbx + (t3 - t2)
|
||
|
end if
|
||
|
c
|
||
|
step4 = .false.
|
||
|
c
|
||
|
c %-------------------------------------%
|
||
|
c | The following is needed for STEP 5. |
|
||
|
c | Compute the B-norm of OP*v_{j}. |
|
||
|
c %-------------------------------------%
|
||
|
c
|
||
|
if (bmat .eq. 'G') then
|
||
|
cnorm = cdotc (n, resid, 1, workd(ipj), 1)
|
||
|
wnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
|
||
|
else if (bmat .eq. 'I') then
|
||
|
wnorm = scnrm2(n, resid, 1)
|
||
|
end if
|
||
|
c
|
||
|
c %-----------------------------------------%
|
||
|
c | Compute the j-th residual corresponding |
|
||
|
c | to the j step factorization. |
|
||
|
c | Use Classical Gram Schmidt and compute: |
|
||
|
c | w_{j} <- V_{j}^T * B * OP * v_{j} |
|
||
|
c | r_{j} <- OP*v_{j} - V_{j} * w_{j} |
|
||
|
c %-----------------------------------------%
|
||
|
c
|
||
|
c
|
||
|
c %------------------------------------------%
|
||
|
c | Compute the j Fourier coefficients w_{j} |
|
||
|
c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. |
|
||
|
c %------------------------------------------%
|
||
|
c
|
||
|
call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
|
||
|
& zero, h(1,j), 1)
|
||
|
c
|
||
|
c %--------------------------------------%
|
||
|
c | Orthogonalize r_{j} against V_{j}. |
|
||
|
c | RESID contains OP*v_{j}. See STEP 3. |
|
||
|
c %--------------------------------------%
|
||
|
c
|
||
|
call cgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
|
||
|
& one, resid, 1)
|
||
|
c
|
||
|
if (j .gt. 1) h(j,j-1) = cmplx(betaj, rzero)
|
||
|
c
|
||
|
call second (t4)
|
||
|
c
|
||
|
orth1 = .true.
|
||
|
c
|
||
|
call second (t2)
|
||
|
if (bmat .eq. 'G') then
|
||
|
nbx = nbx + 1
|
||
|
call ccopy (n, resid, 1, workd(irj), 1)
|
||
|
ipntr(1) = irj
|
||
|
ipntr(2) = ipj
|
||
|
ido = 2
|
||
|
c
|
||
|
c %----------------------------------%
|
||
|
c | Exit in order to compute B*r_{j} |
|
||
|
c %----------------------------------%
|
||
|
c
|
||
|
go to 9000
|
||
|
else if (bmat .eq. 'I') then
|
||
|
call ccopy (n, resid, 1, workd(ipj), 1)
|
||
|
end if
|
||
|
70 continue
|
||
|
c
|
||
|
c %---------------------------------------------------%
|
||
|
c | Back from reverse communication if ORTH1 = .true. |
|
||
|
c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. |
|
||
|
c %---------------------------------------------------%
|
||
|
c
|
||
|
if (bmat .eq. 'G') then
|
||
|
call second (t3)
|
||
|
tmvbx = tmvbx + (t3 - t2)
|
||
|
end if
|
||
|
c
|
||
|
orth1 = .false.
|
||
|
c
|
||
|
c %------------------------------%
|
||
|
c | Compute the B-norm of r_{j}. |
|
||
|
c %------------------------------%
|
||
|
c
|
||
|
if (bmat .eq. 'G') then
|
||
|
cnorm = cdotc (n, resid, 1, workd(ipj), 1)
|
||
|
rnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
|
||
|
else if (bmat .eq. 'I') then
|
||
|
rnorm = scnrm2(n, resid, 1)
|
||
|
end if
|
||
|
c
|
||
|
c %-----------------------------------------------------------%
|
||
|
c | STEP 5: Re-orthogonalization / Iterative refinement phase |
|
||
|
c | Maximum NITER_ITREF tries. |
|
||
|
c | |
|
||
|
c | s = V_{j}^T * B * r_{j} |
|
||
|
c | r_{j} = r_{j} - V_{j}*s |
|
||
|
c | alphaj = alphaj + s_{j} |
|
||
|
c | |
|
||
|
c | The stopping criteria used for iterative refinement is |
|
||
|
c | discussed in Parlett's book SEP, page 107 and in Gragg & |
|
||
|
c | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. |
|
||
|
c | Determine if we need to correct the residual. The goal is |
|
||
|
c | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || |
|
||
|
c | The following test determines whether the sine of the |
|
||
|
c | angle between OP*x and the computed residual is less |
|
||
|
c | than or equal to 0.717. |
|
||
|
c %-----------------------------------------------------------%
|
||
|
c
|
||
|
if ( rnorm .gt. 0.717*wnorm ) go to 100
|
||
|
c
|
||
|
iter = 0
|
||
|
nrorth = nrorth + 1
|
||
|
c
|
||
|
c %---------------------------------------------------%
|
||
|
c | Enter the Iterative refinement phase. If further |
|
||
|
c | refinement is necessary, loop back here. The loop |
|
||
|
c | variable is ITER. Perform a step of Classical |
|
||
|
c | Gram-Schmidt using all the Arnoldi vectors V_{j} |
|
||
|
c %---------------------------------------------------%
|
||
|
c
|
||
|
80 continue
|
||
|
c
|
||
|
if (msglvl .gt. 2) then
|
||
|
rtemp(1) = wnorm
|
||
|
rtemp(2) = rnorm
|
||
|
call svout (logfil, 2, rtemp, ndigit,
|
||
|
& '_naitr: re-orthogonalization; wnorm and rnorm are')
|
||
|
call cvout (logfil, j, h(1,j), ndigit,
|
||
|
& '_naitr: j-th column of H')
|
||
|
end if
|
||
|
c
|
||
|
c %----------------------------------------------------%
|
||
|
c | Compute V_{j}^T * B * r_{j}. |
|
||
|
c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
|
||
|
c %----------------------------------------------------%
|
||
|
c
|
||
|
call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
|
||
|
& zero, workd(irj), 1)
|
||
|
c
|
||
|
c %---------------------------------------------%
|
||
|
c | Compute the correction to the residual: |
|
||
|
c | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
|
||
|
c | The correction to H is v(:,1:J)*H(1:J,1:J) |
|
||
|
c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. |
|
||
|
c %---------------------------------------------%
|
||
|
c
|
||
|
call cgemv ('N', n, j, -one, v, ldv, workd(irj), 1,
|
||
|
& one, resid, 1)
|
||
|
call caxpy (j, one, workd(irj), 1, h(1,j), 1)
|
||
|
c
|
||
|
orth2 = .true.
|
||
|
call second (t2)
|
||
|
if (bmat .eq. 'G') then
|
||
|
nbx = nbx + 1
|
||
|
call ccopy (n, resid, 1, workd(irj), 1)
|
||
|
ipntr(1) = irj
|
||
|
ipntr(2) = ipj
|
||
|
ido = 2
|
||
|
c
|
||
|
c %-----------------------------------%
|
||
|
c | Exit in order to compute B*r_{j}. |
|
||
|
c | r_{j} is the corrected residual. |
|
||
|
c %-----------------------------------%
|
||
|
c
|
||
|
go to 9000
|
||
|
else if (bmat .eq. 'I') then
|
||
|
call ccopy (n, resid, 1, workd(ipj), 1)
|
||
|
end if
|
||
|
90 continue
|
||
|
c
|
||
|
c %---------------------------------------------------%
|
||
|
c | Back from reverse communication if ORTH2 = .true. |
|
||
|
c %---------------------------------------------------%
|
||
|
c
|
||
|
if (bmat .eq. 'G') then
|
||
|
call second (t3)
|
||
|
tmvbx = tmvbx + (t3 - t2)
|
||
|
end if
|
||
|
c
|
||
|
c %-----------------------------------------------------%
|
||
|
c | Compute the B-norm of the corrected residual r_{j}. |
|
||
|
c %-----------------------------------------------------%
|
||
|
c
|
||
|
if (bmat .eq. 'G') then
|
||
|
cnorm = cdotc (n, resid, 1, workd(ipj), 1)
|
||
|
rnorm1 = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
|
||
|
else if (bmat .eq. 'I') then
|
||
|
rnorm1 = scnrm2(n, resid, 1)
|
||
|
end if
|
||
|
c
|
||
|
if (msglvl .gt. 0 .and. iter .gt. 0 ) then
|
||
|
call ivout (logfil, 1, j, ndigit,
|
||
|
& '_naitr: Iterative refinement for Arnoldi residual')
|
||
|
if (msglvl .gt. 2) then
|
||
|
rtemp(1) = rnorm
|
||
|
rtemp(2) = rnorm1
|
||
|
call svout (logfil, 2, rtemp, ndigit,
|
||
|
& '_naitr: iterative refinement ; rnorm and rnorm1 are')
|
||
|
end if
|
||
|
end if
|
||
|
c
|
||
|
c %-----------------------------------------%
|
||
|
c | Determine if we need to perform another |
|
||
|
c | step of re-orthogonalization. |
|
||
|
c %-----------------------------------------%
|
||
|
c
|
||
|
if ( rnorm1 .gt. 0.717*rnorm ) then
|
||
|
c
|
||
|
c %---------------------------------------%
|
||
|
c | No need for further refinement. |
|
||
|
c | The cosine of the angle between the |
|
||
|
c | corrected residual vector and the old |
|
||
|
c | residual vector is greater than 0.717 |
|
||
|
c | In other words the corrected residual |
|
||
|
c | and the old residual vector share an |
|
||
|
c | angle of less than arcCOS(0.717) |
|
||
|
c %---------------------------------------%
|
||
|
c
|
||
|
rnorm = rnorm1
|
||
|
c
|
||
|
else
|
||
|
c
|
||
|
c %-------------------------------------------%
|
||
|
c | Another step of iterative refinement step |
|
||
|
c | is required. NITREF is used by stat.h |
|
||
|
c %-------------------------------------------%
|
||
|
c
|
||
|
nitref = nitref + 1
|
||
|
rnorm = rnorm1
|
||
|
iter = iter + 1
|
||
|
if (iter .le. 1) go to 80
|
||
|
c
|
||
|
c %-------------------------------------------------%
|
||
|
c | Otherwise RESID is numerically in the span of V |
|
||
|
c %-------------------------------------------------%
|
||
|
c
|
||
|
do 95 jj = 1, n
|
||
|
resid(jj) = zero
|
||
|
95 continue
|
||
|
rnorm = rzero
|
||
|
end if
|
||
|
c
|
||
|
c %----------------------------------------------%
|
||
|
c | Branch here directly if iterative refinement |
|
||
|
c | wasn't necessary or after at most NITER_REF |
|
||
|
c | steps of iterative refinement. |
|
||
|
c %----------------------------------------------%
|
||
|
c
|
||
|
100 continue
|
||
|
c
|
||
|
rstart = .false.
|
||
|
orth2 = .false.
|
||
|
c
|
||
|
call second (t5)
|
||
|
titref = titref + (t5 - t4)
|
||
|
c
|
||
|
c %------------------------------------%
|
||
|
c | STEP 6: Update j = j+1; Continue |
|
||
|
c %------------------------------------%
|
||
|
c
|
||
|
j = j + 1
|
||
|
if (j .gt. k+np) then
|
||
|
call second (t1)
|
||
|
tcaitr = tcaitr + (t1 - t0)
|
||
|
ido = 99
|
||
|
do 110 i = max(1,k), k+np-1
|
||
|
c
|
||
|
c %--------------------------------------------%
|
||
|
c | Check for splitting and deflation. |
|
||
|
c | Use a standard test as in the QR algorithm |
|
||
|
c | REFERENCE: LAPACK subroutine clahqr |
|
||
|
c %--------------------------------------------%
|
||
|
c
|
||
|
tst1 = slapy2(real(h(i,i)),aimag(h(i,i)))
|
||
|
& + slapy2(real(h(i+1,i+1)), aimag(h(i+1,i+1)))
|
||
|
if( tst1.eq.real(zero) )
|
||
|
& tst1 = clanhs( '1', k+np, h, ldh, workd(n+1) )
|
||
|
if( slapy2(real(h(i+1,i)),aimag(h(i+1,i))) .le.
|
||
|
& max( ulp*tst1, smlnum ) )
|
||
|
& h(i+1,i) = zero
|
||
|
110 continue
|
||
|
c
|
||
|
if (msglvl .gt. 2) then
|
||
|
call cmout (logfil, k+np, k+np, h, ldh, ndigit,
|
||
|
& '_naitr: Final upper Hessenberg matrix H of order K+NP')
|
||
|
end if
|
||
|
c
|
||
|
go to 9000
|
||
|
end if
|
||
|
c
|
||
|
c %--------------------------------------------------------%
|
||
|
c | Loop back to extend the factorization by another step. |
|
||
|
c %--------------------------------------------------------%
|
||
|
c
|
||
|
go to 1000
|
||
|
c
|
||
|
c %---------------------------------------------------------------%
|
||
|
c | |
|
||
|
c | E N D O F M A I N I T E R A T I O N L O O P |
|
||
|
c | |
|
||
|
c %---------------------------------------------------------------%
|
||
|
c
|
||
|
9000 continue
|
||
|
return
|
||
|
c
|
||
|
c %---------------%
|
||
|
c | End of cnaitr |
|
||
|
c %---------------%
|
||
|
c
|
||
|
end
|