c c Rasmus Munk Larsen, Stanford University, 1999, 2004. c subroutine sreorth(n,k,V,ldv,vnew,normvnew,index,alpha,work, c iflag) c c Orthogonalize the N-vector VNEW against a subset of the columns of c the N-by-K matrix V(1:N,1:K) using iterated classical or modified c Gram-Schmidt. LDV is the leading dimension of the array containing c V. c c Which columns to orthogonalize against is decided by the integer c array INDEX = [s_1,e_1, s_2,e_2,..., s_k,e_l, s_{l+1}], which c selects the columns V(:,[s_1:e_1 s_2:e_2 ... s_l:e_l]). s_{l+1} c must be larger than k and marks the end of INDEX. c c The reorthogonalization is repeated until c c ||VNEW'|| > ALPHA * ||VNEW|| , c c where VNEW' is the vector obtained by orthogonalizing VNEW. If c VNEW' fails to satisfy this after 4 tries, VNEW is deemed to lie c numerically in the span of V(:,[s_1:e_1 s_2:e_2 ... s_l:e_l]), and c is set to the zero vector. c c On return NORMVNEW contains ||VNEW||. c c WORK is a workspace array of length at least c c max e_i-s_i+1, i=1...l. c c WORK is only used if IFLAG==1. c c If IFLAG==0 then iterated modified Gram-Schmidt is used. c If IFLAG==1 then iterated classical Gram-Schmidt is used. c c References: c Aake Bjorck, "Numerical Methods for Least Squares Problems", c SIAM, Philadelphia, 1996, pp. 68-69. c c J.~W. Daniel, W.~B. Gragg, L. Kaufman and G.~W. Stewart, c ``Reorthogonalization and Stable Algorithms Updating the c Gram-Schmidt QR Factorization'', Math. Comp., 30 (1976), no. c 136, pp. 772-795. c c B. N. Parlett, ``The Symmetric Eigenvalue Problem'', c Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109 c %-----------% c | Arguments | c %-----------% implicit none include 'stat.h' integer n,k,ldv,iflag,index(*) real V(ldv,*),vnew(*),work(*),normvnew c %------------% c | Parameters | c %------------% integer NTRY real one, zero parameter(one = 1.0, zero = 0.0, NTRY=5) c %-----------------% c | Local variables | c %-----------------% integer itry real alpha,normvnew_0 real t2,t3 c %----------------------% c | External Subroutines | c %----------------------% external smgs,scgs c %--------------------% c | External Functions | c %--------------------% real psnrm2 external psnrm2,pszero if (k.le.0 .or. n.le.0) return call second(t2) do itry=1,NTRY normvnew_0 = normvnew if ( iflag.eq.1 ) then call scgs(n,k,V,ldv,vnew,index,work) else call smgs(n,k,V,ldv,vnew,index) endif ndot = ndot + k normvnew = psnrm2(n,vnew,1) if (normvnew.gt.alpha*normvnew_0) goto 9999 enddo normvnew = zero c vnew is numerically in span(V) => return vnew = (0,0,...,0)^T call pszero(n,vnew,1) 9999 call second(t3) treorth = treorth + (t3-t2) nreorth = nreorth + 1 return end c c**************************************************************************** c subroutine scgs(n,k,V,ldv,vnew,index,work) c Block Gram-Schmidt orthogonalization: c FOR i= 1:l c vnew = vnew - V(:,[s_i:e_i])*(V(:,[s_i:e_i])'*vnew) c c If l=1 and s_1=1 and e_1=k then this becomes classical Gram-Schmidt. c %-----------% c | Arguments | c %-----------% implicit none include 'stat.h' integer n,k,ldv,index(*) real V(ldv,*),vnew(*),work(*) c %------------% c | Parameters | c %------------% real one, zero parameter(one = 1.0, zero = 0.0) c %-----------------% c | Local variables | c %-----------------% integer i,j,p,q,l, ld integer tid, nt, cnk,st real ylocal(n) c %--------------------% c | External Functions | c %--------------------% external sgemv #ifdef _OPENMP integer omp_get_thread_num, omp_get_num_threads external omp_get_thread_num, omp_get_num_threads #endif c The local variable ld was introduced to circumvent an apparent c bug in Intel's OpenMP implementation. ld = ldv #ifdef _OPENMP c$OMP PARALLEL private(i,p,q,l,tid,nt,cnk,st,j,ylocal) c$OMP& firstprivate(ld) shared(ndot) tid = omp_get_thread_num() nt = omp_get_num_threads() #else tid = 0 nt = 1 #endif cnk = n/nt st = tid*cnk+1 i=1 do while(index(i).le.k .and. index(i).gt.0) c c Select the next block of columns from V c p = index(i) q = index(i+1) l = q-p+1 if (tid.eq.0) then ndot = ndot + l endif c Classical Gram-Schmidt: vnew = vnew - V(:,p:q)*(V(:,p:q)'*vnew) if (l.gt.0) then if (tid.eq.nt-1) then cnk = n-st+1 endif call sgemv('T',cnk,l,one,V(st,p),ld,vnew(st),1,zero, c ylocal,1) if (tid.eq.0) then do j=1,l work(j) = ylocal(j) enddo endif #ifdef _OPENMP c$OMP BARRIER #endif if (.not.tid.eq.0) then #ifdef _OPENMP c$OMP CRITICAL #endif do j=1,l work(j) = work(j) + ylocal(j) enddo #ifdef _OPENMP c$OMP END CRITICAL #endif endif #ifdef _OPENMP c$OMP BARRIER #endif call sgemv('N',cnk,l,-one,V(st,p),ld,work,1,zero, c ylocal,1) do j=1,cnk vnew(st+j-1) = vnew(st+j-1) + ylocal(j) enddo endif i = i+2 enddo #ifdef _OPENMP c$OMP END PARALLEL #endif end