// Copyright ©2016 The Gonum Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package gonum import ( "math" "gonum.org/v1/gonum/blas" "gonum.org/v1/gonum/blas/blas64" "gonum.org/v1/gonum/lapack" ) // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper // Hessenberg matrix to detect and deflate fully converged eigenvalues from a // trailing principal submatrix using aggressive early deflation [1]. // // On return, H will be overwritten by a new Hessenberg matrix that is a // perturbation of an orthogonal similarity transformation of H. It is hoped // that on output H will have many zero subdiagonal entries. // // If wantt is true, the matrix H will be fully updated so that the // quasi-triangular Schur factor can be computed. If wantt is false, then only // enough of H will be updated to preserve the eigenvalues. // // If wantz is true, the orthogonal similarity transformation will be // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced. // // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal // of H. It must hold that // 0 <= ilo <= ihi < n, if n > 0, // ilo == 0 and ihi == -1, if n == 0, // and the block must be isolated, that is, it must hold that // ktop == 0 or H[ktop,ktop-1] == 0, // kbot == n-1 or H[kbot+1,kbot] == 0, // otherwise Dlaqr23 will panic. // // nw is the deflation window size. It must hold that // 0 <= nw <= kbot-ktop+1, // otherwise Dlaqr23 will panic. // // iloz and ihiz specify the rows of the n×n matrix Z to which transformations // will be applied if wantz is true. It must hold that // 0 <= iloz <= ktop, and kbot <= ihiz < n, // otherwise Dlaqr23 will panic. // // sr and si must have length kbot+1, otherwise Dlaqr23 will panic. // // v and ldv represent an nw×nw work matrix. // t and ldt represent an nw×nh work matrix, and nh must be at least nw. // wv and ldwv represent an nv×nw work matrix. // // work must have length at least lwork and lwork must be at least max(1,2*nw), // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater // efficiency. On return, work[0] will contain the optimal value of lwork. // // If lwork is -1, instead of performing Dlaqr23, the function only estimates the // optimal workspace size and stores it into work[0]. Neither h nor z are // accessed. // // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves // as DLAQR3, for recur == 0 it behaves as DLAQR2. // // On return, ns and nd will contain respectively the number of unconverged // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in // sr and si. // // On return, the real and imaginary parts of approximate eigenvalues that may // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1] // and si[kbot-nd-ns+1:kbot-nd+1]. // // On return, the real and imaginary parts of converged eigenvalues will be // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1]. // // References: // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973 // URL: http://dx.doi.org/10.1137/S0895479801384585 // func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) { switch { case n < 0: panic(nLT0) case ktop < 0 || max(0, n-1) < ktop: panic(badKtop) case kbot < min(ktop, n-1) || n <= kbot: panic(badKbot) case nw < 0 || kbot-ktop+1+1 < nw: panic(badNw) case ldh < max(1, n): panic(badLdH) case wantz && (iloz < 0 || ktop < iloz): panic(badIloz) case wantz && (ihiz < kbot || n <= ihiz): panic(badIhiz) case ldz < 1, wantz && ldz < n: panic(badLdZ) case ldv < max(1, nw): panic(badLdV) case nh < nw: panic(badNh) case ldt < max(1, nh): panic(badLdT) case nv < 0: panic(nvLT0) case ldwv < max(1, nw): panic(badLdWV) case lwork < max(1, 2*nw) && lwork != -1: panic(badLWork) case len(work) < max(1, lwork): panic(shortWork) case recur < 0: panic(recurLT0) } // Quick return for zero window size. if nw == 0 { work[0] = 1 return 0, 0 } // LAPACK code does not enforce the documented behavior // nw <= kbot-ktop+1 // but we do (we panic above). jw := nw lwkopt := max(1, 2*nw) if jw > 2 { // Workspace query call to Dgehrd. impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1) lwk1 := int(work[0]) // Workspace query call to Dormhr. impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1) lwk2 := int(work[0]) if recur > 0 { // Workspace query call to Dlaqr04. impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1) lwk3 := int(work[0]) // Optimal workspace. lwkopt = max(jw+max(lwk1, lwk2), lwk3) } else { // Optimal workspace. lwkopt = jw + max(lwk1, lwk2) } } // Quick return in case of workspace query. if lwork == -1 { work[0] = float64(lwkopt) return 0, 0 } // Check input slices only if not doing workspace query. switch { case len(h) < (n-1)*ldh+n: panic(shortH) case len(v) < (nw-1)*ldv+nw: panic(shortV) case len(t) < (nw-1)*ldt+nh: panic(shortT) case len(wv) < (nv-1)*ldwv+nw: panic(shortWV) case wantz && len(z) < (n-1)*ldz+n: panic(shortZ) case len(sr) != kbot+1: panic(badLenSr) case len(si) != kbot+1: panic(badLenSi) case ktop > 0 && h[ktop*ldh+ktop-1] != 0: panic(notIsolated) case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0: panic(notIsolated) } // Machine constants. ulp := dlamchP smlnum := float64(n) / ulp * dlamchS // Setup deflation window. var s float64 kwtop := kbot - jw + 1 if kwtop != ktop { s = h[kwtop*ldh+kwtop-1] } if kwtop == kbot { // 1×1 deflation window. sr[kwtop] = h[kwtop*ldh+kwtop] si[kwtop] = 0 ns = 1 nd = 0 if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) { ns = 0 nd = 1 if kwtop > ktop { h[kwtop*ldh+kwtop-1] = 0 } } work[0] = 1 return ns, nd } // Convert to spike-triangular form. In case of a rare QR failure, this // routine continues to do aggressive early deflation using that part of // the deflation window that converged using infqr here and there to // keep track. impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt) bi := blas64.Implementation() bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1) impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv) nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork) var infqr int if recur > 0 && jw > nmin { infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1) } else { infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv) } // Note that ilo == 0 which conveniently coincides with the success // value of infqr, that is, infqr as an index always points to the first // converged eigenvalue. // Dtrexc needs a clean margin near the diagonal. for j := 0; j < jw-3; j++ { t[(j+2)*ldt+j] = 0 t[(j+3)*ldt+j] = 0 } if jw >= 3 { t[(jw-1)*ldt+jw-3] = 0 } ns = jw ilst := infqr // Deflation detection loop. for ilst < ns { bulge := false if ns >= 2 { bulge = t[(ns-1)*ldt+ns-2] != 0 } if !bulge { // Real eigenvalue. abst := math.Abs(t[(ns-1)*ldt+ns-1]) if abst == 0 { abst = math.Abs(s) } if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) { // Deflatable. ns-- } else { // Undeflatable, move it up out of the way. // Dtrexc can not fail in this case. _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) ilst++ } continue } // Complex conjugate pair. abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1])) if abst == 0 { abst = math.Abs(s) } if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) { // Deflatable. ns -= 2 } else { // Undeflatable, move them up out of the way. // Dtrexc does the right thing with ilst in case of a // rare exchange failure. _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) ilst += 2 } } // Return to Hessenberg form. if ns == 0 { s = 0 } if ns < jw { // Sorting diagonal blocks of T improves accuracy for graded // matrices. Bubble sort deals well with exchange failures. sorted := false i := ns for !sorted { sorted = true kend := i - 1 i = infqr var k int if i == ns-1 || t[(i+1)*ldt+i] == 0 { k = i + 1 } else { k = i + 2 } for k <= kend { var evi float64 if k == i+1 { evi = math.Abs(t[i*ldt+i]) } else { evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1])) } var evk float64 if k == kend || t[(k+1)*ldt+k] == 0 { evk = math.Abs(t[k*ldt+k]) } else { evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1])) } if evi >= evk { i = k } else { sorted = false _, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work) if ok { i = ilst } else { i = k } } if i == kend || t[(i+1)*ldt+i] == 0 { k = i + 1 } else { k = i + 2 } } } } // Restore shift/eigenvalue array from T. for i := jw - 1; i >= infqr; { if i == infqr || t[i*ldt+i-1] == 0 { sr[kwtop+i] = t[i*ldt+i] si[kwtop+i] = 0 i-- continue } aa := t[(i-1)*ldt+i-1] bb := t[(i-1)*ldt+i] cc := t[i*ldt+i-1] dd := t[i*ldt+i] _, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd) i -= 2 } if ns < jw || s == 0 { if ns > 1 && s != 0 { // Reflect spike back into lower triangle. bi.Dcopy(ns, v[:ns], 1, work[:ns], 1) _, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1) work[0] = 1 impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt) impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:]) impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:]) impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:]) impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw) } // Copy updated reduced window into place. if kwtop > 0 { h[kwtop*ldh+kwtop-1] = s * v[0] } impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh) bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1) // Accumulate orthogonal matrix in order to update H and Z, if // requested. if ns > 1 && s != 0 { // work[:ns-1] contains the elementary reflectors stored // by a call to Dgehrd above. impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1, t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw) } // Update vertical slab in H. var ltop int if !wantt { ltop = ktop } for krow := ltop; krow < kwtop; krow += nv { kln := min(nv, kwtop-krow) bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, 1, h[krow*ldh+kwtop:], ldh, v, ldv, 0, wv, ldwv) impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh) } // Update horizontal slab in H. if wantt { for kcol := kbot + 1; kcol < n; kcol += nh { kln := min(nh, n-kcol) bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw, 1, v, ldv, h[kwtop*ldh+kcol:], ldh, 0, t, ldt) impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh) } } // Update vertical slab in Z. if wantz { for krow := iloz; krow <= ihiz; krow += nv { kln := min(nv, ihiz-krow+1) bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, 1, z[krow*ldz+kwtop:], ldz, v, ldv, 0, wv, ldwv) impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz) } } } // The number of deflations. nd = jw - ns // Shifts are converged eigenvalues that could not be deflated. // Subtracting infqr from the spike length takes care of the case of a // rare QR failure while calculating eigenvalues of the deflation // window. ns -= infqr work[0] = float64(lwkopt) return ns, nd }