cauchy Subroutine

private subroutine cauchy(n, x, l, u, Nbd, g, Iorder, Iwhere, t, d, Xcp, m, Wy, Ws, Sy, Wt, Theta, Col, Head, p, c, Wbp, v, Nseg, Iprint, Sbgnrm, Info, Epsmch)

For given x, l, u, g (with sbgnrm > 0), and a limited memory BFGS matrix B defined in terms of matrices WY, WS, WT, and scalars head, col, and theta, this subroutine computes the generalized Cauchy point (GCP), defined as the first local minimizer of the quadratic

Q(x + s) = g's + 1/2 s'Bs

along the projected gradient direction P(x-tg,l,u). The routine returns the GCP in xcp.

References

  1. R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, "A limited memory algorithm for bound constrained optimization", SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
  2. C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, "L-BFGS-B: FORTRAN Subroutines for Large Scale Bound Constrained Optimization" Tech. Report, NAM-11, EECS Department, Northwestern University, 1994.

Credits

  • NEOS, November 1994. (Latest revision June 1996.) Optimization Technology Center. Argonne National Laboratory and Northwestern University. Written by Ciyou Zhu in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.

Arguments

Type IntentOptional Attributes Name
integer, intent(in) :: n

the dimension of the problem.

real(kind=wp), intent(in) :: x(n)

the starting point for the GCP computation.

real(kind=wp), intent(in) :: l(n)

the lower bound of x.

real(kind=wp), intent(in) :: u(n)

the upper bound of x.

integer, intent(in) :: Nbd(n)

On entry nbd represents the type of bounds imposed on the variables, and must be specified as follows:

  • nbd(i)=0 if x(i) is unbounded,
  • nbd(i)=1 if x(i) has only a lower bound,
  • nbd(i)=2 if x(i) has both lower and upper bounds, and
  • nbd(i)=3 if x(i) has only an upper bound.
real(kind=wp), intent(in) :: g(n)

the gradient of f(x). g must be a nonzero vector.

integer :: Iorder(n)

working array used to store the breakpoints in the piecewise linear path and free variables encountered. On exit:

  • iorder(1),...,iorder(nleft) are indices of breakpoints which have not been encountered;
  • iorder(nleft+1),...,iorder(nbreak) are indices of encountered breakpoints; and
  • iorder(nfree),...,iorder(n) are indices of variables which have no bound constraits along the search direction.
integer, intent(inout) :: Iwhere(n)

On entry iwhere indicates only the permanently fixed (iwhere=3) or free (iwhere= -1) components of x.

On exit iwhere records the status of the current x variables:

  • iwhere(i) = -3 if x(i) is free and has bounds, but is not moved
  • iwhere(i) = 0 if x(i) is free and has bounds, and is moved
  • iwhere(i) = 1 if x(i) is fixed at l(i), and l(i) /= u(i)
  • iwhere(i) = 2 if x(i) is fixed at u(i), and u(i) /= l(i)
  • iwhere(i) = 3 if x(i) is always fixed, i.e., u(i)=x(i)=l(i)
  • iwhere(i) = -1 if x(i) is always free, i.e., it has no bounds.
real(kind=wp) :: t(n)

used to store the break points.

real(kind=wp) :: d(n)

used to store the Cauchy direction P(x-tg)-x.

real(kind=wp), intent(out) :: Xcp(n)

used to return the GCP on exit.

integer, intent(in) :: m

the maximum number of variable metric corrections used to define the limited memory matrix.

real(kind=wp), intent(in) :: Wy(n,Col)

stores information that defines the limited memory BFGS matrix: wy(n,m) stores Y, a set of y-vectors;

real(kind=wp), intent(in) :: Ws(n,Col)

stores information that defines the limited memory BFGS matrix: ws(n,m) stores S, a set of s-vectors;

real(kind=wp), intent(in) :: Sy(m,m)

stores information that defines the limited memory BFGS matrix: sy(m,m) stores S'Y;

real(kind=wp), intent(in) :: Wt(m,m)

stores information that defines the limited memory BFGS matrix: wt(m,m) stores the Cholesky factorization of (theta*S'S+LD^(-1)L').

real(kind=wp), intent(in) :: Theta

the scaling factor specifying B_0 = theta I.

integer, intent(in) :: Col

the actual number of variable metric corrections stored so far.

integer, intent(in) :: Head

the location of the first s-vector (or y-vector) in S (or Y).

real(kind=wp) :: p(2*m)

working array used to store the vector p = W^(T)d.

real(kind=wp) :: c(2*m)

working array used to store the vector c = W^(T)(xcp-x).

real(kind=wp) :: Wbp(2*m)

working array used to store the row of W corresponding to a breakpoint.

real(kind=wp) :: v(2*m)

working array

integer, intent(out) :: Nseg

records the number of quadratic segments explored in searching for the GCP.

integer, intent(in) :: Iprint

controls the frequency and type of output generated:

  • iprint<0 no output is generated;
  • iprint=0 print only one line at the last iteration;
  • 0<iprint<99 print also f and |proj g| every iprint iterations;
  • iprint=99 print details of every iteration except n-vectors;
  • iprint=100 print also the changes of active set and final x;
  • iprint>100 print details of every iteration including x and g;

When iprint > 0, the file iterate.dat will be created to summarize the iteration.

real(kind=wp), intent(in) :: Sbgnrm

the norm of the projected gradient at x.

integer, intent(inout) :: Info

On entry info is 0. On exit:

  • info = 0 for normal return,
  • info /= 0 for abnormal return when the system used in routine bmv is singular.
real(kind=wp), intent(in) :: Epsmch

machine precision


Calls

proc~~cauchy~~CallsGraph proc~cauchy lbfgsb_module::cauchy proc~bmv lbfgsb_module::bmv proc~cauchy->proc~bmv proc~daxpy lbfgsb_blas_module::daxpy proc~cauchy->proc~daxpy proc~dcopy lbfgsb_blas_module::dcopy proc~cauchy->proc~dcopy proc~ddot lbfgsb_blas_module::ddot proc~cauchy->proc~ddot proc~dscal lbfgsb_blas_module::dscal proc~cauchy->proc~dscal proc~hpsolb lbfgsb_module::hpsolb proc~cauchy->proc~hpsolb proc~dtrsl lbfgsb_linpack_module::dtrsl proc~bmv->proc~dtrsl proc~dtrsl->proc~daxpy proc~dtrsl->proc~ddot

Called by

proc~~cauchy~~CalledByGraph proc~cauchy lbfgsb_module::cauchy proc~mainlb lbfgsb_module::mainlb proc~mainlb->proc~cauchy proc~setulb lbfgsb_module::setulb proc~setulb->proc~mainlb

Source Code

      subroutine cauchy(n,x,l,u,Nbd,g,Iorder,Iwhere,t,d,Xcp,m,Wy,Ws,Sy, &
                        Wt,Theta,Col,Head,p,c,Wbp,v,Nseg,Iprint,Sbgnrm, &
                        Info,Epsmch)
      implicit none


      integer,intent(in) :: n !! the dimension of the problem.
      integer,intent(in) :: m !! the maximum number of variable metric corrections
                              !! used to define the limited memory matrix.
      integer,intent(in) :: Head !! the location of the first s-vector (or y-vector) in S (or Y).
      integer,intent(in) :: Col !! the actual number of variable metric corrections stored so far.
      integer,intent(out) :: Nseg !! records the number of quadratic segments explored
                                  !! in searching for the GCP.
      integer,intent(in) :: Iprint !! controls the frequency and type of output generated:
                                   !!
                                   !!  * `iprint<0   ` no output is generated;
                                   !!  * `iprint=0   ` print only one line at the last iteration;
                                   !!  * `0<iprint<99` print also f and |proj g| every iprint iterations;
                                   !!  * `iprint=99  ` print details of every iteration except n-vectors;
                                   !!  * `iprint=100 ` print also the changes of active set and final x;
                                   !!  * `iprint>100 ` print details of every iteration including x and g;
                                   !!
                                   !! When `iprint > 0`, the file `iterate.dat` will be created to
                                   !! summarize the iteration.
      integer,intent(inout) :: Info !! On entry info is 0.
                                    !! On exit:
                                    !!
                                    !!  * `info = 0`  for normal return,
                                    !!  * `info /= 0` for abnormal return when the system
                                    !!     used in routine [[bmv]] is singular.
      integer,intent(in) :: Nbd(n) !! On entry nbd represents the type of bounds imposed on the
                                   !! variables, and must be specified as follows:
                                   !!
                                   !!  * `nbd(i)=0` if `x(i)` is unbounded,
                                   !!  * `nbd(i)=1` if `x(i)` has only a lower bound,
                                   !!  * `nbd(i)=2` if `x(i)` has both lower and upper bounds, and
                                   !!  * `nbd(i)=3` if `x(i)` has only an upper bound.
      integer :: Iorder(n) !! working array used to store the breakpoints in the piecewise
                           !! linear path and free variables encountered. On exit:
                           !!
                           !!  * `iorder(1),...,iorder(nleft)` are indices of breakpoints
                           !!    which have not been encountered;
                           !!  * `iorder(nleft+1),...,iorder(nbreak)` are indices of
                           !!    encountered breakpoints; and
                           !!  * `iorder(nfree),...,iorder(n)` are indices of variables which
                           !!    have no bound constraits along the search direction.
      integer,intent(inout) :: Iwhere(n) !! On entry `iwhere` indicates only the permanently fixed (`iwhere=3`)
                                         !! or free (`iwhere= -1`) components of `x`.
                                         !!
                                         !! On exit `iwhere` records the status of the current `x` variables:
                                         !!
                                         !!  * `iwhere(i) = -3`  if `x(i)` is free and has bounds, but is not moved
                                         !!  * `iwhere(i) =  0 ` if `x(i)` is free and has bounds, and is moved
                                         !!  * `iwhere(i) =  1 ` if `x(i)` is fixed at l(i), and l(i) /= u(i)
                                         !!  * `iwhere(i) =  2 ` if `x(i)` is fixed at u(i), and u(i) /= l(i)
                                         !!  * `iwhere(i) =  3 ` if `x(i)` is always fixed, i.e.,  u(i)=x(i)=l(i)
                                         !!  * `iwhere(i) = -1`  if `x(i)` is always free, i.e., it has no bounds.
      real(wp),intent(in) :: Theta !! the scaling factor specifying `B_0 = theta I`.
      real(wp),intent(in) :: Epsmch !! machine precision
      real(wp),intent(in) :: x(n) !! the starting point for the GCP computation.
      real(wp),intent(in) :: l(n) !! the lower bound of `x`.
      real(wp),intent(in) :: u(n) !! the upper bound of `x`.
      real(wp),intent(in) :: g(n) !! the gradient of `f(x)`. `g` must be a nonzero vector.
      real(wp) :: t(n) !! used to store the break points.
      real(wp) :: d(n) !! used to store the Cauchy direction `P(x-tg)-x`.
      real(wp),intent(out) :: Xcp(n) !! used to return the GCP on exit.
      real(wp),intent(in) :: Wy(n,Col) !! stores information that defines the limited memory BFGS matrix: `wy(n,m)` stores `Y`, a set of y-vectors;
      real(wp),intent(in) :: Ws(n,Col) !! stores information that defines the limited memory BFGS matrix: `ws(n,m)` stores `S`, a set of s-vectors;
      real(wp),intent(in) :: Sy(m,m) !! stores information that defines the limited memory BFGS matrix: `sy(m,m)` stores `S'Y`;
      real(wp),intent(in) :: Wt(m,m) !! stores information that defines the limited memory BFGS matrix: `wt(m,m)` stores the Cholesky factorization of `(theta*S'S+LD^(-1)L')`.
      real(wp) :: p(2*m) !! working array used to store the vector `p = W^(T)d`.
      real(wp) :: c(2*m) !! working array used to store the vector `c = W^(T)(xcp-x)`.
      real(wp) :: Wbp(2*m) !! working array used to store the
                           !! row of `W` corresponding to a breakpoint.
      real(wp) :: v(2*m) !! working array
      real(wp),intent(in) :: Sbgnrm !! the norm of the projected gradient at `x`.

      logical :: xlower , xupper , bnded
      integer :: i , j , col2 , nfree , nbreak , pointr , ibp , nleft , &
                 ibkmin , iter
      real(wp) :: f1 , f2 , dt , dtm , tsum , dibp , zibp , dibp2 ,&
                  bkmin , tu , tl , wmc , wmp , wmw , tj ,  &
                  tj0 , neggi , f2_org

      ! Check the status of the variables, reset iwhere(i) if necessary;
      ! compute the Cauchy direction d and the breakpoints t; initialize
      ! the derivative f1 and the vector p = W'd (for theta = 1).

      if ( Sbgnrm<=zero ) then
         if ( Iprint>=0 ) write (output_unit,*) 'Subgnorm = 0.  GCP = X.'
         call dcopy(n,x,1,Xcp,1)
         return
      endif
      bnded = .true.
      nfree = n + 1
      nbreak = 0
      ibkmin = 0
      bkmin = zero
      col2 = 2*Col
      f1 = zero
      if ( Iprint>=99 ) write (output_unit,'(/,a)') &
            '---------------- CAUCHY entered-------------------'

      ! We set p to zero and build it up as we determine d.

      do i = 1 , col2
         p(i) = zero
      enddo

      ! In the following loop we determine for each variable its bound
      ! status and its breakpoint, and update p accordingly.
      ! Smallest breakpoint is identified.

      do i = 1 , n
         neggi = -g(i)
         if ( Iwhere(i)/=3 .and. Iwhere(i)/=-1 ) then
            ! if x(i) is not a constant and has bounds,
            ! compute the difference between x(i) and its bounds.
            if ( Nbd(i)<=2 ) tl = x(i) - l(i)
            if ( Nbd(i)>=2 ) tu = u(i) - x(i)

            ! If a variable is close enough to a bound
            ! we treat it as at bound.
            xlower = Nbd(i)<=2 .and. tl<=zero
            xupper = Nbd(i)>=2 .and. tu<=zero

            ! reset iwhere(i).
            Iwhere(i) = 0
            if ( xlower ) then
               if ( neggi<=zero ) Iwhere(i) = 1
            else if ( xupper ) then
               if ( neggi>=zero ) Iwhere(i) = 2
            else
               if ( abs(neggi)<=zero ) Iwhere(i) = -3
            endif
         endif
         pointr = Head
         if ( Iwhere(i)/=0 .and. Iwhere(i)/=-1 ) then
            d(i) = zero
         else
            d(i) = neggi
            f1 = f1 - neggi*neggi
            ! calculate p := p - W'e_i* (g_i).
            do j = 1 , Col
               p(j) = p(j) + Wy(i,pointr)*neggi
               p(Col+j) = p(Col+j) + Ws(i,pointr)*neggi
               pointr = mod(pointr,m) + 1
            enddo
            if ( Nbd(i)<=2 .and. Nbd(i)/=0 .and. neggi<zero ) then
               ! x(i) + d(i) is bounded; compute t(i).
               nbreak = nbreak + 1
               Iorder(nbreak) = i
               t(nbreak) = tl/(-neggi)
               if ( nbreak==1 .or. t(nbreak)<bkmin ) then
                  bkmin = t(nbreak)
                  ibkmin = nbreak
               endif
            else if ( Nbd(i)>=2 .and. neggi>zero ) then
               ! x(i) + d(i) is bounded; compute t(i).
               nbreak = nbreak + 1
               Iorder(nbreak) = i
               t(nbreak) = tu/neggi
               if ( nbreak==1 .or. t(nbreak)<bkmin ) then
                  bkmin = t(nbreak)
                  ibkmin = nbreak
               endif
            else
               ! x(i) + d(i) is not bounded.
               nfree = nfree - 1
               Iorder(nfree) = i
               if ( abs(neggi)>zero ) bnded = .false.
            endif
         endif
      enddo

      ! The indices of the nonzero components of d are now stored
      ! in iorder(1),...,iorder(nbreak) and iorder(nfree),...,iorder(n).
      ! The smallest of the nbreak breakpoints is in t(ibkmin)=bkmin.

      ! complete the initialization of p for theta not= one.
      if ( Theta/=one ) call dscal(Col,Theta,p(Col+1),1)

      ! Initialize GCP xcp = x.

      call dcopy(n,x,1,Xcp,1)

      if ( nbreak==0 .and. nfree==n+1 ) then
         ! is a zero vector, return with the initial xcp as GCP.
         if ( Iprint>100 ) write (output_unit,'(A,/,(4x,1p,6(1x,d11.4)))') 'Cauchy X =  ', (Xcp(i),i=1,n)
         return
      endif

      ! Initialize c = W'(xcp - x) = 0.

      do j = 1 , col2
         c(j) = zero
      enddo

      ! Initialize derivative f2.

      f2 = -Theta*f1
      f2_org = f2
      if ( Col>0 ) then
         call bmv(m,Sy,Wt,Col,p,v,Info)
         if ( Info/=0 ) return
         f2 = f2 - ddot(col2,v,1,p,1)
      endif
      dtm = -f1/f2
      tsum = zero
      Nseg = 1
      if ( Iprint>=99 ) write (output_unit,*) 'There are ' , nbreak , '  breakpoints '

      ! If there are no breakpoints, locate the GCP and return.
      if ( nbreak/=0 ) then

         nleft = nbreak
         iter = 1

         tj = zero

         !------------------- the beginning of the loop -------------------------
         main : do

            ! Find the next smallest breakpoint;
            ! compute dt = t(nleft) - t(nleft + 1).

            tj0 = tj
            if ( iter==1 ) then
               ! Since we already have the smallest breakpoint we need not do
               ! heapsort yet. Often only one breakpoint is used and the
               ! cost of heapsort is avoided.
               tj = bkmin
               ibp = Iorder(ibkmin)
            else
               if ( iter==2 ) then
                  ! Replace the already used smallest breakpoint with the
                  ! breakpoint numbered nbreak > nlast, before heapsort call.
                  if ( ibkmin/=nbreak ) then
                     t(ibkmin) = t(nbreak)
                     Iorder(ibkmin) = Iorder(nbreak)
                  endif
                  ! Update heap structure of breakpoints
                  ! (if iter=2, initialize heap).
               endif
               call hpsolb(nleft,t,Iorder,iter-2)
               tj = t(nleft)
               ibp = Iorder(nleft)
            endif

            dt = tj - tj0

            if ( dt/=zero .and. Iprint>=100 ) then
               write (output_unit,'(/,a,i3,a,1p,2(1x,d11.4))') 'Piece    ', Nseg, ' --f1, f2 at start point ', f1 , f2
               write (output_unit,'(a,1p,d11.4)') 'Distance to the next break point =  ', dt
               write (output_unit,'(A,1p,d11.4)') 'Distance to the stationary point =  ',dtm
            endif

            ! If a minimizer is within this interval, locate the GCP and return.

            if ( dtm<dt ) exit main

            ! Otherwise fix one variable and
            ! reset the corresponding component of d to zero.

            tsum = tsum + dt
            nleft = nleft - 1
            iter = iter + 1
            dibp = d(ibp)
            d(ibp) = zero
            if ( dibp>zero ) then
               zibp = u(ibp) - x(ibp)
               Xcp(ibp) = u(ibp)
               Iwhere(ibp) = 2
            else
               zibp = l(ibp) - x(ibp)
               Xcp(ibp) = l(ibp)
               Iwhere(ibp) = 1
            endif
            if ( Iprint>=100 ) write (output_unit,*) 'Variable  ' , ibp , '  is fixed.'
            if ( nleft==0 .and. nbreak==n ) then
               ! all n variables are fixed,
               ! return with xcp as GCP.
               dtm = dt
               call update()
               return
            endif

            ! Update the derivative information.

            Nseg = Nseg + 1
            dibp2 = dibp**2

            ! Update f1 and f2.

            ! temporarily set f1 and f2 for col=0.
            f1 = f1 + dt*f2 + dibp2 - Theta*dibp*zibp
            f2 = f2 - Theta*dibp2

            if ( Col>0 ) then
               ! update c = c + dt*p.
               call daxpy(col2,dt,p,1,c,1)

               ! choose wbp,
               ! the row of W corresponding to the breakpoint encountered.
               pointr = Head
               do j = 1 , Col
                  Wbp(j) = Wy(ibp,pointr)
                  Wbp(Col+j) = Theta*Ws(ibp,pointr)
                  pointr = mod(pointr,m) + 1
               enddo

               ! compute (wbp)Mc, (wbp)Mp, and (wbp)M(wbp)'.
               call bmv(m,Sy,Wt,Col,Wbp,v,Info)
               if ( Info/=0 ) return
               wmc = ddot(col2,c,1,v,1)
               wmp = ddot(col2,p,1,v,1)
               wmw = ddot(col2,Wbp,1,v,1)

               ! update p = p - dibp*wbp.
               call daxpy(col2,-dibp,Wbp,1,p,1)

               ! complete updating f1 and f2 while col > 0.
               f1 = f1 + dibp*wmc
               f2 = f2 + two*dibp*wmp - dibp2*wmw
            endif

            f2 = max(Epsmch*f2_org,f2)
            if ( nleft>0 ) then
               dtm = -f1/f2
               ! to repeat the loop for unsearched intervals.
            else if ( bnded ) then
               f1 = zero
               f2 = zero
               dtm = zero
               exit main
            else
               dtm = -f1/f2
               exit main
            endif

         end do main
         !------------------- the end of the loop -------------------------------

      end if

      if ( Iprint>=99 ) then
         write (output_unit,*)
         write (output_unit,*) 'GCP found in this segment'
         write (output_unit,'(a,i3,a,1p,2(1x,d11.4))') &
                     'Piece    ', Nseg , ' --f1, f2 at start point ', f1 , f2
         write (output_unit,'(A,1p,d11.4)') 'Distance to the stationary point =  ',dtm
      endif
      if ( dtm<=zero ) dtm = zero
      tsum = tsum + dtm

      ! Move free variables (i.e., the ones w/o breakpoints) and
      ! the variables whose breakpoints haven't been reached.

      call daxpy(n,tsum,d,1,Xcp,1)

      call update()

      contains

      subroutine update()

         ! Update c = c + dtm*p = W'(x^c - x)
         ! which will be used in computing r = Z'(B(x^c - x) + g).

         if ( Col>0 ) call daxpy(col2,dtm,p,1,c,1)
         if ( Iprint>100 ) write (output_unit,'(A,/,(4x,1p,6(1x,d11.4)))') 'Cauchy X =  ', (Xcp(i),i=1,n)
         if ( Iprint>=99 ) write (output_unit,'(/,A,/)') '---------------- exit CAUCHY----------------------'

      end subroutine update

      end subroutine cauchy