program bisect
c **************************************************************************
c                                    GP.F
c This program finds upper and lower bounds on the optimal solution of a
c Graph Bisection problem (i.e. m1 = m2 = n/2).  Rendl-Wolkowicz upper
c bounds are found using a Bundle-Trust algorithm to perform the
c minimization, and a block Lanczos algorithm to calculate the eigenvalues.
c Lower bounds are found by doing a sort, and then applying the Kernighan-
c Lin heuristic to improve the resulting bisection.
c
c Other files required: subs.for
c
c Input file: The name of this file is provided by the user.  It contains
c a sparse representation of the adjacency matrix A.
c
c When running the program, the following screen input is required:
c     filename
c         - The file must contain a sparse representation of the adjacency
c           matrix.
c     maxit
c         - The maximum number of BT iterations allowed; suggestion: 30.
c     set up d automatically or read from file?
c         - Normally set up automatically; enter 0 for this.
c     scaling factor for d
c         - An initial choice for d is needed to start the BT iterations.
c           This scaling factor is used to adjust the initial choice provided
c           by the program.  Often a good choice is 0.8.
c     estimate of final upper bound
c         - If 0 is entered, the program averages the initial upper bound
c           and the initial feasible solution cost.
c
c     At the end of the run, enter 1 if you want to save the optimal d (you
c     will be asked for a filename) or some other integer to stop.
c
c Storage of the Adjacency Matrix
c     This matrix is sparse, symmetric (so only the upper triangle is
c     stored) and has zeros on the diagonal.  The input file contains:
c     n  nzl  nzs
c     icol(1:nzl)
c     irow(i) a(i)  i= 1,nzs
c
c     n = # of nodes
c     nzl = # of last row containing non-zero above main diagonal
c     nzs = total # of nonzeros above main diagonal
c     icol(i) = for each row i, # non-zeros after main diagonal
c     irow(i) = column number for the ith non-zero
c     a(i) = weight (adjacency matrix entry) for the ith non-zero
c
c Variables and Data Structures
c     n         = number of nodes
c     maxcom    = For BT: maximum number of calls to COMPFG
c     maxit     = maximum number of BT iterations allowed
c     iprint    = controls level of printout from BT
c     ifail     = gives info about the result of BT
c     iwork()   = work space for BT
c     icol(i)   = For each row i, # non-zeros after main diagonal
c     irow(i)   = Column for the ith non-zero
c     ibest()   = The first  subset for the best feasible partition found
c     jbest()   = The second subset for the best feasible partition found
c     ipkeep()  = Temporary storage of feasible partition
c     iqkeep()  = Temporary storage of feasible partition
c     ad()      = diagonal elements of adjacency matrix (now assumed to be 0)
c     a(i)      = ith non-zero weight (adjacency matrix entry)
c     suma      = sum of the entries in the adjacency matrix
c     d         = the diagonal perturbation of the A matrix
c     f         = the function value: f(d) = largest eigenvalue of
c                 Vn' * (A + diag(d)) * Vn
c     g()       = gradient of f
c     fm        = estimate of the final upper bound
c     eps       = stopping criterion for BT
c     work()    = work space for BT
c     worklz()  = work space for DNLASO (Lanczos algorithm)
c     xconst    = constant for special representation of Vn, ie -1/(n+sqrt(n))
c     yconst    = constant for special representation of Vn, ie -1/sqrt(n)
c     vec(i,j)  = the eigenvectors found by Lanczos
c     bstkl     = cost of best feasible solution after  Kernighan-Lin
c     bstev     = cost of best feasible solution before Kernighan-Lin
c     nfig      = number of decimal digits of accuracy required from Lanczos
c     nblock    = number of vectors in each Lanczos block
c     nval      = number of eigenvalues desired
c     nperm     = number of eigenvalues successfully found by Lanczos
c     matmul    = total number of matrix accesses in BT
c     infile    = name of input file
c     istop     = tells BT method to abort if we get stuck in Lanczos
c
c       last changed: July 1994               Franz Rendl
c **************************************************************************

        integer nmax, reset, lwork, ntot

c-------- needs to be adapted for larger/smaller problems  -(1)-
        parameter (nmax = 5000, ntot = 30000, reset = 10)
        parameter (nmval= 6, lwork =  100000)

        integer n, i, maxcom, maxit, iprint, ifail, iwork( reset)
        integer icol( nmax), irow( ntot), ibest( nmax/2)
        integer ipkeep( nmax/2), iqkeep( nmax/2), jbest( nmax/2)
        double precision ad( nmax), a( ntot), suma,
     *     d( nmax), f, g(nmax), fm, eps, work( lwork)
        double precision xconst, yconst, vec(nmax, nmval), bstkl, bstev
        real dummy( 5), xbst
c
c work space for dnlaso
        double precision worklz(800+21*nmax)
c
        character*20, infile

        external compfg

        common / graph1 / a, ad, suma
        common / graph2 / nzl, icol, irow
        common / const / xconst, yconst
c
        common / iconst / matmul, nfig, nblock, nval, nperm
        common / stop / istop, idummy
c
        common / lzwork / worklz
        common / wkspc / work
        common / eij /  vec
        common / globl / bstev, bstkl
        common / globl2 / ibest, jbest
        common / keep / ipkeep, iqkeep

        common / eigvls / dummy, xbst
c If run a series of tests, might choose to initialise random
c number generator each time (for repeatibility)
        common /random/iurand
c
c Initialise Lanczos work array
        do 5 i = 1, 800+21*nmax
            worklz( i) = 0.0d0
    5   continue

c bstev and bstkl contain current champions of best solutions
c with respect to eigenvectors and kernighan-lin
        bstev = -1.0d10
        bstkl = bstev

c *** read description of graph
        print *,'filename:'
        read '(a20)', infile
        open( 4, file = infile, status = 'old')
        read(4,*) n, nzl, nzs

c check maximum dimensions!
        if (n .gt. nmax .or. nzs .gt. ntot) then
            write(*,*)' Maximum dimensions exceeded!'
            write(*,*)' Max number of nodes = ',nmax,' Actual = ',n
            write(*,*)' Max number of edges = ',ntot,' Actual = ',nzs
            stop
        endif

        read(4,*) (icol( i), i=1,nzl)
c check that the sum of the non-zeros per column equals the
c total number of non-zeros, nzs
        isum = 0
        do 6 i = 1,nzl
          isum = isum + icol(i)
   6    continue
        if (isum .ne. nzs) then
            write(*,*)' Data Error!'
            write(*,*)' Number of non-zeros specified as ', nzs
            write(*,*)' Sum of non-zeros per column is ',isum
            stop
        endif

        read(4,*) (irow( i), a( i), i=1,nzs)
c read(4,*) (ad( i), i=1,n)
c main diag of graph is assumed to be zero, i.e. ad( i) = 0.0
        close(4)

        print *,'graph was read successully. It has'
        print 7,n,nzs
7       format(2x,i6,'  nodes and ',i7,' edges.')

c Some parameters are hard-coded
        eps = 0.1
        nfig = 5
        nblock = 3
        nval = 2
        maxit = 15
c        print *,'maxit ='
c        read *,maxit
c Set up constants for dnlaso

c Set up constants for bt and Vn
        matmul = 0
        iprint = 2
        maxcom = maxit + 20
        yconst = sqrt( float( n))
        xconst = - 1.0d0 / (float( n) + yconst)
        yconst = - 1.0d0 / yconst

c Compute initial diagonal perturbation and sum of entries (suma)
        do 14 i = 1,n
14              d( i) = 0.0d0

        suma = 0.0
        llast = 0
        do 16 j = 1, nzl
                if ( icol( j) .eq. 0) goto 16
                lfirst = llast + 1
                llast = llast + icol( j)
                do 15 l = lfirst, llast
                        i = irow( l)
                        d( i) = d( i) + a( l)
                        d( j) = d( j) + a( l)
                        suma = suma + a( l)
15                      continue
16              continue
        suma = 2.0d0 * suma
        do 17 i = 1,n
                d( i) = d( i) - suma / float( n)
17              continue

c *** this is a good choice in general, on special problems it might
c     be either chosen close to one, a trivial choice is 0

        factr = .8
c for very sparse graphs factr should be close to one
        if ( float(nzs) / float( n) .le. 3.) factr = .95
	if (float(nzs) / float( n) .gt. 5.) factr = .6
	if (float(nzs) / float(n) .gt. 10.) factr = .4
        do 19 i = 1,n
19              d( i) = d( i) * factr

c Calculate an initial upper bound
        call compfg( n, d, f, g)
        print 8, f
 8      format(5x,'initial upper bound = ', f20.4)
        nval = 1
        nblock = 2
c
c   *******************
c Use first two eigenvectors to find an initial feasible bisection
        i = 1
        j = 2
        print *,'use first two eigenvectors to get bisection:'
        call feas(n, nzs, i, j )
c pass best solution on to BT for printout
        xbst = bstkl
c   *******************

c estimate of final bound
        fm = 0.7 * f + 0.3 *bstkl
        print 9, fm
 9      format(3x,'initial guess on fm=', f20.3)

c Now perform BT iterations to improve the upper bound
        call bt( n, d, f, g, compfg, fm, eps, maxcom, maxit, reset,
     *       iprint, ifail, iwork, work, lwork)

        if (ifail .eq. 4) then
            write(*,*)' BT Code does not have enough workspace'
            stop
        endif
c Recalculate final upper bound
        nfig = 7
        nval = 3
        nblock = nval + 1
        print 11, nfig
11      format(1x,'final check: 3 largest eigs, accuracy=', i3)
        call compfg( n, d, f, g)
        print 10,f
10      format(1x,'final upper bound:',f12.3)

c   *******************
c Calculate a final feasible solution.  Use pairs of eigenvectors.
c Note that Lanczos has only calculated nval eigenvectors.  The number
c reliable eigenvectors found, nperm, may be less than nval, but we can
c use even the unreliable estimates to help find a good feasible solution.
        print *,'run around circles using first eigenvectors:'
        do 30 i = 1, nval - 1
        do 30 j = i+1, nval
30          call feas( n, nzs, i, j)

c Now let's see what we have got:
c First round down bound to nearest integer
        ibnd = f
        print 802, ibnd
802     format(//2x,'** final bound (rounded down)=',i10)
        print 801,  bstkl
801     format(2x,'** best bisection=',f10.2)
	print 803, (ibnd- bstkl)/ float(bstkl)
803	format(2x,'** final gap=' , f5.3)
c   *******************

        print *,'total number of matrix accesses:', matmul
c        print *,'1 = save optimal d and best bisection, else stop:'
c        read *,i
c this can be used to store the final result
c the two sidesof the partition are in ibest and jbest 
        i = 0
        if (i .ne. 1) stop
        print *,'filename:'
        read '(a20)', infile
        open( 4, file = infile, status = 'unknown')
        write( 4, 90) (d(i),i=1,n)
        write( 4, 91) (ibest( i), i = 1, n/2)
        write( 4, 91) (jbest( i), i = 1, n/2)
91      format( 20i4)
90      format(5(1x, f12.5))
        close(4)

        end

c-----------------------  feas.for -------------

       subroutine feas(n, nzs, col1, col2)
c  *** take eigenvectors in columns col1 and col2 and do:
c  1) run around circle and find best bisection, store it in *keep
c  2) take best bisection found in 1) and apply Ker-Lin
c
c  We use a different representation of the matrix here, because it saves
c  time in the Kernighan-Lin routine.  We pretend that the matrix is not
c  symmetric and store all the non-zero elements in each row.
c  This new sparse matrix representation is as follows:
c  nnz = number of non-zero entries
c  weight - array containing the nonzero entries in row order
c  iadj   - indicates which column of the matrix the corresponding
c           entry of weight is in
c  ifirst - array of pointers into weight
c           ifirst(i) indicates first weight entry for row i
c  ilast  - array of pointers into weight
c           ilast(i)  indicates last  weight entry for row i
c  part   - This array indicates for each node which side of the
c           partition it is on, i.e. ip or iq.  Specifically,
c           part(i) = +j if node i is in the jth position of the
c           ip side of the partition and part(i) = -j if node i
c           is in the jth position of the iq side.

c ----------- needs to be adapted ---------- (2) ----
        parameter (nmax = 5000, ntot = 30000, nmval = 6)
        parameter (nn = nmax/2, not0 = ntot*2)
        integer col1, col2
        integer ip(nn), iq(nn), part(nmax), icol(nmax), irow(ntot),
     *          ifirst(nmax), ilast(nmax), ibest(nn), jbest(nn),
     *          iadj(not0), kp(nn), kq(nn),ideg(nmax),
     *          ipkeep(nn), iqkeep(nn), iadr(nmax)
        double precision sum1, sum2, x, vec(nmax,nmval), z(nmax),
     *          y(nmax), a(ntot), ad( nmax), suma, t, w(nmax),
     *          wk1(nn), wk2(nn), wk3(nn), gain(nn), sum,
     *          weight(not0), tcost, bstev, bstkl, cost, best
        logical iwk4( nn), iwk5( nn)

        common / eij / vec
        common / graph1 / a, ad, suma
        common / graph2 / nzl, icol, irow
        common / keep /   ipkeep, iqkeep
        common / globl / bstev, bstkl
        common / globl2 / ibest, jbest
        common / wkspc / weight
c
c  Calculate the new matrix representation
        call trafo(n, nzl, nzs, icol, irow, a, ideg, ifirst, ilast,
     *             iadj, weight, iadr)

        best = 0.0d0
        n2 = n/2
c  form z = Vn * vec(.,col1) (unproject)
c  and y = Vn * vec(.,col2) (for the second eigenvector)
c  first sum the elements
        sum1 = 0.0d0
        sum2 = 0.0d0
        do 10 i = 1,n-1
            sum1 = sum1 + vec(i,col1)
            sum2 = sum2 + vec(i,col2)
   10   continue
        t = n
        z(1) = -1/sqrt(t) * sum1
        y(1) = -1/sqrt(t) * sum2

        x = -1/(t + sqrt(t))
        do 20 i = 1,n-1
            z(i+1) = x * sum1 + vec(i,col1)
            y(i+1) = x * sum2 + vec(i,col2)
   20   continue
c  now multiply by A to get linearization
        i = 1
        call matvec(nmax, n, i, nzl, icol, irow, a, ad, z, w)
        do 25 i = 1,n
25             z( i) = w( i)
        i = 1
        call matvec( nmax,n, i, nzl, icol, irow, a, ad, y, w)
        do 26 i = 1,n
26             y( i) = w( i)

c  sum all weights and divide by 2
        sum = 0.0
        do 65 i = 1, ilast(n)
            sum = sum + weight( i)
   65   continue
        sum = sum/2.0

c  Now iterate, over all values of gamma
        do 30 i = 1,n,5
            gamma = atan(-z(i)/y(i))
            do 40 j = 1, n
                w(j) = cos(gamma) * z(j) + sin(gamma) * y(j)
   40       continue

c  find cost of bisection for w, use part and iadr as integer workspace
            call fndcst(n, nzs, part, iadr, w, cost,
     *                  ifirst, ilast, weight, iadj, ip, iq)

            if ( cost. gt. best) then
                 best = cost
                 do 35 j = 1, n2
                       ipkeep( j) = ip( j)
                       iqkeep( j) = iq( j)
   35            continue
            endif

c  update best solution found by eigenvectors
            if ( cost .gt. bstev) then
                       bstev = cost
                       print 820, ' new champion:', bstev
            endif
   30   continue

c  try Kernighan-Lin
c  set up part
        print 820, 'now try Ker-Lin:'
        do 50 i = 1, n2
            part(ipkeep(i)) = i
            part(iqkeep(i)) = -i
   50   continue

        call partit(n2, nn, not0, .false., ipkeep, iqkeep, kp, kq,
     *              tcost, wk1, wk2, wk3, iwk4, iwk5,
     *              ifirst, ilast, weight, iadj, part, gain)
c  output the original cost, the improved cost, and the difference
c  update the best kerlin-solution
        if (sum - tcost .gt. bstkl) then
                bstkl = sum - tcost
                do 60 i = 1, n2
                    ibest( i) = ipkeep( i)
60                  jbest( i) = iqkeep( i)
        print 820, ' after Ker-Lin:',bstkl
820     format(1x,a20,f10.2, 3x, 3f10.2)
        endif
        return
        end

        subroutine fndcst(n, nzs, part, index, w, cost,
     *                    ifirst, ilast, weight, iadj, ip, iq)

        integer ip(n/2), iq(n/2), part(n),
     *   index(n), ifirst(n), ilast(n), iadj(nzs*2)
        double precision sum, cost1, w( n), weight(nzs*2), cost

c
c  sort the unprojected vector, but remember the original positions
c  of the elements
        call sort(n,w,index)

c  now round w into a feasible solution and form part
        n2 = n/2
        do 30 i = 1,n2
            w(index(i)) = 0
            part(index(i)) = -i
   30   continue
        do 40 i = n2+1, n
            w(index(i)) = 1
            part(index(i)) = i-n2
   40   continue

c  what is the cost of this partition?
c  first set up ip and iq
        j = 0
        k = 0
        do 50 i = 1,n
            if (w(i) .eq. 1) then
                j = j + 1
                ip(j) = i
            else
                k = k + 1
                iq(k) = i
            endif
   50   continue

c  now find cost
c  sum all weights and divide by 2
        sum = 0.0
        do 65 i = 1, ilast(n)
            sum = sum + weight( i)
   65   continue
        sum = sum/2.0

        cost1 = 0
        do 60 i = 1,n2
            do 70 j = ifirst(ip(i)),ilast(ip(i))
                if (w(iadj(j)) .eq. 0) cost1 = cost1 + weight(j)
   70       continue
   60   continue

        cost = sum - cost1
        return
        end

        subroutine trafo( n, nzl, nzs, icol, irow, a, ideg, ifirst,
     *                    ilast, iadj, weight, iadr)
        integer n, nzl, nzs, icol( 1), irow( 1), ideg( 1)
        integer ifirst( 1), ilast( 1), iadj( 1), iadr( 1)
        double precision a( 1), weight( 1)

c  set up ideg
        do 10 i = 1,nzl
10      ideg(i) = icol(i)
        do 20 i = nzl + 1, n
20      ideg(i) = 0
        do 30 l = 1,nzs
        i=irow(l)
30      ideg(i) = ideg(i) + 1
        ifirst(1) = 1
        ilast(1) = ideg(1)
        do 40 i = 2, n
        ifirst(i) = ifirst(i-1) + ideg(i-1)
40      ilast(i) = ilast(i-1) + ideg(i)
        do 50 i = 1, n
50      iadr(i) = ifirst(i)
        llast = 0
        do 70 j = 1,nzl
                if(icol(j) .eq. 0) goto 70
                        lfirst = llast + 1
                        llast = llast + icol(j)
                        do 60 l = lfirst, llast
                                i = irow(l)
                                iadj(iadr(i)) = j
                                weight(iadr(i)) = a(l)
                                iadr(i) = iadr(i) + 1
                                iadj(iadr(j)) = i
                                weight(iadr(j)) = a(l)
                                iadr(j) = iadr(j) + 1
60                      continue
70      continue

        return
        end

        subroutine sort(n,a,ipoint)
c  heapsort - sort into nondecreasing order
c  taken from "Combinatorial Heuristic Algorithms with FORTRAN" - H.T. Lau
        integer          ipoint(n)
        double precision a(n),atemp
c
        do 10 i = 1,n
            ipoint(i) = i
   10   continue
        j1 = n
        j2 = n/2
        j3 = j2
        atemp = a(j2)
        jpont = ipoint(j2)
c
   20   j4 = j2 + j2
        if (j4 .le. j1) then
            if (j4 .lt. j1) then
                if (a(j4+1) .ge. a(j4)) j4 = j4 + 1
            endif
            if (atemp .lt. a(j4)) then
                a(j2) = a(j4)
                ipoint(j2) = ipoint(j4)
                j2 = j4
                goto 20
            endif
        endif
c
        a(j2) = atemp
        ipoint(j2) = jpont
        if (j3 .gt. 1) then
            j3 = j3 - 1
            atemp = a(j3)
            jpont = ipoint(j3)
            j2 = j3
            goto 20
        endif
c
        if (j1 .ge. 2) then
            atemp = a(j1)
            jpont = ipoint(j1)
            a(j1) = a(1)
            ipoint(j1) = ipoint(1)
            j1 = j1 - 1
            j2 = j3
            goto 20
        endif
        return
        end

        subroutine partit( n, nn, not0, init, ip, iq, kp, kq,
     *                     tcost, wk1, wk2, wk3, iwk4, iwk5,
     *                     ifirst, ilast, weight, iadj, part, gain)

c  the Kernighan-Lin graph bisection heuristic - takes any feasible
c  bisection and iteratively improves it

        integer ip( n), iq( n), kp( n), kq( n)
        integer ifirst( nn*2), ilast( nn*2), iadj( not0), part( nn*2)
        double precision weight( not0), wk1( n), wk2( n), wk3( n),
     *                   gain( n), tcost, tmax, small, tot1
        logical iwk4( n), iwk5( n), init

        if ( init) then
c  Set up an initial partition
c  Put the first half of the nodes into ip and the second half into iq
                do 10 i = 1,n
                        ip( i) = i
                        iq( i) = i+n
                        part( i) = i
                        part( n+i) = -i
10              continue
        endif

c  Initialise work arrays
20      do 30 i = 1,n
                iwk4( i) = .true.
                iwk5( i) = .true.
30      continue
        tcost = 0.0

c  External cost of a node defined to be total weight of cut edges
c  from that node; similarly Internal cost is total weight of UNcut
c  edges from that node.  Let D(i) be the difference between the external
c  cost and the internal cost for node i.
c
c  Step 1:  Calculate D(i) for every node i.  Store in wk1 (for
c           nodes in subset ip) and wk2 (for nodes in iq).
c
c  Consider all nodes in subset ip
        do 70 i = 1,n
                tot1 = 0.0d0
                tot2 = 0.0
                do 80 j = ifirst( ip( i)), ilast( ip( i))
                    if (part( iadj( j)) .gt. 0) then
c  tot2 is total weight uncut edges from node ip(i)
                        tot2 = tot2 + weight( j)
                    else
c  tot1 is total weight cut   edges from node ip(i)
                        tot1 = tot1 + weight( j)
                    endif
80              continue
c  tcost is total weight of cut edges
                tcost = tcost + tot1
                wk1( i) = tot1 - tot2
70      continue
        small = -2.0 * tcost

c  Consider all nodes in subset iq
        do 100 i = 1,n
                tot1 = 0.0d0
                tot2 = 0.0
                do 110 j = ifirst( iq( i)), ilast( iq( i))
                    if (part( iadj( j)) .gt. 0) then
c  tot1 is total weight cut   edges from node iq(i)
                        tot1 = tot1 + weight( j)
                    else
c  tot2 is total weight uncut edges from node iq(i)
                        tot2 = tot2 + weight( j)
                    endif
110             continue
                wk2( i) = tot1 - tot2
100     continue

c  If two nodes a and b on opposite sides of the partition are
c  interchanged, the gain (reduction in cost) is
c  D(a) + D(b) - 2 * the weight on the edge from a to b
c
c  Step 2:  Choose a and b so that the gain is maximised.  Then
c  remove a and b from consideration and repeat the process - do
c  this until there are no nodes left to consider.
c
c  Do until no nodes left to consider...
c  Slight modification here: we consider swaps of at most iswap nodes,
c  because this is much faster
        iswap = 20
        do 140 i = 1,iswap
c  Initialise the maximum gain
            tmax = -99999.9
c  Calculate gain for nodes in ip
            do 120  j = 1,n
c  Is this node still available?
                if ( iwk4( j)) then
c  Initialise the gain array
                    do 115 k = 1,n
                        gain( k) = 0
115                 continue
c
c  Consider edges from node ip( j)
                    do 117 l = ifirst( ip( j)), ilast( ip( j))
c                       icol is the node that this edge connects to
                        icol = iadj( l)
c  index gives the position of the node in the partition
                        index = part( icol)
c  Is this node still available?
                        if (iwk5( abs(index)) ) then
c  Is it on the opposite side of the partition?
                            if ( index .lt. 0)
c  contribution to gain = -2 * weight on edge
     *                          gain( -index) = -2.0 * weight( l)
                        endif
117                 continue

c  Calculate the other components of gain
                    do 118 k = 1,n
c  Is this node still available?
                        if ( iwk5( k)) then
c  contribution is D(a) + D(b)
                            gain( k) = gain( k) + wk1( j) + wk2( k)
c
                            if (gain( k) .gt. tmax) then
                                tmax = gain( k)
c  ia and ib are the nodes to be interchanged (i.e. best so far)
                                ia = ip( j)
                                ib = iq( k)
                                ind1 = j
                                ind2 = k
                            endif
                        endif
118                 continue
                endif
120         continue

c  Store max gain in wk3, and pair to be swapped in kp and kq
            wk3( i ) = tmax
            kp( i) = ia
            kq( i) = ib
c  Remove from further consideration
            iwk4( ind1) = .false.
            iwk5( ind2) = .false.

c  Now recalculate the D values for the partitions with nodes a and b removed
c  D(x)'= D(x) +2*(weight on edge from x to a) -2*(weight on edge from x to b)
c  D(y)'= D(y) +2*(weight on edge from y to b) -2*(weight on edge from y to a)
c  where x and a are in ip subset and y and b are in iq subset
c
c  First consider edges from node a
            do 130 j = ifirst( ia),ilast( ia)
                icol = iadj( j)
                index = part( icol)
c  If on the ip side of the partition
                if (index .gt. 0) then
c  If allowed to consider
                    if (iwk4( index) )
c  Add 2*(weight on edge from x to a) to D(x)
     *                  wk1( index) = wk1( index) + 2.0*weight( j)
                else
c  If allowed to consider
                    if (iwk5( -index))
c  Add -2*(weight on edge from y to a) to D(y)
     *                  wk2( -index) = wk2( -index) - 2.0*weight( j)
                endif
130         continue

c  Now consider edges from node b
            do 135 j = ifirst( ib),ilast( ib)
                icol = iadj( j)
                index = part( icol)
c  If on the ip side of the partition
                if (index .gt. 0) then
c  If allowed to consider
                    if (iwk4( index))
c  Add -2*(weight on edge from x to b) to D(x)
     *                  wk1( index) = wk1( index) - 2.0*weight( j)
                else
c  If allowed to consider
                    if (iwk5( -index))
c  Add 2*(weight on edge from y to b) to D(y)
     *                  wk2( -index) = wk2( -index) + 2.0*weight( j)
                endif
135         continue

140     continue

c  Now that we consider swaps of at most 30 nodes, we need to set kp and
c  kq for the remaining n-30 nodes
        i = iswap + 1
        j = i
        do 150 k = 1,n
            if (iwk4( k)) then
                kp( i) = ip( k)
                i = i + 1
            endif
            if (iwk5( k)) then
                kq( j) = iq( k)
                j = j + 1
            endif
150     continue

c  If we swap k nodes, the total gain is gain(1) + ... + gain(k)
c  Choose k to maximise the partial sum  gain(1) + ... + gain(k)
        tmax = small
        tot1 = 0.0d0
c  Now considering swaps of at most 30 nodes
        do 160 i = 1,iswap
                tot1 = tot1 + wk3( i)
                if ( tot1 .gt. tmax) then
                        tmax = tot1
                        k = i
                endif
160     continue

c  If the total gain is positive, do the swap.  If it is (near) zero,
c  then a local optimum has been reached and Ker-Lin terminates

        if (tmax .gt. 5.0d-05) then

                do 170 i = 1,k
c  Update partition - do the swap
                        part( kq( i)) = i
                        part( kp( i)) = -i
                        ip( i) = kq( i)
170                     iq( i) = kp( i)
                k1 = k + 1
                do 180 i = k1, n
                        part( kp( i)) = i
                        part( kq( i)) = -i
                        ip( i) = kp( i)
180                     iq( i) = kq( i)
c  Continue iterating
                goto 20
        endif
        return
        end

c -------------- compfg.for --------------

        subroutine compfg( n, d, f, g)
c ---------- needs to be adapted ----- (3) ----
        parameter (nmax = 5000, ntot = 30000, nmval = 6)
        double precision f, d( nmax), g( nmax)
c d = diagonal shift (A - diag(d))
c f = upper bound on graph bisection (f = (1/4)*lmax(VtA(d)V) + suma/4)
c d = subgradient
c local variables
        double precision suma, ad( nmax), a( ntot), sum
c variables for dnlaso
        double precision val(nmval,4), worklz(800+21*nmax)
        double precision qs( nmax, 45), vec( nmax, nmval)
        integer ind( nmval)
c constants defining the projection matrix Vn
        double precision xconst, yconst
c       for temporary storage of eigenvalues
        real eigval(5), xbst

        external op, iovect

        common / store / qs
        common / graph1 / a, ad, suma
        common / const / xconst, yconst
        common / iconst / matmul, nfig, nblock, nval, nperm
        common / stop / istop, mulply
        common / lzwork / worklz
        common / eigvls / eigval, xbst

        common / eij / vec

        istop = 0
c constants for dnlaso
        nmvec = nmax
        maxop = n/2
        maxj = 42
        nperm = 0
c       do 10 i=1,n
c10             work( i) = 1.0d0

c set current perturbation of main diag (ad( i) = -d( i))
        do 20 i=1,n
20              ad( i) = -d( i)
        n1 = n - 1
22      continue

        call dnlaso(op, iovect, n1, nval, nfig, nperm, nmval, val,
     1       nmvec, vec, nblock, maxop, maxj, worklz, ind, ierr)
        do 23 i=1,nperm
23              eigval( i) = val( i, 1)
        do 24 i=nperm+1,5
24              eigval( i) = 0.0
        mulply = ind( 1)

        if ( nperm .le. 0) then
c               set istop to 1 to abort BT
                if ( nblock .eq. nmval ) istop = 1
                nblock = nblock + 1
                print 99,' **blocksize increased**:', nblock
                matmul = matmul + ind( 1)
                if (istop .eq. 0) goto 22

                        endif


        matmul = matmul + ind( 1)
99      format(1x,a25,3i4)
c the upper bound f can now be computed
        f = n * val( 1, 1) * 0.25 + suma * 0.25

c calculate the gradient
c first we need the sum of the components of the eigenvector
        sum = 0.0d0
        do 30 i = 1,n-1
30              sum = sum + vec( i, 1)
        g( 1) = - yconst * yconst * sum * sum * n * 0.25
        do 40 i = 1, n-1
                g( i+1) = xconst * sum + vec( i, 1)
40              g( i+1) = - g( i+1) * g( i+1) * n * 0.25

c make sure that sum(g) = 0
        sum = 0.0d0
        do 50 i=1,n
50              sum = sum + g( i)
        do 60 i=1,n
60              g( i) = g( i) - sum / float( n)

c store eigenvector for next iteration
        do 70 i = 1,n1
70              worklz( i) = vec( i, 1)

        return
        end

        subroutine op( n, m, p, q)
c computes q = ap where a is adjacency matrix ( in condensed form0
c p,q are n by m matrices
c needed for dnlaso
c constants related to the graph: nmax, ntot

c ---------- needs to be adapted -------- (4) --
        parameter (nmax = 5000, ntot = 30000, nmval = 6)
        double precision p( n, m), q( n, m)
c arrays defining the graph
        double precision ad( nmax), a( ntot)
        integer icol( nmax), irow( ntot)
c constants defining transformation matrix vn and work arrays
        double precision xconst, yconst, sum, vec( nmax, nmval)
        double precision v2( nmax, nmval), sump( 10), suma

        common / graph1 / a, ad, suma
        common / graph2 / nzl, icol, irow
        common / const / xconst, yconst

c we have to compute (Vnt * A * Vn) * p
c first comes vec := Vn * p
c we need the column sum of the entries in p
        np1 = n+1
        do 20 j = 1,m
        sum = 0.0
        do 10 i = 1,n
10              sum = sum + p( i,j)
20      sump( j) = sum
c vec(1) = yconst * sump, vec(i+1) = xconst * sump + p( i)
        do 30 j=1,m
        do 30 i= 1,n
30              vec( i+1, j) = xconst * sump( j) + p( i, j)
        do 40 j = 1,m
40              vec( 1, j) = yconst * sump( j)

c now we form v2 = A * vec
        call matvec( nmax,np1, m, nzl, icol, irow, a, ad, vec, v2)

c finally we need Vnt *v2
c we need the column sums of v2 but without element in first row
        do 60 j = 1,m
        sum = 0.0
        do 50 i = 2,np1
50              sum = sum + v2( i, j)
60      sump( j) = sum

c q( i) = yconst * v2( 1) + xconst * sum + v2( i+1)
        do 80 j = 1,m
                sum = yconst * v2( 1, j) + xconst * sump( j)
                do 70 i = 1, n
70              q( i, j) = sum + v2( i+1, j)
80      continue

        return
        end

        subroutine matvec( nmax, n, p, nzl, icol, irow, a, ad, w, u)

c ----------- needs to be adapted  --- (5) ---
        parameter ( ntot = 30000)
c sparse matrix - vektor multiplikation: u = Aw
c u und w sind n x p matrizen
        integer nmax, n, p, nzl, icol( nmax), irow( ntot)
        double precision a( ntot), ad( nmax), w( nmax, p), u( nmax, p)
        integer i, j, k, l, lfirst, llast

        do 10 i = 1,n
        do 10 k = 1,p
                u( i, k) = ad( i) * w( i, k)
10              continue

c columnwise
        llast = 0
        do 30 j = 1,nzl
                if (icol( j) .eq. 0) goto 30
                lfirst = llast + 1
                llast = llast + icol( j)
                do 20 l = lfirst, llast
                        i = irow( l)
                do 20 k = 1, p
                        u( i, k) = u( i, k) + a( l) * w( j, k)
                        u( j, k) = u( j, k) + a( l) * w( i, k)
20                      continue
30              continue
        return
        end

      SUBROUTINE IOVECT(N,M,Q,J,K)
C
C THIS SUBROUTINE STORES AND RECALLS VECTORS.  IT STORES THE VECTORS
C IN CORE ALTHOUGH MOST REAL PROBLEMS WOULD USE A DISK.

c -------- needs to be adapted -(6)-----
        parameter (nmax = 5000)
      DOUBLE PRECISION Q(N,M),QS( nmax,45)
      COMMON /STORE/QS
      IF(K.EQ.1)GO TO 30
      DO 20 L=1,M
         L1=J-M+L
         DO 10 I=1,N
            QS(I,L1)=Q(I,L)
   10    CONTINUE
   20 CONTINUE
      RETURN
C
   30 DO 50 L=1,M
         L1=J-M+L
         DO 40 I=1,N
            Q(I,L)=QS(I,L1)
   40    CONTINUE
   50 CONTINUE
      RETURN
      END