C******************************************************************
      SUBROUTINE CLUST(SIMMAT,N,NENTRY,IDENT,savec,savclu)
C------------------------------------------------------------------
C This performs single link cluster analysis of  the symmetrical
C matrix SIMMAT(N,N) where elements of SIMMAT are SIMILARITY values
C for the comparison of the elements i,j.
C NENTRY = Number entries compared in SIMMAT
C
C
C
C Author: G. J. Barton  (1987) All rights reserved.
C
C Version 1.3
C
C -- Also records which element is responsible for forming the
C    current cluster (ie IPOS,JPOS) stored in CELM(2,maxcluster)
C
c -- this version includes the option to do the plot in a vertical
C    format ie with the last cluster joined at the bottom, and all
C    the sequences along the top!
C    Note that the IDENTS are not plotted in this format since it is
C    assumend that this will only be used for large groups of seqs
C
C -- This version has IDENT array as an argument idents are plotted
C    after the entry number on the dendrogram it also interrogates
C    the user for the x-axis title and title for the plot
C
C -- This routine utilises the simpleplot library to do the
C    neccessary plotting
C
C The method works by building a set of clusters as follows:
C
C 1. The largest element of SIMMAT is located - this may be the
C    result of a comparison between:
C    (i)   Two entries which have not been clustered already.
C    (ii)  An unclustered entry and a cluster.
C    (iii) Two clusters.
C 2. The two entries or clusters located in 1 are joined to form
C    a new cluster which is stored in array CPAIRS(2,MACLUS) -
C    CPAIRS(1,*) and CPAIRS(2,*) store the element or cluster
C    locations.  If an element is a cluster then a value of
C    CADD (usually 2000) is added to the entry which means that
C    the number refers to the (element-CADD)th cluster and is not
C    a new entry.
C 3. The CTOTAL(MACLUS,MACLUS) array is added to and stores all the
C    entries which constitute the current cluster. (This is v. ineffiecient
C    in terms of memory since the array may be hardly filled at all)
C 4. The elements of SIMMAT associated with the newly generated
C    cluster are now set to MINSET (-1e6) (Yet another ineffiecient
C    step!!)
C 5. return to step 1. and continue until all elements have been
C    accounted for.
C
C The coding could be made more efficient by making greater use of
C linked lists.  In particular for the storage of data currently held
C in array CTOTAL.  If memory is a problem then  the code could be
C modified to allow the use of the upper diagonal of SIMMAT to be used
C in place of the CTOTAL array. Note also that the SIMMAT and CTOTAL
C arrays are frequently accessed non-sequentially this may present
C execution time problems for v. large arrays however  tests on arrays
C up to 90x90 have not presented any problems and are executed very quickly..
C 
C version 1.4    Can make use of two alternative subroutines
C                gjplot_simple.f and gjplot_gks.f.
C                gjplot_simple is only supported on VAX machines
C                that have a SIMPLEPLOT licence agreement....
C
C Last modified: 8/12/87
C             
C
C------------------------------------------------------------------------
C
C
C --  include file of virtually everything
C
      include 'clust.blk'
C
      logical savec,savclu
C
C --
      NCLUST = 0
C
C

1     CONTINUE
C
C --  find the biggest element of SIMMAT, only use the lower diagonal
      MAXVAL = minset
      DO 11,I=2,NENTRY
         DO 10,J=1,I-1
            IF(SIMMAT(I,J).GT.MAXVAL)THEN
               MAXVAL = SIMMAT(I,J)
               IPOS = I
               JPOS = J
            END IF
10       CONTINUE
11    CONTINUE
C
C --  update the number of clusters and current cluster score
      NCLUST = NCLUST +1
C      write(6,*)'nclust = ',nclust
C      write(6,603)
C      DO 6000,I=1,NCLUST
C        WRITE(6,600)I,CSCORE(I),CPAIRS(1,I),CPAIRS(2,I),NCTOT(I),
C     -            (CTOTAL(I,J),J=1,NCTOT(I))
C600     FORMAT(1X,I5,F10.2,3I6,20I4)
C603     format(3x,'NCLUS','  CSCORE  ','  p1  ','  p2  ',' nctot',
C     -  ' TOTALS')
C
C6000  CONTINUE
C      do 1234,i=1,nentry
C      write(6,6876)(simmat(i,j),j=1,nentry)
C1234  continue
C6876  format(1x,10f10.0)
C
C      write(6,*)'maxval = ',maxval,' ipos = ',ipos,' jpos = ',jpos
C
      CSCORE(NCLUST) = MAXVAL
C
C --  look back through totals to see if ipos or jpos are already clusterd
C     i,j pointers to position in ctotal array - if = 0 then not in cluster
      IPOINT = 0
      JPOINT = 0
      IF(NCLUST.GT.1)THEN
         DO 20,I=NCLUST-1,1,-1
            DO 22,II=1,NCTOT(I)
               IF((CTOTAL(I,II).EQ.IPOS).AND.(IPOINT.EQ.0))THEN
                  IPOINT = I
               ELSE IF((CTOTAL(I,II).EQ.JPOS).AND.(JPOINT.EQ.0))THEN
                  JPOINT = I
               END IF
22          CONTINUE
20       CONTINUE
      END IF
C      write(6,*)'ipoint = ',ipoint,' jpoint = ',jpoint
C
C --  record the elements responsible for this cluster
      CELM(1,NCLUST) = IPOS
      CELM(2,NCLUST) = JPOS
C
C --  update the CPAIRS and CTOTAL arrays for new cluster then set the
C     common elements of SIMMAT to MINSET
      IF((IPOINT.EQ.0).AND.(JPOINT.EQ.0))THEN
C       two new elements
        CPAIRS(1,NCLUST)=IPOS
        CPAIRS(2,NCLUST)=JPOS
        CTOTAL(NCLUST,1)=IPOS
        CTOTAL(NCLUST,2)=JPOS
        NCTOT(NCLUST)=2
        SIMMAT(IPOS,JPOS)=MINSET
        simmat(jpos,ipos)=minset
      ELSE IF((IPOINT.GT.0).AND.(JPOINT.GT.0))THEN
C       two clusters, CADD indicates that the CPAIRS is pointing to another
C       element of CPAIRS and not to SIMMAT
        CPAIRS(1,NCLUST)=IPOINT+CADD
        CPAIRS(2,NCLUST)=JPOINT+CADD
        II = 0
        DO 30,I=1,NCTOT(IPOINT)
           II = II + 1
           CTOTAL(NCLUST,II)=CTOTAL(IPOINT,I)
30      CONTINUE
        DO 33,I=1,NCTOT(JPOINT)
           II = II +1
           CTOTAL(NCLUST,II)=CTOTAL(JPOINT,I)
33      CONTINUE
        NCTOT(NCLUST)=II
        DO 34,I=1,NCTOT(IPOINT)
           DO 35,J=1,NCTOT(JPOINT)
              SIMMAT(CTOTAL(IPOINT,I),CTOTAL(JPOINT,J))=MINSET
              simmat(ctotal(jpoint,j),ctotal(ipoint,i))=minset
35         CONTINUE
34      CONTINUE
      ELSE IF((IPOINT.GT.0).AND.(JPOINT.EQ.0))THEN
C       one cluster and a new element
        CPAIRS(1,NCLUST)=IPOINT+CADD
        CPAIRS(2,NCLUST)=JPOS
        II=0
        DO 40,I=1,NCTOT(IPOINT)
           II =II +1
           CTOTAL(NCLUST,II)=CTOTAL(IPOINT,I)
40      CONTINUE
        II = II + 1
        CTOTAL(NCLUST,II)=JPOS
        NCTOT(NCLUST)=II
        DO 41,I=1,NCTOT(IPOINT)
           SIMMAT(CTOTAL(IPOINT,I),JPOS)=MINSET
           simmat(jpos,ctotal(ipoint,i))=minset
41      CONTINUE
      ELSE
C       the opposte cluster and a new element
        CPAIRS(1,NCLUST)=IPOS
        CPAIRS(2,NCLUST)=JPOINT+CADD
        II=0
        DO 50,I=1,NCTOT(JPOINT)
           II =II +1
           CTOTAL(NCLUST,II)=CTOTAL(JPOINT,I)
50      CONTINUE
        II = II + 1
        CTOTAL(NCLUST,II)=IPOS
        NCTOT(NCLUST)=II
        DO 51,I=1,NCTOT(JPOINT)
           SIMMAT(CTOTAL(JPOINT,I),IPOS)=MINSET
           simmat(ipos,ctotal(jpoint,i))=minset
51      CONTINUE
      END IF
C
C --  check if all done
      IF(NCTOT(NCLUST).EQ.NENTRY)THEN
        WRITE(6,*)NCLUST,' CLUSTERS GENERATED '
        WRITE(6,*)NENTRY,' ELEMENTS CONSIDERED '
      ELSE
C        write(6,667)(ctotal(nclust,ii),ii=1,nctot(nclust))
C667     format(1x,10i4)
        GOTO 1
      END IF
C
C --  get on and draw the dendrogram
C
      call gjplot(simmat,n,nentry,ident)
C
C --  write out tree file etc
      if(savec)then
         call wtree(simmat,n,nentry,ident)
         return
      end if
C
C --  write out prolog clusters
      if(savclu)then
         call wrclus(simmat,n,nentry,ident)
         return
      end if
C
C
C --  if requested then plot out the cluster details
C
1111  CONTINUE
      CALL QMESS(6,5,1,
     -'Do you want to record numerical details of the '//
     -'clusters formed ? [N]: ',LINE)
      CALL CUCASE(LINE)
      IF(INDEX(line,'Y').GT.0)THEN
        CALL QMESS(6,5,1,
     -  'Enter filename for storage of cluster details ',LINE)
        OPEN(UNIT=7,STATUS='NEW',FILE=LINE,IOSTAT=IOS)
        IF(IOS.NE.0)THEN
          CALL AMESS(6,1,'ERROR OPENING FILE '//
     -               LINE(1:ENDFIN(LINE)),1,'E')
          inquire(file=line,exist=fexist)
          if(fexist)then
             call amess(6,1,'File already exists',1,'e')
          end if
          stop
        END IF
C
C --    write out plot title and axis title for reference
        CALL AMESS(7,1,'PLOT TITLE '//TIT(1:ENDFIN(TIT)),1,' ')
        CALL AMESS(7,1,'AXIS TITLE '//XTIT(1:ENDFIN(XTIT)),1,' ')
C
C --    write out details except ctotal array
        WRITE(7,603)
        DO 6000,I=1,NCLUST
          WRITE(7,600)I,CELM(1,I),CELM(2,I),
     -                CSCORE(I),CPAIRS(1,I),
     -                CPAIRS(2,I),NCTOT(I)
600       FORMAT(1X,I5,2I6,F10.2,2I6,I8)
603       format(3x,'NCLUS',' CELM1',' CELM2',
     -           '  CSCORE  ','  p1  ','  p2  ','    nctot')
          CALL AMESS(6,1,'CLUSTER DETAILS WRITTEN TO FILE '//
     -               LINE(1:ENDFIN(LINE)),1,'-')
6000    CONTINUE
      END IF
C
C
      END
C
