C**********************************************************
      PROGRAM SORTER
C----------------------------------------------------------
C
C Author: Geoff Barton (1987)
C
C Program to read the output of an AMULT database scan
C and sort the data on the basis of user specified columns
C or produce a histogram output.
C
C also allows the calculation of mean and SD for the
C distribution and selection of sequences on the basis of
C their SD derived from this distribution
C
C Output options default to all but allow only the top n
C to be written if desired
C
C To programmers, please Note: this is a really nasty bit of 
C programming - you guessed!! In my defense, it did evolve
C VERY quickly on a dumb (i.e. line editor) terminal and the 
C needs of a Thesis write-up were paramount!
C
C Modified 19/7/89:  Now allow option to rescale the graph
C to scores 0-100
C
C Modified 23rd May 1991:  Now use Numerical recipies indexx
C subroutine (modified to sort into descending order)
C Also add some user-friendly features....
C
C----------------------------------------------------------
C
      INTEGER MAXDIM
      PARAMETER (MAXDIM=100000)
C
C --  1=match,2=nas,3=rmean,4=rsd,5=score
      REAL RDATA(MAXDIM,5)
      INTEGER ILENS(MAXDIM,2)
C
C --  sorting array,sorted output
      REAL DSD,DMEAN,skew,kurt
C
C --  indicator (pointer indicating order after sort)
      INTEGER IND(MAXDIM)
C
C --  character junk
      CHARACTER*131 LINE,TITLE(MAXDIM)*80,INCHAR*6,BUFF*80
C
      character*80 ifile,ofile,gfile
      logical ablank
C
      LOGICAL ERROR,NOSORT,EXTID,versus,SD,SHORT,cutoff
      INTEGER I,J,NDATA,CHOICE,ENDFIN,NID,ix,iy,NSHORT,L,ios
C
      real bdiff,cutval
      integer nbuket
C
      character*80 justl,temchr
C
      NOSORT =.FALSE.
      SHORT =.FALSE.
      EXTID = .FALSE.
      SD =.FALSE.
      cutoff = .false.
C
      CALL AMESS(6,1,'Program F S O R T ',1,'-')
      CALL AMESS(6,1,'Processes FSCAN output',1,' ')
      CALL AMESS(6,0,'Author:  Geoff Barton',1,' ')
      WRITE(6,600)MAXDIM
600   FORMAT(1x,'Maximum Allowed Database Size:',I10,' Sequences')
      call amess(6,1,'For the FSCAN file <fname>',0,' ')
      CALL AMESS(6,0,'IF SD CALC ENTER <fname>/S',0,' ')
      CALL AMESS(6,0,'IF RESTRICTED OUTPUT ENTER <fname>/S=integer',
     -0,' ')
      CALL AMESS(6,0,'IF VERSUS ENTER <fname>/V',0,' ')
      CALL AMESS(6,0,'IF ID EXTRACT ENTER <fname>/ID',0,' ')
      call amess(6,0,'IF CUTOFF then ENTER <fname>/CUTOFF',0,' ')
      call amess(6,0,'IF HISTOGRAM REQUIRED ENTER <fname>/HIST',0,' ')
      call amess(6,0,'If No /option given I will sort all results',
     -0,' ')
      call amess(6,0,'For BRIEF help enter /HELP now!',0,' ')
6001  continue
      CALL QMESS(6,5,0,'ENTER FILE TO PROCESS: ',LINE)
C      call cucase(line)
      if(index(line,'/').gt.0)then
        call cucase(line(index(line,'/')+1:))
      end if
      if(index(line,'/HELP').GT.0)then
        write(6,601)
        write(6,602)
	goto 6001
      end if
      IF(INDEX(LINE,'/HIST').GT.0)THEN
        NOSORT=.TRUE.
      END IF
      IF(INDEX(LINE,'/S').GT.0)THEN
        SD=.TRUE.
        IF(INDEX(LINE,'=').GT.0)THEN
          SHORT=.TRUE.
          CALL INTCOM(LINE(INDEX(LINE,'=')+1:),NSHORT,ERROR)
          WRITE(6,*)'I WILL REPORT THE TOP',NSHORT,' SCORES'
        END IF
      END IF
C
      IF(INDEX(LINE,'/V').GT.0)THEN
        VERSUS=.TRUE.
        NOSORT=.TRUE.
      END IF

C
      if(index(line,'/CUTOFF').gt.0)then
        cutoff = .true.
        call qmess(6,5,0,'ENTER CUTOFF [real]: ',buff)
        call reacom(buff,cutval,error)
        write(6,6667)cutval
6667    format(1x,'I will report sequences scoring .gt. :',f10.2)
      end if
C
      IF(INDEX(LINE,'/ID').GT.0)THEN
        EXTID=.TRUE.
        if(.not.cutoff)then
        CALL QMESS(6,5,0,'ENTER NUMBER OF IDS TO WRITE OUT: ',
     -  BUFF)
        CALL INTCOM(BUFF,NID,ERROR)
        end if
      END IF
C
      IF(NOSORT.OR.EXTID.OR.VERSUS.OR.SD.or.cutoff)THEN
        LINE=LINE(1:INDEX(LINE,'/')-1)
      END IF
C
C --  set up filenames
      ifile = line(:endfin(line))
      ofile = line(:index(line,'.'))//'sorted'
      gfile = line(:index(line,'.'))//'graph'
C
      OPEN(UNIT=1,FILE=LINE,STATUS='OLD',iostat=ios)
      if(ios.ne.0)then
        call amess(6,1,'Error opening file',1,'e')
        stop
      end if
C
C --
      IF(.NOT.NOSORT.and..not.extid)THEN
      CALL QMESS(6,5,0,
     -'ENTER FILE FOR SORTED OUTPUT [.sorted]: ',LINE)
      if(ablank(line))then
         line = ofile
      end if
      OPEN(UNIT=2,FILE=LINE,STATUS='NEW',iostat=ios)
      ELSE
      OPEN(UNIT=2,STATUS='SCRATCH',iostat = ios)
      END IF
      if(ios.ne.0)then
         call amess(6,1,'Error opening file',1,'e')
         stop
      end if
      IF(EXTID)THEN
        CALL QMESS(6,5,0,'ENTER FILE FOR ID''S: ',LINE)
        OPEN(UNIT=4,STATUS='NEW',FILE=LINE,iostat=ios)
        if(ios.ne.0)then
          call amess(6,1,'Error oepning file',1,'e')
          stop
        end if
      END IF
C99    CONTINUE
      CALL AMESS(6,0,'1=MATCH, 2=NAS, 3=RMEAN, 4=RSD, 5=SCORE',
     -1,' ')
      IF(VERSUS)THEN
        CALL QMESS(6,5,0,'ENTER X,Y CHOICE :',LINE)
        CALL IIREAD(LINE,IX,IY,',',6,ERROR)
      ELSE
        CALL QMESS(6,5,0,'ENTER CHOICE  :',LINE)
        CALL INTCOM(LINE,CHOICE,ERROR)
      END IF
      IF(ERROR)THEN
        CALL AMESS(6,0,'ERROR READING CHOICE',1,'E')
C        GOTO 99
      END IF
C
      IF(.NOT.VERSUS)THEN
      CALL AMESS(6,0,'USING: '//INCHAR(CHOICE),1,' ')
      END IF
C
C --  read the guff at the top of the file and write
      CALL AMESS(6,0,'READING DATA FILE',0,' ')
      I=0
C
C --
      IF(SD)THEN
      CALL AMESS(2,1,'THE SD VALUES SHOWN ARE BASED ON THE'//
     -' DISTRIBUTION FOUND AND +++++NOT+++++ UPON RANDOM '//
     -'SEQUENCES!',1,'-')
      END IF
1     CONTINUE
      READ(1,200,END=1000)LINE
      IF((INDEX(LINE,'>').GT.0).AND.(INDEX(LINE,':').GT.0))THEN
C       we've found a title line
        I=I+1
        TITLE(I)=LINE
        READ(1,101)(ILENS(I,L),L=1,2),(RDATA(I,J),J=1,5)
101     FORMAT(1X,2I5,1X,6F10.2)
        IF(EXTID)THEN
          if(.not.cutoff)then
          IF(I.GT.NID)THEN
            CALL AMESS(6,0,'IDENTS WRITTEN TO FILE',0,' ')
            STOP
          END IF
C
C write out the score and id in a form suitable for select3
C
          WRITE(4,413)int(rdata(i,choice)*100),
     -                LINE(INDEX(LINE,';')+1:INDEX(LINE,':')-1)
413       format(I5,1x,A)
          else
            if(rdata(I,choice).ge.cutval)then
               WRITE(4,413)int(rdata(i,choice)*100),
     -                     LINE(INDEX(LINE,';')+1:INDEX(LINE,':')-1)
            end if
          end if
        END IF
C--general cutoff case, only keep those results that are .gt. cutoff
        if(cutoff)then
           if(rdata(I,choice).lt.cutval)then
C --          reject this result
              i = i - 1
           end if
        end if
      ELSE
C       this is a bit of guff
        if(endfin(line).ne.0)then
          WRITE(2,200)LINE(1:ENDFIN(LINE))
        end if
200     FORMAT(A)
      END IF
C
      GOTO 1
C
1000  CONTINUE
      CALL AMESS(6,0,INCHAR(I)//' DATASETS READ IN',0,' ')
      NDATA = I
      if(ndata.le.0)then
         call amess(6,1,'No sequences satisfy cutoff',1,' ')
         stop
      end if
C
      if(cutoff.and.extid)then
         call amess(6,1,'Idents written to file (dbase order)',1,' ')
         stop
      end if
C
C --  do the sort
      IF(.NOT.NOSORT)THEN
C
        IF(SD)THEN
          CALL AMESS(6,0,'CALCULATING SCORE DISTRIBUTION',
     -    0,' ')
          WRITE(6,*)'USING ITEM ',CHOICE
          CALL STATS2(RDATA(1,CHOICE),NDATA,DMEAN,DSD,SKEW,KURT)
          CALL AMESS(6,0,'STATISTICS COMPLETE',0,' ')
          WRITE(6,*)'NUMBER OF POINTS ',NDATA
          WRITE(6,*)'MEAN             ',DMEAN
          WRITE(6,*)'SD               ',DSD
          WRITE(6,*)'SKEW             ',SKEW
          WRITE(6,*)'KURTOSIS         ',KURT
          CALL AMESS(6,0,'CALCULATING SIGSCORES',0,' ')
          DO 234,I=1,NDATA
             RDATA(I,5)=(RDATA(I,CHOICE)-DMEAN)/DSD
234       CONTINUE
        END IF
        CALL AMESS(6,0,'PERFORMING SORT',0,' ')
        if(sd)then
          choice = 5
        end if
C        DO 24,I=1,NDATA
C           TSORT(I)=RDATA(I,choice)
C24      CONTINUE
        call timer(6)
C        CALL SORTBs(RDATA(1,choice),NDATA,sorted,IND)
        call indexx(NDATA,RDATA(1,choice),IND)
        CALL AMESS(6,0,'DATA SORTED',0,' ')
        call timer(6)
C        DO 23,I=1,NDATA
C           RDATA(I,choice)=TSORT(I)
C23      CONTINUE
C

C
        CALL AMESS(6,0,'WRITING SORTED FILE',0,' ')
        IF(SHORT)THEN
          WRITE(2,*)'TOP ',NSHORT,' DATASETS OF COMPARISON'
          NDATA = NSHORT
        END IF
        DO 20,I=1,NDATA
           WRITE(2,200)TITLE(IND(I))(1:ENDFIN(TITLE(IND(I))))
           WRITE(2,101)(ILENS(IND(I),L),L=1,2),(RDATA(IND(I),J),J=1,5)
20      CONTINUE
        CALL AMESS(6,0,'SORTED FILE WRITTEN',0,'-')
C
C-- write prolog clauses?
        call qmess(6,5,0,'Generate prolog clauses? [N]: ',line)
        call cucase(line)
        if(index(line,'Y').gt.0)then
           open(unit=12,status='new',
C commented out for SUN Unix  carriagecontrol='list',
     -     file = ofile(:index(ofile,'.'))//'pl')
           write(12,200)':- dynamic sorter_out/3.'
           do 2987,i=1,ndata
              write(temchr,2001)'sorter_out(',i,','//
     -        title(ind(i))(index(title(ind(i)),';')+1:
     -                      index(title(ind(i)),':')-1)//',',
     -               rdata(ind(i),1),').'
2001          format(a,i5,a,f10.2,a)
              call clcase(temchr)
              call dblank(temchr,j)
              write(12,200)temchr(:j)
2987       continue
        end if
C
      ELSE
C
C --    write out the genplot file
        CALL QMESS(6,5,0,
     -  'ENTER Graph FILENAME [.graph]: ',line)
        if(ablank(line))then
          line = gfile
        end if
        OPEN(UNIT=3,FILE=LINE,STATUS='NEW',iostat=ios)
        if(ios.ne.0)then
          call amess(6,1,'Error opening file',1,'e')
          stop
        end if
C        IF(VERSUS)THEN
C           WRITE(3,*)'!SYMBOLS=SPECIAL'
C        ELSE
C        WRITE(3,*)'!HIST,X=50:1.0'
C        WRITE(3,*)'!DATA_TYPE=XY_ONLY'
C        END IF
C        WRITE(3,*)'!START_DATA'
C        IF(VERSUS)THEN
C          DO 39,I=1,NDATA
C             WRITE(3,3331)RDATA(I,IX),RDATA(I,IY)
Cn39        CONTINUE
C        ELSE
C          DO 30,I=1,NDATA
C             WRITE(3,3331)RDATA(I,CHOICE)
C30        CONTINUE
C        END IF
3332     continue
C
C --rescale option
C
         call qmess(6,5,1,'Rescale Graph 1-100 [Y]: ',line)
         if(ablank(line))then
            call amess(6,1,'Rescaling scores',1,' ')
            call rscale(rdata(1,choice),ndata,0.0,100.0,sfact,rmin)
            write(6,*)'sfact: ',sfact,'rmin: ',rmin
         end if
         write(6,*)'Enter the bucket interval required'
         read(5,*)bdiff
         if(.not.error)then
              write(3,3337)gfile
3337          format(1x,'These data from file: ',
     -             /1x,a80/)
         end if
C
         call hisplot(rdata(1,choice),ndata,bdiff,3,nbuket,error)
         if(error)then
           write(6,*)'Error - nbuket',nbuket,'bdiff ',bdiff
           goto 3332
         end if
C
3331     format(f8.2)
C        WRITE(3,*)'!END_DATA'
      END IF
C
C --
      STOP
601   format(1x,'BRIEF HELP'/
     -1x,'This programme sorts the results of a ',
     -1x,' database scan using the '/
     -1x,'FSCAN programme. ',
     -' At the ENTER FILE TO PROCESS prompt, enter '/
     -1x,'the name of the output file (F1) from the FSCAN run.'/
     -1x,'If no /options are given, the programme will create a file '/
     -1x,'in similar format to F1, but with the results sorted in '/
     -1x,'descending order of fit to the query pattern.'/
     -1x,'The /SD option calculates various statistics on the score '/
     -1x,'distribution then appends the SD scores for each sequence '/
     -1x,'in the database - this is not normally useful!'/)
C
602   format(1x,'The /S=integer option allows the top N results to be'/
     -1x,'saved rather than the whole lot - thus saving on disk space.'/
     -1x,'The /V option is not normally used.'/
     -1x,'The /ID option allows the score and ID of each sequence '/
     -1x,'to be written to a file which can subsequently be used to'/
     -1x,'extract the sequences from a database using the SELECT'/
     -1x,'programme.'/
     -1x,'/CUTOFF allows only sequence id''s scoring above some cutoff'/
     -1x,'to be output.'/
     -1x,'/HIST permits a histogram of scores to be output.'/
     -1x,'/HELP gives this not so brief HELP!')
      goto 6001
      END
C

