/****************************************************************************

 AGETBLOC - a routine to read an AMPS block file

   Copyright:  Geoffrey J. Barton (1992,1997)

   email: geoff@ebi.ac.uk

   Please see README for conditions of use.

****************************************************************************

History:

15th November 1992 - ANSI C version - also uses GJ... routines.
This version adapted with error messages for alscript.

11th June 1992.
Agetbloc:  like getbloc, but does not require that every character read into
the seqs structure is an alphabetic character.  Also does not contain the 
option to convert the "sequences" read in into integer format

getbloc:  Read an AMPS style block file into the seqs array
the nbloc aligned sequences are stored in positions 1-nbloc.

This is a straight-ish translation of the fortran routine fbloc.f, hence
the non-C like goto's...

Sequence lengths are actual length +1 + 1.  This allows position 0 to 
be reserved for future use, and preserves the '\0' for output.

18/Feb/1993:Fix (i) to (i+1) in realloc.  Spotted by RBR.

 $Id$
 $Log$

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "array.h"
#include "gjutil.h"


/*
 * Agetbloc - read an AMPS style block file into the bloc array.
 *
 * bfile : open stream positioned at the start of the block file.
 * bloc  : array of sequence records; entries 1..*nbloc are filled in
 *         (entry 0 is not touched).  For each entry the id, title and
 *         seq buffers are allocated here.
 * nbloc : on success receives the number of sequences read.
 *
 * File layout as parsed by this routine:
 *   - every line containing '>' introduces a sequence.  The id is the
 *     text after '>' up to and including the first blank; the title is
 *     the remainder of the line starting at that blank.
 *   - the first line containing '*' that follows at least one identifier
 *     line starts the alignment; the column of that '*' is the column of
 *     the first sequence in every following row.
 *   - each following line is one alignment position: character
 *     (column + j - 1) belongs to sequence j.
 *   - a line with '*' in that same column ends the alignment.
 *
 * seq[0] is set to ' ', alignment characters are stored from seq[1], and
 * slen is set to (number of alignment rows + 1), the index of the
 * terminating '\0'.
 *
 * Returns 1 on success, 0 on failure (premature end of file, too many
 * sequences, no terminating '*', or out of memory when sizing a sequence).
 */
int Agetbloc(FILE *bfile,struct seqdat *bloc,int *nbloc)

{
    extern int MAXnseq, MAXslen, MAXilen, MAXtlen, MAXnbloc;
    extern FILE *std_in,*std_out,*std_err;

    char *buff;                 /* line buffer, owned by this routine */
    char *idstart, *idend, *tmp;
    char sident = 0;            /* set once at least one '>' line was seen */
    int idlen, totseq = 0;
    int i, j, k;
    size_t col, linelen, pos;

    buff = (char *) GJmalloc(sizeof(char) * MAXtlen);

    /*
     * Phase 1: identifier lines, up to the '*' line that opens the block.
     * The fgets() result is only tested, never assigned back to buff, so
     * the buffer can still be released when end of file is reached.
     */
    for(;;){
        if(fgets(buff,MAXtlen,bfile) == NULL){
            fprintf(std_err,"Premature end of BLOCK FILE\n");
            free(buff);
            return 0;
        }

        if((idstart = strchr(buff,'>')) != NULL){
            if(++totseq == MAXnbloc){
                fprintf(std_err,
                "Max Number of block file sequences exceeded: %d\n",
                totseq);
                fprintf(std_err,"Use MAX_NSEQ command to increase value");
                free(buff);
                return 0;
            }
            sident = 1;

            /* The id ends at the first blank, or at the end of the line
               when there is none (strchr for '\0' always succeeds). */
            idend = strchr(idstart,' ');
            if(idend == NULL){
                idend = strchr(idstart,'\0');
            }

            idlen = (int)(idend - idstart) + 1;
            /* GJmalloc is used here like every other allocation in this
               routine (the original used a bare, unchecked malloc). */
            bloc[totseq].id = (char *) GJmalloc(sizeof(char) * idlen);
            bloc[totseq].id = GJstrblank(bloc[totseq].id,idlen);
            strncpy(bloc[totseq].id,idstart+1,idlen-1);   /* don't copy the ">" symbol */
            bloc[totseq].ilen = idlen-1;
            bloc[totseq].id[idlen-1] = '\0';

            bloc[totseq].tlen = strlen(idend)+1;
            bloc[totseq].title = (char *) GJmalloc(sizeof(char) * bloc[totseq].tlen);
            bloc[totseq].title = GJstrblank(bloc[totseq].title,bloc[totseq].tlen);
            strcpy(bloc[totseq].title,idend);

            bloc[totseq].seq = (char *) GJmalloc(sizeof(char) * MAXslen);
            bloc[totseq].seq[0] = ' ';      /* position 0 is reserved */
            continue;
        }

        /* A '*' only opens the block once an identifier has been read. */
        if(sident && (idstart = strchr(buff,'*')) != NULL){
            break;
        }
    }

    /* Column at which the first sequence sits in every alignment row. */
    col = (size_t)(idstart - buff);

    /*
     * Phase 2: one line per alignment position until the closing '*'.
     */
    i = 0;
    while(fgets(buff,MAXtlen,bfile) != NULL){
        /* Real length of this line.  Anything in buff beyond it is left
           over from an earlier, longer line and must not be looked at. */
        linelen = strcspn(buff,"\r\n");

        if(linelen > col && buff[col] == '*'){
            ++i;
            for(k=1;k<totseq+1;++k){
                /* Resize before writing the terminator: i can be equal
                   to MAXslen here, one past the original allocation. */
                tmp = (char *) realloc(bloc[k].seq,sizeof(char)*(size_t)(i+1)); /*i+1 fix suggested by rbr*/
                if(tmp != NULL){
                    bloc[k].seq = tmp;
                }else if(i >= MAXslen){
                    /* Could not grow and the old buffer is too small. */
                    fprintf(std_err,"Out of memory reading block file sequences\n");
                    free(buff);
                    return 0;
                }
                /* If a shrinking realloc failed the old, larger buffer
                   is still valid and is simply kept. */
                bloc[k].seq[i] = '\0';
                bloc[k].slen = i;
            }
            *nbloc = totseq;
            free(buff);
            return 1;
        }

        ++i;
        if(i==MAXslen){
            error("Max Sequence length exceeded - use MAX_SEQ_LEN command to increase",1);
            /* NOTE(review): error() is presumably fatal; bail out anyway
               so seq[MAXslen] is never written should it return. */
            free(buff);
            return 0;
        }
        for(j=1;j<totseq+1;++j){
            /* cope with short lines: columns the line does not reach are
               stored as blanks instead of stale buffer contents */
            pos = col + (size_t)(j-1);
            bloc[j].seq[i] = (pos < linelen) ? buff[pos] : ' ';
        }
    }

    fprintf(std_err,"No terminating * in blocfile\n");
    free(buff);
    return 0;
}
 
