/***************************************************************************

msf2blc:  A program to convert a GCG .MSF file into an AMPS blockfile.

   Copyright:  Geoffrey J. Barton (1992,1997)

   email: geoff@ebi.ac.uk
   Please see the README file for details of conditions for use of this program.

   $Id$
   $Log$

****************************************************************************

Notes:  This program can be run as a pipe:  type msf2blc -q < input > output
Only error messages will be output to std_err

Default mode is interactive and prompts for filenames.

The storage for the sequences is allocated dynamically, so the MAX_SEQ_LEN
defines in the header file "defaults.h" have no effect.  If a system memory
limit is reached, then a "malloc error" message will be written and the
program will stop.  Most computers should happily cope with large numbers of
long sequences.  Some possible solutions to this problem are outlined in
the user manual - alscript.doc

24 October 1994: Added -n option (e.g. msf2blc -q -n < input > output) to
replace any '.' characters found in the alignment with spaces.

****************************************************************************/

#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "gjutil.h"
#include "array.h"
#include "defaults.h"

#define TOKENS " \t\n"


main(int argc,char *argv[])
{
	struct seqdat *seqs;
	FILE *fp,*fout;
	int nseq;
	int found;
	int i,j;
        char *token,*sbit;
        char *line;
        extern FILE *std_err,*std_in,*std_out;
        char *msffile;
        char *blocfile;
        int quiet;
	int nodot;
/*        
        std_err = stderr;
        std_in = stdin;
        std_out = stdout;
*/

	GJinitfile();
        
        line = GJstrcreate(MAX_INLEN," ");
        msffile = GJstrcreate(MAX_INLEN,NULL);
        blocfile = GJstrcreate(MAX_INLEN,NULL);

        nseq = 0;
        found = 0;
        quiet = 0;
	nodot = 0;

        if(argc > 1){
	  if(strcmp(argv[1],"-q")==0){
            /* Quiet mode - read .MSF file from stdin and output block file to stdout */
            quiet = 1;
            fp = std_in;
            fout = std_out;
	  }
	  if(argc > 2){
	      if(strcmp(argv[2],"-n")==0){
		  /* set flag to remove dots */
		  nodot = 1;
	      }
	  }
        }else{
          /* Verbose mode - prompt for all filenames */
          fprintf(std_out,"\n\n");
          fprintf(std_out,"GCG .MSF to AMPS Blockfile conversion\n");
          fprintf(std_out,"Copyright: University of Oxford (1992)\n");
          fprintf(std_out,"Author: G. J. Barton (1992)\n\n");
          fprintf(std_out,"Max number/length of alignment - Defined by System\n");
          fprintf(std_out,"If you get a malloc error message - see manual\n\n");
          fprintf(std_out,"Enter MSF filename: ");
          
          fscanf(std_in,"%s",msffile);
          fprintf(std_out,"Opening: %s\n",msffile);
          fp = GJfopen(msffile,"r",1);
          
          fprintf(std_out,"Enter Block filename: ");
          fscanf(std_in,"%s",blocfile);
          fprintf(std_out,"Opening: %s\n",blocfile);
          fout = GJfopen(blocfile,"w",1);
        }
	
	fprintf(fout,"\n");
	fprintf(fout,"Conversion of GCG .MSF file to AMPS BLOCKFILE format\n");
	fprintf(fout,"msf2blc:  Geoffrey J. Barton (1992)\n\n");

        seqs = (struct seqdat *) GJmalloc(sizeof(struct seqdat));

       	if(!quiet)fprintf(std_out,"Reading .msf file\n");
        while(fgets(line,MAX_INLEN,fp) != NULL){
	  if(line[0] != '\n'){
             token = strtok(line,TOKENS);
             if(token != NULL){
               if(strcmp(token,"Name:") == 0){
                 /* This is a seq id name */
                  token = strtok(NULL,TOKENS);
                  seqs = (struct seqdat *) GJrealloc(seqs,sizeof(struct seqdat) * (nseq +1));
                  seqs[nseq].id = GJstrdup(token);
                  seqs[nseq].title = GJstrdup(line);
                  seqs[nseq].slen = 0;
                  seqs[nseq].seq = (char *) GJmalloc(sizeof(char));
                  ++nseq;
                  if(!quiet)fprintf(std_out,"%s\n",seqs[nseq-1].id);
	       }else if((strcmp(token,"//") == 0) || found){
                  /* this signals the end of identifiers so process sequences*/
                  found = 1;
                  if(token != NULL){
                    /* find out which seq this is */
                    i=0;
		    for(i=0;i<nseq;++i){
	               if(strcmp(token,seqs[i].id) == 0){
		         break;
		       }
		     }
                     /* read in the sequence */
                     if(i < nseq){
                       token = strtok(NULL,"\n");
                       if(token == NULL){
                         GJerror("Cannot find sequence in line");
                         fprintf(std_err,"%s",line);
                         exit(1);
		       }
                       j=0;
                       while(token[j] != '\0'){
                         if(isalpha(token[j]) || token[j] == '.'){
                           seqs[i].seq = (char *) GJrealloc(seqs[i].seq,sizeof(char) * (seqs[i].slen +1));
                           seqs[i].seq[seqs[i].slen] = token[j];
                           ++seqs[i].slen;
			 }
                         ++j;
		       }
		     }
		  }
		}else{
                  /* this is a comment line - just echo */
                  fprintf(fout,"%s\n",line);
		}
	     }
	   }
	}
        if(!quiet)fprintf(std_out,"All %d sequences read in\n",nseq);
        if(!quiet)fprintf(std_out,"Writing .blc file\n");
        
        for(i=0;i<nseq;++i){
            fprintf(fout,">%s %s\n",seqs[i].id,seqs[i].title);
        }
        fprintf(fout,"* iteration 1\n");
        for(i=0;i<seqs[0].slen;++i){
            for(j=0;j<nseq;++j){
		/* edit out dots if required */
		if(nodot == 1){
		    if(seqs[j].seq[i] == '.'){
			seqs[j].seq[i] = ' ';
		    }
		}
                fprintf(fout,"%c",seqs[j].seq[i]);
            }
            fprintf(fout,"\n");
        }
        fprintf(fout,"*\n");
        if(!quiet)fprintf(std_out,"All done\n");
        
        for(i=0;i<nseq;++i){
	  GJfree(seqs[i].seq);
	  GJfree(seqs[i].id);
  	  GJfree(seqs[i].title);
	}
	GJfree(seqs);
	GJfree(line);
	GJfree(blocfile);
	GJfree(msffile);

}	
