/******************************************************************************
 The computer software and associated documentation called DOMAK hereinafter
 referred to as the WORK which is more particularly identified and described in
 Appendix A of the file LICENSE.  Conditions and restrictions for use of
 this package are also in this file.

 This routine was developed by Asim S. Siddiqui (modified from a routine
 written by Robert B. Russell)

 The WORK was developed by:
        Asim S. Siddiqui and Geoffrey J. Barton
        Laboratory of Molecular Biophysics
        University of Oxford
        Rex Richards Building
        South Parks Road
        Oxford OX1 3QU U.K.
        Tel:  (+44) 865-275379
        FAX:  (+44) 865-510454
        INTERNET: as@bioch.ox.ac.uk
        JANET:    as@uk.ac.ox.bioch

 The WORK is Copyright (1995) University of Oxford
        Administrative Offices
        Wellington Square
        Oxford OX1 2JD U.K.

 All use of the WORK must cite:
 Siddiqui, A. S. and Barton, G. J., "Continuous and Discontinuous Domains: An
 Algorithm for the Automatic Generation of Reliable Protein Domain Definitions" 
 PROTEIN SCIENCE, 4:872-884 (1995).
*****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ase_error.h>
#include <include.h>

/* Given a string holding protein domain descriptors, fills domains[] with
 *  brookhaven starts and ends, or appropriate wild cards, for subsequent use.
 *
 *   domain_string  text containing the descriptor line(s)
 *   domains        caller-supplied array with room for maxdomain entries
 *   ndomain        out: number of entries of domains[] that were filled in
 *                  (only written once parsing has succeeded)
 *   maxdomain      capacity of domains[]
 *   gottrans       out: 1 if any call to rbrg_define() reported a
 *                  transformation, otherwise 0
 *   OUTPUT         stream that receives the error messages
 *
 *  Returns 0 on success, -1 on error (a descriptor could not be parsed,
 *  domains[] has no room at all, or two entries share an identifier).
 *
 *  NOTE(review): rbrg_define() is handed the start of domain_string on every
 *  call and nothing here advances through the text, so whether a second,
 *  different descriptor can ever be reached depends on rbrg_define() --
 *  confirm before calling this with maxdomain > 1. */
int rbrg_getdomain(domain_string,domains,ndomain,maxdomain,gottrans,OUTPUT)
char *domain_string;
struct domain_loc *domains;
int *ndomain;
int maxdomain;
int *gottrans;
FILE *OUTPUT;
{
	int i,j;
	int count;
	int status;	/* result of rbrg_define(): 0 read one, 1 no more, -1 error */
	int trans;	/* transformation flag reported by rbrg_define() */

	int rbrg_define();

	(*gottrans)=0;

	/* with no room in domains[] even the first descriptor would be written
	 * outside the caller's array, so refuse before touching it */
	if(maxdomain<=0) {
	  fprintf(OUTPUT,"error in domain specification file\n");
	  return -1;
	}

	/* read descriptors until the input is exhausted or domains[] is full */
	count=0;
	while(count<maxdomain) {
	  trans=0;
	  status=rbrg_define(&domains[count],&trans,domain_string,OUTPUT);
	  if(trans==1) (*gottrans)=1;
	  if(status==-1) return -1;
	  if(status!=0) break;	/* no further descriptor available */
	  count++;
	}
	(*ndomain)=count;

	/* check for duplication */
	for(i=0; i<(*ndomain); ++i)
	   for(j=i+1; j<(*ndomain); ++j)
	      if(strcmp(domains[i].id,domains[j].id)==0) {
		 fprintf(OUTPUT,"error: domain identifiers must not be the same\n");
		 fprintf(OUTPUT,"       found two copies of %s, domains %d & %d\n",
			domains[i].id,i+1,j+1);
		 return -1;
	      }

	return 0;
}

int rbrg_define(domain,gottrans,domain_string,OUTPUT)
struct domain_loc *domain;
int *gottrans;
char *domain_string;
FILE *OUTPUT;
/* reads in the next domain descriptor from a supplied input file 
 * returns 0 if all is well, -1 if an error occurs, 1 if EOF occurs */
{

	 int i,j,k;
	 int nobjects;
	 int comment;

 	 char c;
	 char *index;
	 char *buff,*add_buff;
         char *c_ptr;
	    
	 char *rbrg_skiptononspace();

	 buff=(char*)malloc(2000*sizeof(char));
	 add_buff=buff;
	
	comment=1;
	(*gottrans)=0;
	buff[0]='%';
        c_ptr = domain_string;
	while(buff[0]=='%' || buff[0]=='#') {
	 i=0; 
	 while((c=*c_ptr)!= '\0' && c!='\n')  {
            c_ptr++;
	    if(i>2000) {
	      fprintf(OUTPUT,"error: line length in domain file exceeds memory limit\n");
	      return -1;
	    }
	    buff[i++]=c;
	 }
	 if(c== '\0') { free(add_buff); return 1; }
	 buff[i]='\0';
	}
        c_ptr++;
	/* read in domain */
	   sscanf(buff,"%s",&domain[0].filename[0]); /* read the filename */
	   index=strchr(buff,' ');
	   sscanf(index,"%s",&domain[0].id[0]);	/* read the identifier */
	   index=strchr(buff,' ');
	   /* allocation of memory, initially */
	   domain[0].type=(int*)malloc(sizeof(int));
	   domain[0].start=(struct brookn*)malloc(sizeof(struct brookn));
	   domain[0].end=(struct brookn*)malloc(sizeof(struct brookn)); 
	   domain[0].V=(float*)malloc(3*sizeof(float));
	   domain[0].v=(float*)malloc(3*sizeof(float));
	   domain[0].R=(float**)malloc(3*sizeof(float*));
	   domain[0].r=(float**)malloc(3*sizeof(float*));
	   for(i=0; i<3; ++i) {
	      domain[0].R[i]=(float*)malloc(3*sizeof(float));
	      domain[0].r[i]=(float*)malloc(3*sizeof(float));
	      for(j=0; j<3; ++j) 
		 if(i==j) domain[0].R[i][j]=domain[0].r[i][j]=1.0;
		 else domain[0].R[i][j]=domain[0].r[i][j]=0.0;
	      domain[0].V[i]=domain[0].v[i]=0.0;
	   }

	   nobjects=0;
	   index=strchr(buff,'{')+1; /* get to one after opening '{' */
	   if(index==NULL) return -1;
	   while(index[0]!='\0' && index[0]!='}' && index[0]!='\n') { /* read until closing '}' */
	      while(index[0]==' ') index++; /* get to next non space */
	      if(strncmp(index,"ALL",3)==0) {  /* want all the coordinates in the file */
		 domain[0].type[nobjects]=1;
		 domain[0].start[nobjects].cid=domain[0].start[nobjects].in=
		     domain[0].end[nobjects].cid=domain[0].end[nobjects].in='?';
	 	 domain[0].start[nobjects].n=domain[0].end[nobjects].n=0;
		 index=rbrg_skiptononspace(index,OUTPUT);
	      } else if(strncmp(index,"CHAIN",5)==0) { /* want specific chain only */
		 domain[0].type[nobjects]=2;
		 if((index=rbrg_skiptononspace(index,OUTPUT))==NULL) return -1;
		 domain[0].start[nobjects].cid=domain[0].end[nobjects].cid=index[0];
		 domain[0].start[nobjects].in=domain[0].end[nobjects].in='?';
		 domain[0].start[nobjects].n=domain[0].end[nobjects].n=0;
		 index=rbrg_skiptononspace(index,OUTPUT);
	      } else { /* assume that otherwise a specific start and end will be provided */
		 domain[0].type[nobjects]=3;
		 if(index[0]=='_') domain[0].start[nobjects].cid=' '; 
		 else domain[0].start[nobjects].cid=(*index);
		 if((index=rbrg_skiptononspace(index,OUTPUT))==NULL) return -1;
		 sscanf(index,"%d",&domain[0].start[nobjects].n);
		 if((index=rbrg_skiptononspace(index,OUTPUT))==NULL) return -1;
		 if(index[0]=='_') domain[0].start[nobjects].in=' ';
		 else domain[0].start[nobjects].in=(*index);
		 if((index=rbrg_skiptononspace(index,OUTPUT))==NULL) return -1;
		 if((index=rbrg_skiptononspace(index,OUTPUT))==NULL) return -1;
		 if(index[0]=='_') domain[0].end[nobjects].cid=' ';
		 else domain[0].end[nobjects].cid=(*index);
		 if((index=rbrg_skiptononspace(index,OUTPUT))==NULL) return -1;
		 sscanf(index,"%d",&domain[0].end[nobjects].n);
		 if((index=rbrg_skiptononspace(index,OUTPUT))==NULL) return -1;
		 if(index[0]=='_') domain[0].end[nobjects].in=' ';
		 else domain[0].end[nobjects].in=(*index);
		 index=strchr(index,' ');
		 if(index!=NULL) while(index[0]==' ') index++;
	      }
	      nobjects++;
	      /* reallocing if necessary */
	      if(index!=NULL && index[0]!='}' && index[0]!='\n') {
		domain[0].type=(int*)realloc(domain[0].type,(nobjects+1)*sizeof(int));
	   	domain[0].start=(struct brookn*)realloc(domain[0].start,(nobjects+1)*sizeof(struct brookn));
		domain[0].end=(struct brookn*)realloc(domain[0].end,(nobjects+1)*sizeof(struct brookn));
	      }
	      /* now either stop, or move onto the next descriptor */
           } /* end of while((*index... */
	   
	   if(strchr(buff,'}')==NULL) {
               ase_error_fatal("rbrg_getdomain", "error in format");
	   }  /* end of if(strchr(buff,'}'... */
	   domain[0].nobj=nobjects;
	   free(add_buff);
	   return 0;
}

/* Moves on to whatever follows the next run of blanks: finds the first ' '
 * at or after index and steps over every consecutive ' '.  Returns a pointer
 * to the first character after that run (which may be the terminating '\0').
 * If no ' ' is left in the string, writes a message to OUTPUT and returns
 * NULL. */
char *rbrg_skiptononspace(index,OUTPUT)
char *index;
FILE *OUTPUT;
{
	char *gap;

	gap=strchr(index,' ');
	if(gap==NULL) {
	   fprintf(OUTPUT,"error in domain descriptors\n");
	   return NULL;
	}
	/* strspn counts the leading blanks, so this lands just past them */
	gap+=strspn(gap," ");
	return gap;
}
