#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// #include <stdarg.h>
#include <string.h>
// #include <unistd.h>

#include "io_lib_header.h"
#include "util_lib_header.h"
#include "define_header.h"
#include "dp_lib_header.h"
#include "fastal_lib_header.h"
#include "fast_tree_header.h"


//TODO: seq_pair2diagonal delete num points from parameters
//TODO: reuse list


//Fastal_param *param_set;


/*! \mainpage T-Coffee Index Page
 *
 * \section intro_sec Introduction
 *
 * This is the introduction.
 *
 * \section install_sec Installation
 *
 * \subsection step1 Step 1: Opening the box
 *  
 * etc...
 * \section fastal_sec Fastal
 * 
 * This program is a very fast aligner. It is capable of aligning huge sets of sequences because it keeps as much as necessary on hard disk.
 */







/*!
 *	\file fastal.c
 *	\brief Source code for the fastal algorithm
 */


/**
 * \brief Expands diagonals into a list of scored residue pairs.
 *
 * Each diagonal is shifted back to the border of the matrix (the smaller of its
 * two start coordinates is subtracted from both) and then followed until the
 * end of one of the sequences is reached. The stored length of the diagonal
 * (third entry of each triple) is therefore not used. For every cell on the way
 * a point is appended to the list: entry 0 is the 1-based position in \a seq1,
 * entry 1 the 1-based position in \a seq2 and entry 2 the substitution score
 * taken from \c param_set->M.
 *
 * \param seq1 Sequence 1
 * \param seq2 Sequence 2
 * \param diagonals The diagonals. Three consecutive entries belong together: 1. pos in \a seq1 , 2. pos in \a seq2 and 3. length of the diagonal.
 * \param num_diagonals Number of diagonals
 * \param list_in Pointer to the point list; \c list_in[0] is updated when the list is reallocated.
 * \param current_length Number of slots allocated in the list; enlarged in steps of 1000.
 * \param current_num_points Number of points already in the list; updated on return.
 * \param additional_needed Not used.
 * \param param_set Parameters; only the substitution matrix \c M is read.
 * \return The (possibly reallocated) list, identical to \c list_in[0].
 */
int **
diag2pair_list(char* seq1,
			   char* seq2,
			   int *diagonals,
			   int num_diagonals,
			   int ***list_in,
			   int *current_length,
			   int *current_num_points,
			   int additional_needed,
			   Fastal_param *param_set)
{
	int **mat = param_set->M;
	int **list = list_in[0];
	int l1 = strlen(seq1), l2 = strlen(seq2);
	int x = *current_num_points;
	int i, k, pos1, pos2, mini, old_length;

	for (i = 0; i < num_diagonals; ++i)
	{
		pos1 = diagonals[i*3];
		pos2 = diagonals[i*3+1];
		//move the start of the diagonal back to the matrix border
		mini = MIN(pos1, pos2);
		pos1 -= mini;
		pos2 -= mini;
		while ((pos1 < l1) && (pos2 < l2))
		{
			if (x >= *current_length)
			{
				old_length = *current_length;
				*current_length+=1000;
				list=vrealloc (list,(*current_length)*sizeof(int*));
				//the test below relies on unused slots being NULL
				for (k = old_length; k < *current_length; ++k)
					list[k] = NULL;
			}
			if (!list[x])
				list[x]=vcalloc (7, sizeof (int));
			list[x][0] = pos1+1;
			list[x][1] = pos2+1;
			list[x][2] = mat[toupper(seq1[pos1])-'A'][toupper(seq2[pos2])-'A'];

			++x;
			++pos1;
			++pos2;
		}
	}
	*current_num_points = x;
	list_in[0]=list;
	return list;
}

/**
 * \brief Placeholder without any implementation.
 *
 * The body is empty, so calling this function has no effect.
 * \param prf A profile (passed by value, not used).
 */
void
guessalignment(Fastal_profile prf)
{
	
}

/**
 * \brief Comparison callback for qsort() on arrays of int (ascending order).
 *
 * The values are compared instead of subtracted, because a subtraction
 * overflows when the two values are far apart (e.g. INT_MIN and 1).
 *
 * \param a Pointer to the first int.
 * \param b Pointer to the second int.
 * \return -1 if *a < *b, 0 if both are equal, 1 if *a > *b.
 */
int 
fastal_compare (const void * a, const void * b)
{
	const int lhs = *(const int*)a;
	const int rhs = *(const int*)b;
	return (lhs > rhs) - (lhs < rhs);
}

/**
 * \brief Turns a set of diagonals into the sorted point list used by the sparse dynamic programming.
 *
 * The list contains the left, upper, lower and right border of the matrix and
 * all cells lying on one of the diagonals, column by column. Every point is an
 * array of 6 ints:
 *  - [0] position in \a seq1 (0 = border)
 *  - [1] position in \a seq2 (0 = border)
 *  - [2] score of the cell (substitution score, gap cost on the borders or -1000)
 *  - [3] index of the previous point in the same row (same position in \a seq2)
 *  - [4] index of the previous point in the same column (the point written just before)
 *  - [5] index of the diagonal predecessor
 *
 * The diagonals have to be unique (no two diagonals with the same offset) and
 * both sequences are expected to be non-empty.
 *
 * \param diagonals The diagonals, three ints each: pos in \a seq1, pos in \a seq2, length.
 * \param num_diagonals Number of diagonals.
 * \param seq1 Sequence 1.
 * \param seq2 Sequence 2.
 * \param num_points Receives the number of points in the list.
 * \param param_set Parameters; \c gep and the substitution matrix \c M are read.
 * \return The list of points. The list and every point in it are allocated here.
 */
int **
diagonals2int(int *diagonals,
			  int num_diagonals,
			  char *seq1,
			  char *seq2,
			  int *num_points,
			  Fastal_param *param_set)
{
	int l1 = strlen(seq1);
	int l2 = strlen(seq2);
	int gep = param_set->gep;
	int **M = param_set->M;
	int i;

	//+2: the origin and the left border alone need l2+1 slots
	int current_size = l2+l1+2;
	int **list = vcalloc(current_size, sizeof(int*));

	//identify every diagonal by its offset and sort them
	int *diags = vcalloc(num_diagonals, sizeof(int));
	for (i = 0; i < num_diagonals; ++i)
	{
		diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
	}

	qsort (diags, num_diagonals, sizeof(int), fastal_compare);

	int *diagx = vcalloc(num_diagonals, sizeof(int));
	int *diagy = vcalloc(num_diagonals, sizeof(int));

	//diagonals [a..b] are the ones crossing the current column.
	//a starts at -1 so that it ends up as the number of diagonals starting
	//on the upper border, even if there is none of them.
	int a = -1, b = -1;
	for (i = 0; i < num_diagonals; ++i)
	{
		if (diags[i] < l1)
		{
			//starts on the upper border, gets active later
			diagx[i] = l1 - diags[i];
			diagy[i] = 0;
			a= i;
		}
		else
			break;
	}
	++a;
	b=a-1;
	for (; i < num_diagonals; ++i)
	{
		//starts on the left border, active from the first column on
		diagx[i] = 0;
		diagy[i] = diags[i]-l1;
		b = i;
	}

	int tmpy_pos;
	int tmpy_value;
	//last_y[y]: index of the last point written in row y
	//last_x[x]: index of the upper border point of column x
	int *last_y = vcalloc(l2+1, sizeof(int));
	int *last_x = vcalloc(l1+1, sizeof(int));
	last_y[0] = 0;
	last_x[0] = 0;
	list[0] = vcalloc(6, sizeof(int));

	int list_pos = 1;
	int tmp_l2 = l2 + 1;
	
	//left border
	for (; list_pos < tmp_l2; ++list_pos)
	{
		list[list_pos] = vcalloc(6, sizeof(int));
		list[list_pos][0] = 0;
		list[list_pos][1] = list_pos;
		last_y[list_pos] = list_pos;
		list[list_pos][2] = list_pos*gep;
		list[list_pos][4] = list_pos-1;
	}

	int pos_x = 0;
	int y;
	int tmp_l1 = l1-1;
	while (pos_x < tmp_l1)
	{
		//one column adds at most num_diagonals+2 points
		if (list_pos + num_diagonals+2 > current_size)
		{
			//+2 so that the list also grows without any diagonal
			current_size += (num_diagonals+2)*1000;
			list = vrealloc(list, current_size * sizeof(int*));
		}
		//upper border
		list[list_pos] = vcalloc(6, sizeof(int));
		list[list_pos][0] = ++pos_x;
		list[list_pos][1] = 0;
		list[list_pos][2] = pos_x * gep;
		list[list_pos][3] = last_y[0];
		tmpy_value = list_pos;
		tmpy_pos = 0;
		last_x[pos_x] = list_pos;
		++list_pos;

		//diagonals
		for (i = a; i <= b; ++i)
		{
			list[list_pos] = vcalloc(6, sizeof(int));
			
			list[list_pos][0] = ++diagx[i];
			list[list_pos][1] = ++diagy[i];
			list[list_pos][3] = last_y[diagy[i]];
			list[list_pos][4] = list_pos-1;
			list[list_pos][5] = last_y[diagy[i]-1];
			list[list_pos][2] = M[toupper(seq1[diagx[i]-1])-'A'][toupper(seq2[diagy[i]-1])-'A'];
			//last_y is updated one step late, the next point still needs
			//the value of the previous column as diagonal predecessor
			last_y[tmpy_pos] = tmpy_value;
			tmpy_value = list_pos;
			tmpy_pos = diagy[i];
			
			++list_pos;
		}
		last_y[tmpy_pos] = tmpy_value;

		//lower border
		if (list[list_pos-1][1] != l2)
		{
			list[list_pos] = vcalloc(6, sizeof(int));
			list[list_pos][0] = pos_x;
			list[list_pos][1] = l2;
			list[list_pos][3] = last_y[l2];
			
			list[list_pos][2] = -1000;
			list[list_pos][4] = list_pos-1;
			if (pos_x > l2)
				list[list_pos][5] = last_x[pos_x-l2];
			else
				list[list_pos][5] = l2-pos_x;
			last_y[l2] = list_pos;
			++list_pos;
		}

		//the lowest diagonal reached the lower border
		if ((b >= 0) && (diagy[b] == l2))
			--b;
		
		//the next diagonal starts in this column
		if ((a >0) && (diagx[a-1] == pos_x))
			--a;
	}

	if (list_pos + l2+2 > current_size)
	{
		current_size += list_pos + l2 + 2;
		list = vrealloc(list, current_size * sizeof(int*));
	}
	
	//right border
	list[list_pos] = vcalloc(6, sizeof(int));
	list[list_pos][0] = l1;
	list[list_pos][1] = 0;
	list[list_pos][3] = last_x[l1-1];
	list[list_pos][2] = -1000;
	++list_pos;

	for (i = 1; i <= l2; ++i)
	{
		list[list_pos] = vcalloc(6, sizeof(int));
		list[list_pos][0] = l1;
		list[list_pos][1] = i;
		list[list_pos][3] = last_y[i];
		list[list_pos][4] = list_pos-1;
		y = last_y[i-1];
		if ((list[y][0] == l1-1) && (list[y][1] == i-1))
		{
			//a diagonal ends directly in front of this cell
			list[list_pos][5] = y;
			list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
		}
		else
		{
			if (i <= l1)
			{
				list[list_pos][5] = last_x[l1-i];
			}
			else
			{
				list[list_pos][5] = i-l1;
			}
			list[list_pos][2] = -1000;
		}
		++list_pos;
	}
	
	list[list_pos - l2][2] = -1000;

	*num_points = list_pos;

	//only the list itself is handed to the caller
	vfree(diags);
	vfree(diagx);
	vfree(diagy);
	vfree(last_y);
	vfree(last_x);
	return list;
}
 
/**
 * \brief Makes a sorted list out of diagonals, scoring diagonal cells with the profiles.
 *
 * The list contains the left, upper, lower and right border of the matrix and
 * all cells lying on one of the diagonals, column by column. Every point is an
 * array of 7 ints:
 *  - [0] position in \a seq1 (0 = border)
 *  - [1] position in \a seq2 (0 = border)
 *  - [2] score of the cell
 *  - [3] number of the diagonal the point lies on
 *  - [4] index of the previous point in the same row
 *  - [5] index of the previous point in the same column (the point written just before)
 *  - [6] index of the diagonal predecessor
 *
 * Cells on a diagonal are scored with the average substitution score of all
 * pairs of symbols of the two profile columns (weighted by their counts).
 * The diagonals have to be unique and both sequences are expected to be non-empty.
 *
 * \param diagonals A list of diagonals to use during dynamic programming (three ints each: pos in \a seq1, pos in \a seq2, length).
 * \param num_diagonals Number of diagonals.
 * \param seq1 Sequence 1.
 * \param seq2 Sequence 2.
 * \param num_points Receives the number of points in the list.
 * \param prf1 Profile belonging to \a seq1.
 * \param prf2 Profile belonging to \a seq2.
 * \param pos2char Maps a row of the profile to its character.
 * \param param_set Parameters; \c gep (cost for gap extension) and the substitution matrix \c M are read.
 * \return A 2-dim array which contains all points needed for the sparse dynamic programming algorithm.
 */
int **
diagonals2int5(int *diagonals,
			  int num_diagonals,
			  char *seq1,
			  char *seq2,
			  int *num_points,
			  Fastal_profile *prf1,
			  Fastal_profile *prf2,
			  char *pos2char,
			  Fastal_param *param_set)
{
	//every point has 7 entries, index 6 is the diagonal predecessor
	const int POINT_SIZE = 7;

	int l1 = strlen(seq1);
	int l2 = strlen(seq2);
	int gep = param_set->gep;
	int **M = param_set->M;
	int i;

	//+2: the origin and the left border alone need l2+1 slots
	int current_size = l2+l1+2;
	int **list = vcalloc(current_size, sizeof(int*));

	//identify every diagonal by its offset and sort them
	int *diags = vcalloc(num_diagonals, sizeof(int));
	for (i = 0; i < num_diagonals; ++i)
	{
		diags[i] = l1 - diagonals[i*3] + diagonals[i*3+1];
	}

	qsort (diags, num_diagonals, sizeof(int), fastal_compare);

	int *diagx = vcalloc(num_diagonals, sizeof(int));
	int *diagy = vcalloc(num_diagonals, sizeof(int));

	//diagonals [a..b] are the ones crossing the current column.
	//a starts at -1 so that it ends up as the number of diagonals starting
	//on the upper border, even if there is none of them.
	int a = -1, b = -1;
	for (i = 0; i < num_diagonals; ++i)
	{
		if (diags[i] < l1)
		{
			//starts on the upper border, gets active later
			diagx[i] = l1 - diags[i];
			diagy[i] = 0;
			a= i;
		}
		else
			break;
	}
	++a;
	b=a-1;
	for (; i < num_diagonals; ++i)
	{
		//starts on the left border, active from the first column on
		diagx[i] = 0;
		diagy[i] = diags[i]-l1;
		b = i;
	}

	int tmpy_pos;
	int tmpy_value;

	//last_y[y]: index of the last point written in row y
	//last_x[x]: index of the upper border point of column x
	int *last_y = vcalloc(l2+1, sizeof(int));
	int *last_x = vcalloc(l1+1, sizeof(int));
	last_y[0] = 0;
	last_x[0] = 0;
	list[0] = vcalloc(POINT_SIZE, sizeof(int));

	int list_pos = 1;
	int dig_num = l1;
	int tmp_l2 = l2 + 1;
	
	//left border
	for (; list_pos < tmp_l2; ++list_pos)
	{
		list[list_pos] = vcalloc(POINT_SIZE, sizeof(int));
		list[list_pos][0] = 0;
		list[list_pos][1] = list_pos;
		last_y[list_pos] = list_pos;
		list[list_pos][2] = list_pos*gep;
		list[list_pos][3] = ++dig_num;
		list[list_pos][5] = list_pos-1;
	}

	int pos_x = 0;
	int sym1, sym2;
	int weight_sum, score_sum, pair_weight;
	
	int tmp = l1;
	int y;
 	int tmp_l1 = l1-1;
	while (pos_x < tmp_l1)
	{
		//one column adds at most num_diagonals+2 points
		if (list_pos + num_diagonals+2 > current_size)
		{
			//+2 so that the list also grows without any diagonal
			current_size += (num_diagonals+2)*50;
			list = vrealloc(list, current_size * sizeof(int*));
		}
		//upper border
		list[list_pos] = vcalloc(POINT_SIZE, sizeof(int));
		list[list_pos][0] = ++pos_x;
		list[list_pos][1] = 0;
		list[list_pos][2] = pos_x * gep;
		list[list_pos][3] = --tmp;
		list[list_pos][4] = last_y[0];
		tmpy_value = list_pos;
		tmpy_pos = 0;
		last_x[pos_x] = list_pos;
		++list_pos;

		//diagonals
		for (i = a; i <= b; ++i)
		{
			list[list_pos] = vcalloc(POINT_SIZE, sizeof(int));
			list[list_pos][0] = ++diagx[i];
			list[list_pos][1] = ++diagy[i];
			list[list_pos][3] = diags[i];

			list[list_pos][4] = last_y[diagy[i]];
			list[list_pos][5] = list_pos-1;
			list[list_pos][6] = last_y[diagy[i]-1];

			//average score over all pairs of symbols of the two columns
			//NOTE(review): only the first 10 rows of the profiles are used - confirm that this is intended and not alphabet_size
			weight_sum = 0;
			score_sum = 0;
			for (sym1 = 0; sym1<10; ++sym1)
			{
				for (sym2 = 0; sym2<10; ++sym2)
				{
					pair_weight = prf2->prf[sym2][diagy[i]-1] * prf1->prf[sym1][diagx[i]-1];
					weight_sum += pair_weight;
					score_sum += pair_weight * M[pos2char[sym1]-'A'][pos2char[sym2] -'A'];
				}
			}
			//columns without any counted symbol get a score of 0 instead of a division by zero
			if (weight_sum != 0)
				list[list_pos][2] = score_sum / weight_sum;
			else
				list[list_pos][2] = 0;

			//last_y is updated one step late, the next point still needs
			//the value of the previous column as diagonal predecessor
			last_y[tmpy_pos] = tmpy_value;
			tmpy_value = list_pos;
			tmpy_pos = diagy[i];

			++list_pos;
		}
		last_y[tmpy_pos] = tmpy_value;

		//lower border
		if (list[list_pos-1][1] != l2)
		{
			list[list_pos] = vcalloc(POINT_SIZE, sizeof(int));
			list[list_pos][0] = pos_x;
			list[list_pos][1] = l2;
			list[list_pos][4] = last_y[l2];
			
			list[list_pos][2] = -1000;
			list[list_pos][3] = l1 - pos_x + l2;
			list[list_pos][5] = list_pos-1;
			if (pos_x > l2)
				list[list_pos][6] = last_x[pos_x-l2];
			else
				list[list_pos][6] = l2-pos_x;
			last_y[l2] = list_pos;
			++list_pos;
		}

		//the lowest diagonal reached the lower border
		if ((b >= 0) && (diagy[b] == l2))
			--b;
		
		//the next diagonal starts in this column
		if ((a >0) && (diagx[a-1] == pos_x))
			--a;
	}

	dig_num = -1;
	if (list_pos + l2+2 > current_size)
	{
		current_size += list_pos + l2 + 2;
		list = vrealloc(list, current_size * sizeof(int*));
	}

	//right border
	list[list_pos] = vcalloc(POINT_SIZE, sizeof(int));
	list[list_pos][0] = l1;
	list[list_pos][1] = 0;
	list[list_pos][3] = ++dig_num;
	list[list_pos][4] = last_x[l1-1];
	list[list_pos][2] = -1000;
	++list_pos;

	for (i = 1; i <= l2; ++i)
	{
		list[list_pos] = vcalloc(POINT_SIZE, sizeof(int));
		list[list_pos][0] = l1;
		list[list_pos][1] = i;
		list[list_pos][3] = ++dig_num;
		list[list_pos][4] = last_y[i];
		list[list_pos][5] = list_pos-1;
		y = last_y[i-1];
		if ((list[y][0] == l1-1) && (list[y][1] == i-1))
		{
			//a diagonal ends directly in front of this cell
			list[list_pos][6] = y;
			list[list_pos][2] = M[toupper(seq1[l1-1])-'A'][toupper(seq2[i-1])-'A'];
		}
		else
		{
			if (i <= l1)
			{
				list[list_pos][6] = last_x[l1-i];
			}
			else
			{
				list[list_pos][6] = i-l1;
			}
			list[list_pos][2] = -1000;
		}
		++list_pos;
	}
	
	list[list_pos - l2][2] = -1000;

	*num_points = list_pos;

	//only the list itself is handed to the caller
	vfree(diags);
	vfree(diagx);
	vfree(diagy);
	vfree(last_y);
	vfree(last_x);
	return list;
}
 



//**************************   sparse dynamic aligning **********************************************************


/**
 * \brief Writes one column of the combined profile into the profile file.
 *
 * The column is written as a single line of entries "<character><count>"
 * separated by blanks; symbols with a count of 0 are left out.
 *
 * \param prf1 Counts of the first profile (prf1[symbol][position]).
 * \param prf2 Counts of the second profile (prf2[symbol][position]).
 * \param pos1 Column in \a prf1. Only read if \a state is not 'D'.
 * \param pos2 Column in \a prf2. Only read if \a state is 'M' or 'D'.
 * \param param_set Parameters; \c alphabet_size and \c pos2char are read.
 * \param prof_f File to write the column to.
 * \param state 'M': sum of both columns, 'D': column of \a prf2 only, everything else: column of \a prf1 only.
 */
void
combine_profiles2file(int **prf1,
					  int **prf2,
					  int pos1,
					  int pos2,
					  Fastal_param *param_set,
					  FILE *prof_f,
					  char state)
{
	const int num_symbols = param_set->alphabet_size;
	const char *symbol_of = &(param_set->pos2char[0]);
	int written = 0;
	int sym;
	int count;

	for (sym = 0; sym < num_symbols; ++sym)
	{
		//read only the column(s) belonging to the state, the other
		//position may not be a valid column
		switch (state)
		{
			case 'M':
				count = prf1[sym][pos1] + prf2[sym][pos2];
				break;
			case 'D':
				count = prf2[sym][pos2];
				break;
			default:
				count = prf1[sym][pos1];
				break;
		}
		if (count <= 0)
			continue;

		//every entry but the first one is preceded by a blank
		if (written)
			fprintf(prof_f," %c%i", symbol_of[sym], count);
		else
			fprintf(prof_f,"%c%i", symbol_of[sym], count);
		written = 1;
	}
	fprintf(prof_f,"\n");
}



//Access to field c of point b in one of the linearised matrices (5 fields per point).
#define LIN(a,b,c) ((a)[(b)*5+(c)])
/**
 * \brief Calculates a fast and sparse dynamic programming matrix and writes the resulting alignment.
 *
 * For every point of the list three values are calculated: the best score
 * ending with a match (MM), with a gap coming from the previous point in the
 * same row (MI) and with a gap coming from the previous point in the same
 * column (MJ). Each of them stores 5 fields per point:
 *  - 0: score
 *  - 1: index of the predecessor point
 *  - 2: number of steps in profile 1
 *  - 3: number of steps in profile 2
 *  - 4: matrix of the predecessor ('m', 'i' or 'j')
 *
 * The traceback starts at the point (length of \a prf1, length of \a prf2).
 * The edit string is written to \a edit_f: the numbers of the two profiles,
 * their is_leaf flags, one line per run ('M', 'I' or 'D' followed by its
 * length) and a closing "*". The combined profile is written to \a prof_f.
 *
 * The matrices are kept in static buffers which are reused between calls, so
 * the function is not reentrant.
 *
 * \param prf1 Profile of first sequence.
 * \param prf2 Profile of second sequence.
 * \param param_set The parameter for the alignment (\c gop, \c gep and \c nomatch are read).
 * \param list The list of points: [0]/[1] positions, [2] score, [3]/[4]/[5] indices of the predecessors in i, j and diagonal direction.
 * \param n number of dots.
 * \param edit_f File to save the edit information.
 * \param prof_f File to save the profile.
 * \param node_number Number of the new profile.
 * \return Length of the alignment.
 */
int
list2linked_pair_wise_fastal(Fastal_profile *prf1,
							 Fastal_profile *prf2,
							 Fastal_param *param_set,
							 int **list,
							 int n,
							 FILE *edit_f,
							 FILE *prof_f,
							 int node_number)
{
	int a,b, i, j, LEN=0, start_trace;
	int pi, pj,ij, delta_i, delta_j;
	static long *MI, *MJ, *MM;
	static int max_size;
	long *MT2;
	int gop, gep, igop;
	int l1, l2, l, ls;
	char **al;
	int nomatch = param_set->nomatch;
 
	l1=prf1->length;
	l2=prf2->length;

	al=declare_char (2,l1+l2+1);

	igop=param_set->gop;
	gep=param_set->gep;
	if (n>max_size)
	{
		max_size=n;

		vfree (MI);vfree (MJ); vfree (MM);

		MI=vcalloc (5*n, sizeof (long));
		MJ=vcalloc (5*n, sizeof (long));
		MM=vcalloc (5*n, sizeof (long));

	}
	else
	{
		//reuse the buffers of the previous call
		for (a=0; a<n; a++)
			for (b=0; b<5; b++)LIN(MI,a,b)=LIN(MJ,a,b)=LIN(MM,a,b)=-1000000;
	}

	//the lists built in this file end with the point (l1,l2)
	start_trace=n-1;
	for (a=0; a<n; a++)
	{
		i=list[a][0];
		j=list[a][1];

		//no gap opening penalty for terminal gaps
		if (i==l1 || j==l2)gop=0;
		else gop=igop;

		if (i==l1 && j==l2)start_trace=a;
		else if ( i==0 || j==0)
		{
			//border: can only be reached by a gap
			LIN(MM,a,0)=-1000000;
			if (j==0)
			{
				LIN(MJ,a,0)=-10000000;
				LIN(MI,a,0)=gep*i;
			}
			else if (i==0)
			{
				LIN(MI,a,0)=-10000000;
				LIN(MJ,a,0)=gep*j;
			}

			LIN(MI,a,1)=LIN(MJ,a,1)=-1;
			LIN(MI,a,2)=LIN(MJ,a,2)=i;
			LIN(MI,a,3)=LIN(MJ,a,3)=j;
			continue;
		}

		pi = list[a][3];
		pj = list[a][4];
		ij = list[a][5];

		delta_i=list[a][0]-list[pi][0];
		delta_j=list[a][1]-list[pj][1];

		/*Linear Notation*/
		LIN(MI,a,0)=MAX(LIN(MI,pi,0),(LIN(MM,pi,0)+gop))+delta_i*gep;
		LIN(MI,a,1)=pi;
		LIN(MI,a,2)=delta_i;
		LIN(MI,a,3)=0;
		//remember which of the two candidates of the MAX above was taken
		LIN(MI,a,4)=(LIN(MI,pi,0)>=(LIN(MM,pi,0)+gop))?'i':'m';

		LIN(MJ,a,0)=MAX(LIN(MJ,pj,0),(LIN(MM,pj,0)+gop))+delta_j*gep;
		LIN(MJ,a,1)=pj;
		LIN(MJ,a,2)=0;
		LIN(MJ,a,3)=delta_j;
		LIN(MJ,a,4)=(LIN(MJ,pj,0)>=(LIN(MM,pj,0)+gop))?'j':'m';

		//a match is only possible if the predecessor lies on the same diagonal
		if (a>1 && (ls=list[a][0]-list[ij][0])==(list[a][1]-list[ij][1]))
		{
			LIN(MM,a,0)=MAX3(LIN(MM,ij,0),LIN(MI,ij,0),LIN(MJ,ij,0))+list[a][2]-(ls*nomatch);

			LIN(MM,a,1)=ij;
			LIN(MM,a,2)=ls;
			LIN(MM,a,3)=ls;
			if ( LIN(MM,ij,0)>=LIN(MI,ij,0) && LIN(MM,ij,0)>=LIN(MJ,ij,0))LIN(MM,a,4)='m';
			else if ( LIN(MI,ij,0) >= LIN(MJ,ij,0))LIN(MM,a,4)='i';
			else LIN(MM,a,4)='j';
	  
		}
		else
		{
			LIN(MM,a,0)=UNDEFINED;
			LIN(MM,a,1)=-1;
		}  
	}

	//traceback, starting in the matrix with the best score
	a=start_trace;
	if (LIN(MM,a,0)>=LIN(MI,a,0) && LIN(MM,a,0) >=LIN(MJ,a,0))MT2=MM;
	else if ( LIN(MI,a,0)>=LIN(MJ,a,0))MT2=MI;
	else MT2=MJ;

	i=l1;
	j=l2;

	//al[0][x]/al[1][x] is 1 if column x of the alignment (built from the
	//end to the start) has a position of profile 1/profile 2
	while (!(i==0 &&j==0))
	{
		int next_a;
		l=MAX(LIN(MT2,a,2),LIN(MT2,a,3));
		if (i==0)
		{
			while ( j>0)
			{
				al[0][LEN]=0;
				al[1][LEN]=1;
				j--; LEN++;
			}
		}
		else if (j==0)
		{
			while ( i>0)
			{
				al[0][LEN]=1;
				al[1][LEN]=0;
				i--; LEN++;
			}
		}
      
		else if (l==0) {HERE ("L=0 i=%d j=%d", i, j);exit (0);}
		else 
		{
			for (b=0; b<l; b++, LEN++)
			{
				if (LIN(MT2,a,2)){al[0][LEN]=1;i--;}
				else al[0][LEN]=0;
	      
				if (LIN(MT2,a,3)){al[1][LEN]=1;j--;}
				else al[1][LEN]=0;
			}
	  
			next_a=LIN(MT2,a,1);
			if (LIN(MT2,a,4)=='m')MT2=MM;
			else if (LIN(MT2,a,4)=='i')MT2=MI;
			else if (LIN(MT2,a,4)=='j')MT2=MJ;
			a=next_a;
		}
	}

	invert_list_char ( al[0], LEN);
	invert_list_char ( al[1], LEN);

	fprintf(edit_f, "%i\n%i\n%i\n%i\n",prf1->prf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf);
	fprintf(prof_f, "%i\n0\n%i\n1\n", node_number,LEN);

	//write the new profile column by column and the edit string run by run
	char statec[] = {'M','D','I'};
	int num = 0;
	int state = 0;
	i = 0;
	j = 0;

	for ( b=0; b< LEN; b++)
	{
		if ((al[0][b]==1) && (al[1][b]==1))
		{

			combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'M');
			++i;
			++j;
			if (state != 0)
			{
				fprintf(edit_f, "%c%i\n",statec[state], num);
				num =1;
				state = 0;
			}
			else
				++num;
		}
		else if (al[0][b]==1)
		{
			combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'I');
			++i;
			if (state != 2)
			{
				fprintf(edit_f, "%c%i\n",statec[state], num);
				num =1;
				state = 2;
			}
			else
				++num;
		} 
		else if (al[1][b]==1)
		{
			combine_profiles2file(prf1->prf, prf2->prf, i, j, param_set, prof_f, 'D');
			++j;
			if (state != 1)
			{
				fprintf(edit_f, "%c%i\n",statec[state], num);
				num =1;
				state = 1;
			}
			else
				++num;
		}
	}

	//the last run
	fprintf(edit_f, "%c%i\n",statec[state], num);

	fprintf(edit_f,"*\n");
	fprintf(prof_f,"*\n");
	free_char (al, -1);
	return LEN;
}






/**
 * \brief Turns a profile into a consensus sequence.
 * 
 * For every column the character with the highest count is used as consensus
 * (the first one in case of a tie). The sequence is written in fasta format
 * into \a file_name, using the number of the profile as name.
 * NOTE(review): gaps are only left out if they are not part of the rows
 * [0, alphabet_size) of the profile - confirm.
 *
 * The program is terminated if the file cannot be opened.
 *
 * \param profile The profile.
 * \param file_name Name of the file to save the consensus sequence in.
 * \param param_set The parameter of the fastal algorithm (\c alphabet_size and \c pos2char are read).
 * \return the sequence; it is allocated here and has to be freed by the caller.
 */
char*
profile2consensus(Fastal_profile *profile, char *file_name, Fastal_param *param_set)
{
	FILE *cons_f = fopen(file_name,"w");
	if (cons_f == NULL)
	{
		fprintf(stderr, "Could not open file %s for writing\n", file_name);
		exit(1);
	}
	fprintf(cons_f, ">%i\n", profile->prf_number);
	char* seq = vcalloc(profile->length+1, sizeof(char));
	int i, j;
	int most_pos, most;
	int alphabet_size = param_set->alphabet_size;
	int **prf = profile->prf;
	char *pos2char = param_set->pos2char;
	for (i = 0; i < profile->length; ++i)
	{
		most = -1;
		//defined even if no count is larger than -1
		most_pos = 0;
		for (j = 0; j < alphabet_size; ++j)
		{
			if (prf[j][i] > most)
			{
				most = prf[j][i];
				most_pos = j;
			}
		}
		seq[i] = pos2char[most_pos];
		fprintf(cons_f, "%c",pos2char[most_pos]);
	}
	seq[i] = '\0';
	fprintf( cons_f, "\n");
	fclose(cons_f);
	return seq;
}




/**
 * \brief Calculates the diagonals between two sequences.
 * 
 * Uses bl2seq (tabular output, ungapped) to calculate the diagonals. Of every
 * hit the alignment length (4th column), the start in the query (7th column)
 * and the start in the subject (9th column) are used. Only the first hit on
 * each diagonal is kept. Lines which cannot be parsed are skipped.
 *
 * \param seq_file_name1 File with sequence 1.
 * \param seq_file_name2 File with sequence 2.
 * \param diagonals An array where the diagonals will be stored (three ints each: 0-based pos in seq 1, 0-based pos in seq 2, length). \c diagonals[0] is updated when the array is reallocated.
 * \param dig_length length of \a diagonals ; updated when the array is enlarged.
 * \param l1 Length of sequence 1.
 * \param l2 Length of sequence 2.
 * \param is_dna If not 0 blastn is used, else blastp.
 * \return number of diagonals; 0 if bl2seq could not be run or its output could not be read.
 */
int
seq_pair2blast_diagonal(char *seq_file_name1,
						char *seq_file_name2,
						int **diagonals,
						int *dig_length,
						int l1,
						int l2,
						int is_dna)
{
	char *out_file = vtmpnam(NULL);
	char blast_command[600];
	int written;

	//NOTE(review): the file names are pasted into a shell command without quoting
	written = snprintf(blast_command, sizeof(blast_command), "bl2seq -p %s -i %s -j %s -D 1 -g F -o %s", is_dna ? "blastn" : "blastp", seq_file_name1, seq_file_name2, out_file);
	if ((written < 0) || ((size_t)written >= sizeof(blast_command)))
	{
		fprintf(stderr, "bl2seq command too long\n");
		return 0;
	}
	system(blast_command);

	FILE *diag_f = fopen(out_file,"r");
	if (diag_f == NULL)
	{
		fprintf(stderr, "Could not read the output of bl2seq (%s)\n", out_file);
		return 0;
	}

	//marks the diagonals which have already been seen
	int *diag = vcalloc(l1 + l2, sizeof(int));
	int *diags = diagonals[0];
	char line[300];
	//the first three lines are the header of the output
	fgets(line, sizeof(line), diag_f);
	fgets(line, sizeof(line), diag_f);
	fgets(line, sizeof(line), diag_f);

	char delims[] = "\t";
	char *field, *length_field, *pos_q_field, *pos_d_field;
	int length, pos_q, pos_d, column, diag_index;
	int current_pos = 0;
	while (fgets(line, sizeof(line), diag_f) != NULL)
	{
		length_field = pos_q_field = pos_d_field = NULL;
		field = strtok(line, delims);
		for (column = 0; (field != NULL) && (column < 9); ++column)
		{
			if (column == 3)
				length_field = field;
			else if (column == 6)
				pos_q_field = field;
			else if (column == 8)
				pos_d_field = field;
			field = strtok(NULL, delims);
		}
		//less than 9 columns: not a hit line
		if (pos_d_field == NULL)
			continue;

		length = atoi(length_field);
		pos_q = atoi(pos_q_field)-1;
		pos_d = atoi(pos_d_field)-1;

		diag_index = l1-pos_q+pos_d;
		if ((diag_index < 0) || (diag_index >= l1+l2))
			continue;

		//a diagonal needs three entries
		if (current_pos + 3 > *dig_length)
		{
			(*dig_length) += 90;
			diags = vrealloc(diags, sizeof(int)*(*dig_length));
		}
		if (diag[diag_index] == 0)
		{
			diag[diag_index] =1;
			diags[current_pos++] = pos_q;
			diags[current_pos++] = pos_d;
			diags[current_pos++] = length;
		}
	}
	vfree(diag);
	fclose(diag_f);
	diagonals[0] = diags;
	return current_pos/3;
}




//******************************* OTHER STUFF ***********************

/**
 *	\brief Reads the sequence from a given position in a fasta file and turns it into a profile.
 * 
 * The line at \a off_set is skipped as sequence name. All following lines up to
 * the next line starting with '>' (or the end of the file) are read as
 * sequence. Every character sets the count of its row in the profile to 1 and
 * all other rows of the column to 0. The profile is enlarged if necessary.
 * NOTE(review): every character of a sequence line is used as index into
 * \c char2pos without any check, so the sequences must only contain letters
 * covered by that table.
 *
 * \param seq_file The file where the sequence is stored.
 * \param off_set The off_set from the beginning of the file to the position of the sequence name.
 * \param profile The profile where the sequence will be stored into.
 * \param prf_number The number of this profile.
 * \param param_set Parameters; \c alphabet_size and \c char2pos are read.
 */
void
file_pos2profile(FILE *seq_file,			//File with sequences
				 long off_set,				//offset of sequence from the beginning of file point to the sequence name, not to the sequence itself
				 Fastal_profile *profile,	//profile to save into
				 int prf_number,			//number of the profile
				 Fastal_param *param_set)			
{
	enum { LINE_LENGTH = 500 };
	char line[LINE_LENGTH];
	int alphabet_size = param_set->alphabet_size;
	int *aa2pos = &(param_set->char2pos[0]);
	int seq_length = 0;
	int i, j;

	profile->is_leaf = 1;
	profile->num_sequences = 1;
	profile->prf_number = prf_number;
	fseek (seq_file , off_set , SEEK_SET );

	//skip the sequence name, even if it is longer than the buffer
	while (fgets (line, LINE_LENGTH , seq_file) != NULL)
	{
		if (strchr(line, '\n') != NULL)
			break;
	}

	while(fgets(line, LINE_LENGTH, seq_file)!=NULL)
	{
		//start of the next sequence
		if (line[0] == '>')
			break;

		//make sure a whole line fits into the profile
		while (seq_length + LINE_LENGTH >= profile->allocated_memory)
		{
			for (i = 0; i < alphabet_size; ++i)
			{
				profile->prf[i] = vrealloc(profile->prf[i], (profile->allocated_memory+PROFILE_ENLARGEMENT)*sizeof(int));
			}
			profile->allocated_memory += PROFILE_ENLARGEMENT;
		}

		//the line ends at the line break or, if there is none (very long
		//line or last line of the file), at the end of the string
		for (i = 0; (line[i] != '\n') && (line[i] != '\r') && (line[i] != '\0'); ++i)
		{
			for(j = 0; j<alphabet_size; ++j )
				profile->prf[j][seq_length+i] = 0;
			profile->prf[aa2pos[toupper(line[i])-'A']][seq_length+i] = 1;
		}
		seq_length += i;
	}
	profile->length = seq_length;
}



/**
 * \brief Constructs the index of a fasta file.
 *
 * Entry 0 of the index is the position of the beginning of the file. After
 * every line which has been read, the position behind it is stored in the
 * entry with the number of names ('>' lines) seen so far. At the end entry k
 * therefore holds the position where record k ends, which is the position of
 * the name of the next record (or the end of the file for the last one).
 *
 * The program is terminated if the file cannot be opened.
 *
 * \param file_name File with the sequences.
 * \param file_positions Receives the array with the positions. It is allocated here.
 * \return The number of sequences in the file.
 */
int
make_index_of_file(char *file_name, 		//file with sequences
				   long **file_positions)	//array to save the positions
{
	enum { LINE_LENGTH = 150 };
	char line[LINE_LENGTH];
	int num_of_sequences = 0;
	int mem_for_pos = ENLARGEMENT_PER_STEP;
	//0 while the rest of a line longer than the buffer is read
	int at_line_start = 1;
	size_t length;

	FILE *file = fopen(file_name,"r");
	if (file == NULL)
	{
		printf("FILE NOT FOUND\n");
		exit(1);
	}

	(*file_positions) = vcalloc(ENLARGEMENT_PER_STEP,  sizeof(long));
	(*file_positions)[num_of_sequences] = ftell(file);
	while(fgets(line, LINE_LENGTH , file)!=NULL)
	{
		//a '>' in the middle of a long line does not start a new sequence
		if (at_line_start && (line[0] == '>'))
		{
			++num_of_sequences;
			
			if (num_of_sequences == mem_for_pos)
			{
				(*file_positions) = vrealloc((*file_positions),(ENLARGEMENT_PER_STEP+mem_for_pos) * sizeof(long));
				mem_for_pos += ENLARGEMENT_PER_STEP;
			}
		}
		length = strlen(line);
		at_line_start = ((length > 0) && (line[length-1] == '\n'));
		(*file_positions)[num_of_sequences] = ftell(file);
	}
	fclose(file);
	return num_of_sequences;
}


/**
 * \brief Reads a profile from a profile file.
 *
 * The profile consists of four lines with the number of the profile, the
 * is_leaf flag, the length and the weight, followed by one line per column.
 * A column is a list of entries "<character><count>" separated by blanks;
 * characters which are not listed have a count of 0.
 *
 * \param prof Structure to save the profile in. Its rows are enlarged if the profile is longer than the allocated memory.
 * \param profile_f File where the profile is stored.
 * \param position Position in \a profile_f where the profile is stored.
 * \param param_set Parameters; \c alphabet_size and \c char2pos are read.
 * \return 0 if the whole profile has been read, -1 if the file ended before.
 */
int
profile_file2profile(Fastal_profile *prof,	//structure to save the profile in
					 FILE *profile_f,		//file where the profile is stored
					 long position,			//position in profile_f where the profile is stored
					 Fastal_param *param_set)
{
	
	int alphabet_size = param_set->alphabet_size;
			
	int *aa2pos = &(param_set->char2pos[0]);


	fseek(profile_f,position,SEEK_SET);
	const int LINE_LENGTH = 500;
	char line[500];
	
	if (fgets(line, LINE_LENGTH, profile_f) == NULL)
		return -1;
	prof->prf_number = atoi(line);

	if (fgets(line, LINE_LENGTH, profile_f) == NULL)
		return -1;
	prof->is_leaf = atoi(line);

	if (fgets(line, LINE_LENGTH, profile_f) == NULL)
		return -1;
	prof->length = atoi(line);

	if (fgets(line, LINE_LENGTH, profile_f) == NULL)
		return -1;
	prof->weight = atoi(line);

	int i,j;
	if (prof->length > prof->allocated_memory)
	{
		for (i = 0;i < alphabet_size; ++i)
		{
			prof->prf[i] = vrealloc(prof->prf[i],prof->length*sizeof(int));
		}
		//keep track of the new size
		prof->allocated_memory = prof->length;
	}
	
	//the line break is a delimiter as well, else a column without any
	//entry would be read as an entry of the character '\n'
	char delims[] = " \r\n";
	char *result = NULL;
	char *result_num = NULL;
	
	int length = prof->length;

	for (i = 0; i < length; ++i)
	{
		for(j = 0; j<alphabet_size; ++j )
			prof->prf[j][i] = 0;
		if (fgets(line, LINE_LENGTH , profile_f) == NULL)
			return -1;
		result = strtok( line, delims );
		
		while( result != NULL)
		{
			//first the character, then its count
			result_num = &result[1];
			prof->prf[aa2pos[result[0]-'A']][i] = atoi(result_num);
			result = strtok( NULL, delims );
		}
	}
	return 0;
}



/**
 * \brief Writes a profile into a file.
 *
 * The profile is written to the beginning of the file: four lines with the
 * number of the profile, the is_leaf flag, the length and the weight, then one
 * line per column and a closing line "*". A column is a list of entries
 * "<character><count>" separated by blanks; characters with a count of 0 are
 * left out.
 *
 * \param profile The profile to save.
 * \param file File to save in.
 * \param param_set Parameters; \c alphabet_size and \c pos2char are read.
 */
void 
profile2file(Fastal_profile *profile,	//the profile to save
			 FILE* file,				//file to save in
			 Fastal_param *param_set)
{
	const int num_symbols = param_set->alphabet_size;
	const char *symbol_of = &(param_set->pos2char[0]);
	const int num_columns = profile->length;
	int column, sym, count, written;

	fseek(file,0,SEEK_SET);

	//header
	fprintf(file,"%i\n", profile->prf_number);
	fprintf(file,"%i\n", profile->is_leaf);
	fprintf(file,"%i\n", profile->length);
	fprintf(file,"%i\n", profile->weight);

	for (column = 0; column < num_columns; ++column)
	{
		written = 0;
		for (sym = 0; sym < num_symbols; ++sym)
		{
			count = profile->prf[sym][column];
			if (count <= 0)
				continue;
			//every entry but the first one is preceded by a blank
			if (written)
				fprintf(file," %c%i", symbol_of[sym], count);
			else
				fprintf(file,"%c%i", symbol_of[sym], count);
			written = 1;
		}
		fprintf(file,"\n");
	}
	fprintf(file,"*\n");
}



/**
 * \brief Reads the profile stored at the beginning of a file.
 *
 * The profile consists of four lines with the number of the profile, the
 * is_leaf flag, the length and the weight, followed by one line per column.
 * A column is a list of entries "<character><count>" separated by blanks;
 * characters which are not listed have a count of 0. Reading stops if the
 * file ends before the profile is complete.
 *
 * \param profile_f File to read the profile of.
 * \param prof The profile is saved in here. Its rows are enlarged if the profile is longer than the allocated memory.
 * \param prf_number Number of the profile. Not used, the number stored in the file is taken.
 * \param param_set Parameters; \c alphabet_size and \c char2pos are read.
 */
void
file2profile(FILE* profile_f,			//file to read the profile of
			 Fastal_profile *prof,		//profile saved in here
			 int prf_number,			//number of the profile
			 Fastal_param *param_set)
{
	int alphabet_size = param_set->alphabet_size;

	int *aa2pos =  &(param_set->char2pos[0]);


	fseek(profile_f,0,SEEK_SET);
	const int LINE_LENGTH = 500;
	char line[500];
	
	if (fgets(line, LINE_LENGTH, profile_f) == NULL)
		return;
	prof->prf_number = atoi(line);

	if (fgets(line, LINE_LENGTH, profile_f) == NULL)
		return;
	prof->is_leaf = atoi(line);

	if (fgets(line, LINE_LENGTH, profile_f) == NULL)
		return;
	prof->length = atoi(line);
	
	if (fgets(line, LINE_LENGTH, profile_f) == NULL)
		return;
	prof->weight = atoi(line);

	int i,j;
	if (prof->length > prof->allocated_memory)
	{
		for (i = 0;i < alphabet_size; ++i)
		{
			prof->prf[i] = vrealloc(prof->prf[i],prof->length*sizeof(int));
		}
		//keep track of the new size
		prof->allocated_memory = prof->length;
	}
	
	//the line break is a delimiter as well, else a column without any
	//entry would be read as an entry of the character '\n'
	char delims[] = " \r\n";
	char *result = NULL;
	char *result_num = NULL;
	
	int length = prof->length;

	for (i = 0; i < length; ++i)
	{
		for(j = 0; j<alphabet_size; ++j )
			prof->prf[j][i] = 0;
		if (fgets(line, LINE_LENGTH , profile_f) == NULL)
			return;
		result = strtok( line, delims );
		
		while( result != NULL)
		{
			//first the character, then its count
			result_num = &result[1];
			prof->prf[aa2pos[result[0]-'A']][i] = atoi(result_num);
			result = strtok( NULL, delims );
		}
	}
}



/**
 * \brief Turns a profile into a summed-up version of the same length.
 *
 * For every column and every symbol k the weighted sum over all symbols j of
 * weight * count(j) * M[j][k] is stored in sumup[k][column]. The extra row
 * sumup[alphabet_size] receives the plain sum of all counts of the column.
 *
 * \param profile   Profile to sum up.
 * \param sumup     Field with alphabet_size+1 rows to store the result in.
 * \param param_set Supplies alphabet size, position-to-character table and matrix M.
 * \return \a sumup
 */
int**
sumup_profile(Fastal_profile *profile,	//profile to sum-up
			  int **sumup,
			  Fastal_param *param_set)	//summed_up_profile
{
	const int num_symbols = param_set->alphabet_size;
	const int num_columns = profile->length;
	char *symbol_of = &(param_set->pos2char[0]);
	int **subst = param_set->M;
	int col, target, source;

	for (col = 0; col < num_columns; ++col)
	{
		int column_total = 0;

		for (target = 0; target < num_symbols; ++target)
		{
			const int target_idx = symbol_of[target]-'A';
			int weighted = 0;

			column_total += profile->prf[target][col];
			for (source = 0; source < num_symbols; ++source)
			{
				weighted += profile->weight * profile->prf[source][col] * subst[symbol_of[source]-'A'][target_idx];
			}
			sumup[target][col] = weighted;
		}
		sumup[num_symbols][col] = column_total;
	}

	return sumup;
}



/**
*	Turns the dynamic programming matrix into a editfile and calculates the new profile
*/
int
nw_matrix2edit_file(double **prog_matrix,	//dynamic programming matrix
					Fastal_profile *prf1,	//profile of dim1
					Fastal_profile *prf2,	//profile of dim2
	 				FILE *edit_f,			//file to safe the edit in
	  				int **prf_field,		//space to safe the new profile
					int *field_length,
					Fastal_param *param_set)		//length of prf_field
{
	int **M = param_set->M;
	int alphabet_size = param_set->alphabet_size;
	double gap_cost = param_set -> gop;
	fprintf(edit_f, "%i\n%i\n%i\n%i\n",prf1->prf_number, prf2->prf_number, prf1->is_leaf, prf2->is_leaf);
	int sum[] = {0,0,0};
	char sumc[] = {'M','I','D'};
	int last = 0;
	int n = 0;
	int m = 0;
	int field_pos = 0;
	int i;
	int prf1_length = prf1->length;
	int prf2_length = prf2->length;
	while ((n < prf1_length) && (m < prf2_length))
	{
		//if necesarry allocate more memory for result
		if ((*field_length)-alphabet_size < field_pos)
		{
			(*field_length) += ENLARGEMENT_PER_STEP;
			
			for (i = 0; i <alphabet_size+1; ++i)
			{
				prf_field[i] = vrealloc(prf_field[i], (*field_length)*sizeof(int));
			}
		}
		
		if (prog_matrix[n][m] == (prog_matrix[n+1][m] +gap_cost))
		{
			for (i = 0; i<alphabet_size; ++i)
			{
				prf_field[i][field_pos] = prf1->prf[i][n];
			}
			++n;
			++ field_pos;
			
			if (last != 1)
			{
				fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
				sum[last] = 0;
			}
			last = 1;
			++sum[last];
		}
		else if (prog_matrix[n][m] == (prog_matrix[n][m+1] +gap_cost))
		{
			
			for (i = 0; i<alphabet_size; ++i)
			{
				prf_field[i][field_pos] = prf2->prf[i][m];
			}
			++m;
			++ field_pos;
			if (last != 2)
			{
				fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
				sum[last] = 0;
			}
			last = 2;
			++sum[last];
		}
		else 
		{
			for (i = 0; i<alphabet_size; ++i)
			{
				prf_field[i][field_pos] = prf1->prf[i][n] + prf2->prf[i][m];
			}
			++n;
			++m;
			++ field_pos;
			if (last != 0)
			{
				fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
				sum[last] = 0;
			}
			last = 0;
			++sum[last];
		}
	}
	fprintf(edit_f,"%c%i\n",sumc[last],sum[last]);
	
	//gaps in prf2
	last = 0;
	while (n < prf1_length)
	{
		for (i = 0; i<alphabet_size; ++i)
		{
			prf_field[i][field_pos] = prf1->prf[i][n];
		}
		++n;
		++ field_pos;
		++last;
	}
	if (last > 0)
		fprintf(edit_f,"I%i\n",last);
	
	//gaps in prf1
	last = 0;
	while (m < prf2_length)
	{
		for (i = 0; i<alphabet_size; ++i)
		{
			prf_field[i][field_pos] = prf2->prf[i][m];
		}
		++m;
		++ field_pos;
		++last;
	}
	if (last > 0)
		fprintf(edit_f,"D%i\n",last);
	fprintf(edit_f,"*\n");
	return field_pos;
}




/**
 * \brief Pairwise alignment of two profiles.
 *
 * Sums up \a profile1, fills the dynamic programming matrix starting at its
 * lower right corner and hands it to nw_matrix2edit_file(), which writes the
 * edit record and builds the merged profile in \a sumup_prf.
 *
 * \param profile1       Profile of sequence 1.
 * \param profile2       Profile of sequence 2.
 * \param prog_matrix    Matrix for dynamic programming; needs at least
 *                       (profile1->length+1) x (profile2->length+1) cells.
 * \param edit_file_name Open file the edit record is written to.
 * \param sumup_prf      Field with alphabet_size+1 rows. Holds the summed-up
 *                       version of profile 1 first and the aligned profile on return.
 * \param sumup_length   Number of columns available in \a sumup_prf; updated on growth.
 * \param param_set      Supplies the alphabet size and the gap cost.
 * \return Length of the aligned profile.
 */
int
prf_nw(Fastal_profile *profile1,	//profile of sequence 1
	   Fastal_profile *profile2,	//profile of sequence 2
	   double **prog_matrix,		//matrix for dynamic programming (at least as long as necessary for alignment)
	   FILE *edit_file_name,		//name of edit file
	   int **sumup_prf,				//sum_up
	   int *sumup_length,
	   Fastal_param *param_set)			//sum_up length
{
	int alphabet_size = param_set->alphabet_size;
	double gap_cost = param_set->gop;
	int prof1_length = profile1->length;
	int prof2_length = profile2->length;
	int i, j, k;

	if (*sumup_length < prof1_length)
	{
		for (i = 0; i < alphabet_size+1; ++i)
		{
			sumup_prf[i] = vrealloc(sumup_prf[i], prof1_length*sizeof(int));
		}
		*sumup_length = prof1_length;
	}
	sumup_prf = sumup_profile(profile1, sumup_prf, param_set);

	//border row: includes column 0, which the recurrence reads for j == 0
	for (j = 0; j <= prof2_length; ++j)
	{
		prog_matrix[prof1_length][j] = gap_cost * (prof2_length-j);
	}
	prog_matrix[prof1_length][prof2_length] = 0.0;

	for (i = prof1_length-1; i >= 0; --i)
	{
		//border column
		prog_matrix[i][prof2_length] = gap_cost*(prof1_length-i);

		for (j = prof2_length-1; j >= 0; --j)
		{
			double match_score = 0.0;
			int residue_pairs = 0;
			int divisor;

			for (k = 0; k < alphabet_size; ++k)
			{
				residue_pairs += profile2->prf[k][j];
				match_score += (profile2->prf[k][j] * sumup_prf[k][i]);
			}
			//an empty column would give 0/0; score it as 0 instead of NaN
			divisor = residue_pairs * sumup_prf[alphabet_size][i];
			if (divisor != 0)
				match_score /= divisor;

			prog_matrix[i][j] = MAX3(prog_matrix[i+1][j+1]+match_score, prog_matrix[i+1][j]+gap_cost, prog_matrix[i][j+1]+gap_cost);
		}
	}
	return nw_matrix2edit_file(prog_matrix, profile1, profile2, edit_file_name, sumup_prf, sumup_length, param_set);
}



/**
 * \brief Writes one sequence, with gaps inserted, into the alignment file.
 *
 * The name line found at \a sequence_position is copied unchanged. The
 * residues of the following lines (up to the next line starting with '>' or
 * the end of the file) are then written on a single line; wherever
 * \a aligned_sequence holds a '-' a gap is written first. Gaps left in the
 * pattern after the last residue are appended.
 *
 * \param aligned_sequence  Pattern of the aligned sequence: one character per
 *                          alignment column, '-' marks a gap; terminated by '\n'.
 * \param sequence_file     File with all sequences.
 * \param sequence_position Position of the wanted sequence in \a sequence_file.
 * \param alignment_file    The file to write the aligned sequence into.
 */
void
edit_seq2aligned_seq(char *aligned_sequence,	//pattern for aligned sequence
					 FILE *sequence_file,		//file with all the sequences
					 long sequence_position,	//position in sequence file with the correct sequence
					 FILE *alignment_file)		//file to write the alignment into
{
	enum { LINE_LENGTH = 300 };
	char line[LINE_LENGTH];
	int pos = 0;
	int i;

	fseek(sequence_file, sequence_position, SEEK_SET);
	if (fgets(line, LINE_LENGTH, sequence_file) != NULL)
		fprintf(alignment_file,"%s", line);	//writing of sequence name

	while (fgets(line, LINE_LENGTH, sequence_file) != NULL)
	{
		if (line[0] == '>')
			break;	//next sequence starts here

		//stop at the terminator as well: the last line of a file may lack
		//'\n', and over-long lines arrive in several pieces without one
		for (i = 0; (line[i] != '\n') && (line[i] != '\0'); ++i)
		{
			while (aligned_sequence[pos] == '-')
			{
				fputc('-', alignment_file);
				++pos;
			}
			fputc(line[i], alignment_file);
			//never step over the end of the pattern
			if ((aligned_sequence[pos] != '\n') && (aligned_sequence[pos] != '\0'))
				++pos;
		}
	}

	//trailing gaps
	while ((aligned_sequence[pos] != '\n') && (aligned_sequence[pos] != '\0'))
	{
		fputc('-', alignment_file);
		++pos;
	}
	fputc('\n', alignment_file);
}



/**
 * \brief Applies the edit operations of one edit record to a pattern.
 *
 * Lines are read from the current position of \a edit_file until a line
 * starting with '*' or the end of the file. Only lines starting with 'M',
 * 'I' or 'D' are interpreted; each consumes as many 'X' positions of the
 * pattern as its count says. Positions consumed by \a gap_op are turned into
 * '-'. Any other line (e.g. the four header lines of a record) is skipped.
 *
 * \param edit_file        File positioned inside an edit record.
 * \param aligned_sequence Pattern to edit in place.
 * \param gap_op           Operation ('I' or 'D') that produces gaps.
 */
static void
edit2alignment_apply_ops(FILE *edit_file, char *aligned_sequence, char gap_op)
{
	enum { LINE_LENGTH = 50 };
	char line[LINE_LENGTH];
	int pos = 0;

	while (fgets(line, LINE_LENGTH, edit_file) != NULL)
	{
		const char op = line[0];
		int number;

		if (op == '*')
			break;
		if ((op != 'M') && (op != 'I') && (op != 'D'))
			continue;

		number = atoi(&line[1]);
		//the terminator check keeps a damaged record from running off the pattern
		while ((number > 0) && (aligned_sequence[pos] != '\0'))
		{
			if (aligned_sequence[pos] == 'X')
			{
				if (op == gap_op)
					aligned_sequence[pos] = '-';
				--number;
			}
			++pos;
		}
	}
}


/**
 * \brief Recursive function to turn the edit file into the alignment.
 *
 * For the node \a node_number the edit record is applied twice: once for the
 * first child ('D' runs become gaps) and once for the second child ('I' runs
 * become gaps). A leaf child is written to \a alignment_file directly; for an
 * internal child the edited pattern is appended to \a edit_seq_file and the
 * function recurses. \a edit_seq_file is read back by seeking \a offset bytes
 * from the current position, i.e. it is treated as a sequence of fixed-size
 * pattern records.
 *
 * \param sequence_file       File with all sequences.
 * \param seq_positions       Positions of the sequences in \a sequence_file.
 * \param edit_file           File holding the edit records.
 * \param edit_positions      Position of each internal node's edit record in \a edit_file.
 * \param node_number         The current node.
 * \param number_of_sequences The number of sequences.
 * \param aligned_sequence    The pattern that is edited.
 * \param alignment_length    The length of the alignment.
 * \param edit_seq_file       File that saves the edited patterns of the internal nodes.
 * \param offset              Negative size of one saved pattern (used as seek distance).
 * \param alignment_file      File where the alignment is saved.
 */
void
edit2alignment(FILE *sequence_file,		//sequence file
			   long *seq_positions,		//sequence positions
			   FILE *edit_file,			//file saving the edit profiles
			   long *edit_positions,	//array saving the correspondence between edit profile and position in edit_file
			   int node_number,			//the current node
			   int number_of_sequences,	//number of sequences
			   char *aligned_sequence,	//the sequence that is edited
			   int alignment_length,	//length of the alignment - and thus of aligned_sequence
			   FILE *edit_seq_file,		//file saving the edited_sequences of the internal nodes
			   int offset,				//saves the size of the edited_sequence
			   FILE* alignment_file)	//file saving the alignments
{
	enum { LINE_LENGTH = 50 };
	char line[LINE_LENGTH];
	int child1, child2, is_leaf1, is_leaf2;

	//header of the edit record: both children and their leaf flags
	fseek(edit_file, edit_positions[node_number-number_of_sequences], SEEK_SET);
	if (fgets(line, LINE_LENGTH , edit_file) == NULL)
		return;
	child1 = atoi(line);
	if (fgets(line, LINE_LENGTH , edit_file) == NULL)
		return;
	child2 = atoi(line);
	if (fgets(line, LINE_LENGTH , edit_file) == NULL)
		return;
	is_leaf1 = atoi(line);
	if (fgets(line, LINE_LENGTH , edit_file) == NULL)
		return;
	is_leaf2 = atoi(line);

	//first child
	edit2alignment_apply_ops(edit_file, aligned_sequence, 'D');

	if (is_leaf1)
	{
		edit_seq2aligned_seq(aligned_sequence, sequence_file, seq_positions[child1], alignment_file);
	}
	else
	{
		fprintf(edit_seq_file, "%s", aligned_sequence);
		edit2alignment(sequence_file, seq_positions, edit_file, edit_positions, child1, number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file);
	}

	//second child: reload the pattern this node started with
	fseek(edit_seq_file, offset, SEEK_CUR);
	fgets(aligned_sequence, alignment_length+3, edit_seq_file);
	fseek(edit_seq_file, offset, SEEK_CUR);

	//the record is read again from its start; its header lines are skipped
	//by edit2alignment_apply_ops because they do not start with M, I or D
	fseek(edit_file, edit_positions[node_number-number_of_sequences], SEEK_SET);
	edit2alignment_apply_ops(edit_file, aligned_sequence, 'I');

	if (is_leaf2)
	{
		edit_seq2aligned_seq(aligned_sequence, sequence_file, seq_positions[child2], alignment_file);
	}
	else
	{
		fprintf(edit_seq_file, "%s", aligned_sequence);
		edit2alignment(sequence_file, seq_positions, edit_file, edit_positions, child2, number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, offset, alignment_file);
	}
}




//  * A profile file has the following format (# and the text behind it are only comments and are not part of the file):
// 		 * 1		# Number of the profile.
// 		 * 0		# Leaf flag (is_leaf) of the profile.
// 		 * 5		# Number of columns in the profile.
// 		 * 1		# Weight of the profile.
// 		 * A4 C1	# In this column are 4 'A' and 1 'C'
// 		 * G3		# In this column are 3 'G'
// 		 * A5		# In this column are 5 'A'
// 		 * A2 C3	# In this column are 2 'A' and 3 'C'
// 		 * C5		# In this column are 5 'C'
// 		 * *		# Marks the end of this profile



/**
 * \brief Writes a profile field to a file at the current file position.
 *
 * The header consists of \a number, a 0, \a length and a 1 (one value per
 * line). One line per column follows, listing every symbol with a positive
 * count as "<letter><count>", separated by single blanks. A line holding
 * only '*' ends the record.
 *
 * \param sumup_prf The profile array, not a real profile.
 * \param length    The number of columns to write.
 * \param file      The FILE object to write the profile into.
 * \param number    The number of the profile.
 * \param param_set Supplies the alphabet size and the position-to-character table.
 */
void
write2file(int **sumup_prf,
		   int length,
		   FILE *file,
		   int number,
		   Fastal_param *param_set)
{
	const char *symbol_of = &(param_set->pos2char[0]);
	const int num_symbols = param_set->alphabet_size;
	int col, sym;

	fprintf(file,"%i\n0\n%i\n1\n",number, length );

	for (col = 0; col < length; ++col)
	{
		int wrote_entry = 0;	//becomes 1 once the line has its first entry

		for (sym = 0; sym < num_symbols; ++sym)
		{
			const int count = sumup_prf[sym][col];
			if (count <= 0)
				continue;
			//every entry but the first one is preceded by a blank
			if (wrote_entry)
				fprintf(file," %c%i", symbol_of[sym], count);
			else
				fprintf(file,"%c%i", symbol_of[sym], count);
			wrote_entry = 1;
		}
		fprintf(file,"\n");
	}
	fprintf(file,"*\n");
}









/**
 * \brief Main routine of the fastal algorithm.
 *
 * Steps performed:
 *  -# set up the parameters and index the sequence file,
 *  -# build a guide tree with make_partTree() if no tree file is set,
 *  -# bottom-up traversal: for every line of the tree file (two children and
 *     the new node) the child profiles are loaded, aligned with the method
 *     named in the parameters ("nw" or "fast"), and the edit record and the
 *     merged profile are written to the temporary files "edit_tmp" and "prf_tmp",
 *  -# top-down traversal: the edit records are turned into the final
 *     alignment, which is written to the output file.
 *
 * \param argc Number of arguments.
 * \param argv Arguments; they are only printed, not parsed.
 * \return 0
 */
int
fastal(int argc,	//number of arguments
	   char **argv)	//arguments first = fastal, second = tree
{

	int test;
	for (test = 0; test < argc; ++test)
	{
		printf("%s\n",argv[test]);
	}

	struct fastal_arguments arguments;
	//start from all-zero fields so that nothing is read uninitialised
	memset(&arguments, 0, sizeof(arguments));

	arguments.output_file = "out.aln";
	arguments.tree_file = NULL;
	arguments.gep = -1;
	arguments.gop = -10;
	arguments.method = "fast";

	//NOTE(review): argument parsing is disabled, so arguments.sequence_file
	//and arguments.is_dna are never assigned in this function although both
	//are used below - confirm how they are meant to be provided.
// 	argp_parse (&argp, argc, argv, 0, 0, &arguments);

	Fastal_param *param_set = vcalloc(1,sizeof(Fastal_param));
	fill_parameters(arguments.is_dna, param_set, arguments.method);
	param_set->gep = arguments.gep;
	param_set->gop = arguments.gop;


	int alphabet_size = param_set->alphabet_size;

	//sequence file management
	long *file_positions = NULL;
	int number_of_sequences = make_index_of_file(arguments.sequence_file, &file_positions);
	FILE *seq_file = fopen(arguments.sequence_file,"r");


	//edit file management
	FILE *edit_file = fopen("edit_tmp","w+");
	long *edit_positions = vcalloc(number_of_sequences,sizeof(long));


	//profile management
	Fastal_profile **profiles = vcalloc(3,sizeof(Fastal_profile*));
	initiate_profiles(profiles, param_set);
	FILE * prof_file = fopen("prf_tmp","w+");
	long* profile_positions = vcalloc(4,sizeof(long));
	int max_prof = 4;
	int saved_prof = 0;


	//dynamic programming matrix
	double ** dyn_matrix = vcalloc(1,sizeof(double*));
	dyn_matrix[0] = vcalloc(1,sizeof(double));
	int *length1 = vcalloc(1,sizeof(int));
	int *length2 = vcalloc(1,sizeof(int));
	//both values must describe the real allocation (1 x 1), because
	//resize_dyn_matrix relies on them when deciding what to grow
	*length1 = 1;
	*length2 = 1;
	int i;
	int **sumup_prf = vcalloc(alphabet_size+1,sizeof(int*));
	for (i = 0; i < alphabet_size+1; ++i)
		sumup_prf[i] = vcalloc(1,sizeof(int));
	int *sumup_length = vcalloc(1,sizeof(int));
	*sumup_length = 1;



	if (arguments.tree_file == NULL)
	{
		arguments.tree_file = "HUMAN.tree";
		printf("CONSTRUCT TREE\n");
		make_partTree(arguments.sequence_file, arguments.tree_file, 4, 20);
	}


	printf("CONSTRUCT ALIGNMENT\n");
	//tree file management
	FILE *tree_file = fopen(arguments.tree_file,"r");
	enum { LINE_LENGTH = 100 };
	char line[LINE_LENGTH];
	char delims[] = " ";
	int node[3] = {0, 0, 0};
	int alignment_length = 0;


	//memory for sparse dynamic
	int *diagonals = vcalloc(3,sizeof(int));
	int *dig_length = vcalloc(1,sizeof(int));
	*dig_length = 3;
	int **list = NULL;
	int *list_length = vcalloc(1,sizeof(int));

	*list_length = 0;



	//bottom-up traversal
	while(fgets(line, LINE_LENGTH, tree_file)!=NULL)
	{
		//read profiles
		node[0] = atoi(strtok(line,delims));
		node[1] = atoi(strtok(NULL,delims));
		node[2] = atoi(strtok(NULL,delims));
		//getting profile of second child
		if (node[1] < number_of_sequences)
		{
			file_pos2profile(seq_file, file_positions[node[1]], profiles[1], node[1], param_set);	//profile to save into
		}
		else
		{
			profile_file2profile(profiles[1], prof_file, profile_positions[--saved_prof], param_set);
			fseek (prof_file , profile_positions[saved_prof] , SEEK_SET);
		}

		//getting profile of first child
		if (node[0] < number_of_sequences)
		{
			file_pos2profile(seq_file, file_positions[node[0]], profiles[0], node[0], param_set);	//profile to save into
		}
		else
		{
			profile_file2profile(profiles[0], prof_file, profile_positions[--saved_prof], param_set);
			fseek (prof_file , profile_positions[saved_prof] , SEEK_SET);
		}
		if (saved_prof == max_prof)
		{
			max_prof += 5;
			profile_positions = vrealloc(profile_positions, max_prof*sizeof(long));
		}
		edit_positions[node[2]-number_of_sequences] = ftell(edit_file);
		profile_positions[saved_prof] = ftell(prof_file);
		++saved_prof;
		if (!strcmp(param_set->method,"nw"))
		{
			dyn_matrix = resize_dyn_matrix(dyn_matrix, length1, length2, profiles[0]->length+1, profiles[1]->length+1);
			alignment_length = prf_nw(profiles[0], profiles[1], dyn_matrix, edit_file, sumup_prf, sumup_length, param_set);
			write2file(sumup_prf, alignment_length, prof_file, node[2], param_set);
		}
		else if (!strcmp(param_set->method, "fast"))
		{
			char *file_name1 = vtmpnam(NULL);
			char *file_name2 = vtmpnam(NULL);
			char *seq1 = profile2consensus(profiles[0], file_name1, param_set);
			char *seq2 = profile2consensus(profiles[1], file_name2, param_set);
			int **diagonals_p = &diagonals;
			int num_diagonals = seq_pair2blast_diagonal(file_name1, file_name2, diagonals_p, dig_length, strlen(seq1),strlen(seq2), arguments.is_dna);
			diagonals = diagonals_p[0];
			list = diagonals2int(diagonals, num_diagonals, seq1, seq2, list_length, param_set);
			alignment_length = list2linked_pair_wise_fastal(profiles[0], profiles[1], param_set, list, *list_length, edit_file, prof_file, node[2]);
			int x;

			for (x = 0; x < *list_length; ++x)
			{
				vfree(list[x]);
			}
			vfree(list);
			list = NULL;
			vfree(seq1);
			vfree(seq2);
		}
	}

	//free_memory & close files
	vfree(diagonals);
	vfree(dig_length);
	vfree(list_length);
	fclose(tree_file);
	fclose(prof_file);
	//initiate_profiles created three profiles; release all of them
	for (i = 0; i < 3; ++i)
	{
		free_fastal_profile(profiles[i], alphabet_size);
		vfree(profiles[i]);
	}
	vfree(profiles);
	vfree(profile_positions);
	free_dyn_matrix(*length1,dyn_matrix);
	vfree(length1);
	vfree(length2);
	for (i = 0; i <= alphabet_size; ++i)
	{
		vfree(sumup_prf[i]);
	}
	vfree(sumup_prf);
	vfree(sumup_length);
	vfree(param_set);

	//bottom-down traversal (edit_files --> alignment)
	char file_name[FILENAMELEN];
	//the file name is data, never a format string
	snprintf(file_name, sizeof(file_name), "%s", arguments.output_file);

	FILE *alignment_file = fopen(file_name, "w");
	FILE *edit_seq_file = fopen("edit_seq.tmp","w+");

	char *aligned_sequence = vcalloc(alignment_length+3, sizeof(char));


	//the start pattern is one 'X' per alignment column; its size on disk
	//(negated) is the seek distance edit2alignment uses to step back
	long offset = ftell(edit_seq_file);
	for (i = 0; i < alignment_length; ++i)
	{
		fprintf(edit_seq_file, "X");
		aligned_sequence[i] = 'X';
	}
	aligned_sequence[i]= '\n';
	aligned_sequence[i+1]= '\0';
	fprintf(edit_seq_file, "\n");
	offset = (ftell(edit_seq_file) - offset)*-1;


	edit2alignment(seq_file, file_positions, edit_file, edit_positions, node[2], number_of_sequences, aligned_sequence, alignment_length, edit_seq_file, (int)offset, alignment_file);


	//free_memory & close files

	vfree(aligned_sequence);
	vfree(edit_positions);
	fclose(alignment_file);
	fclose(edit_seq_file);
	fclose(edit_file);
	fclose(seq_file);

	return 0;
}




//******************   toolbox   ***************************


/**
 * \brief Enlarges the dynamic programming matrix in case it is too small.
 *
 * Rows are added first (each new row gets the current number of columns),
 * then all rows are widened. The matrix is never made smaller.
 *
 * \param dyn_matrix  The dynamic programming matrix.
 * \param old_length1 Current length of dimension 1; updated on growth.
 * \param old_length2 Current length of dimension 2; updated on growth.
 * \param length1     New minimum length of dimension 1.
 * \param length2     New minimum length of dimension 2.
 * \return The (possibly moved) matrix.
 */
double**
resize_dyn_matrix(double **dyn_matrix,	//the dynamic programming matrix
				  int *old_length1,		//old length of dimension 1
				  int *old_length2,		//old length of dimension 2
				  int length1,			//new minimum length of dimension 1
				  int length2)			//new maximum length of dimension 2
{
	int row;

	//more rows needed?
	if (length1 > *old_length1)
	{
		dyn_matrix = vrealloc(dyn_matrix,length1*sizeof(double*));
		row = *old_length1;
		while (row < length1)
		{
			dyn_matrix[row] = vcalloc(*old_length2,sizeof(double));
			++row;
		}
		*old_length1 = length1;
	}

	//more columns needed?
	if (length2 > *old_length2)
	{
		const int num_rows = *old_length1;
		row = 0;
		while (row < num_rows)
		{
			dyn_matrix[row] = vrealloc(dyn_matrix[row], length2*sizeof(double));
			++row;
		}
		*old_length2 = length2;
	}

	return dyn_matrix;
}



/**
 * \brief Frees the memory of a dynamic programming matrix.
 *
 * \param length1    Number of rows of the matrix.
 * \param dyn_matrix The matrix to free.
 */
void
free_dyn_matrix(int length1,			//length of first dimension
				double **dyn_matrix)	//dynamic matrix
{
	int row = 0;

	//rows first, the row table afterwards
	while (row < length1)
	{
		vfree(dyn_matrix[row]);
		++row;
	}
	vfree(dyn_matrix);
}



/**
 * \brief Initialises the profiles.
 *
 * Three profiles are allocated. Each one starts with weight 1, is marked as
 * leaf and has room for PROFILE_ENLARGEMENT columns per alphabet symbol.
 *
 * \param profiles  Array with room for three profile pointers.
 * \param param_set Supplies the alphabet size.
 */
void
initiate_profiles(Fastal_profile **profiles,	//profiles pointer
				  Fastal_param *param_set)
{
	const int num_symbols = param_set->alphabet_size;
	int slot, row;

	for (slot = 0; slot < 3; ++slot)
	{
		Fastal_profile *fresh = vcalloc(1,sizeof(Fastal_profile));

		profiles[slot] = fresh;
		fresh->weight = 1;
		fresh->is_leaf = 1;
		fresh->prf = vcalloc(num_symbols, sizeof(int*));
		for (row = 0; row < num_symbols; ++row)
		{
			fresh->prf[row] = vcalloc(PROFILE_ENLARGEMENT, sizeof(int));
		}
		fresh->allocated_memory = PROFILE_ENLARGEMENT;
	}
}


/**
 * \brief Initialises the files where the profiles are temporarily stored.
 *
 * Opens the four files "tmp_prf_0" ... "tmp_prf_3" in mode "w+". An entry is
 * NULL if the corresponding file could not be opened.
 *
 * \param profile_files Array with room for four FILE pointers.
 */
void
initiate_profile_files(FILE **profile_files)
{
	//bounded formatting with headroom; the previous buffer fitted the
	//longest name only exactly
	char name[32];
	int i;

	for (i = 0; i < 4; ++i)
	{
		snprintf(name, sizeof(name), "tmp_prf_%i", i);
		profile_files[i] = fopen(name,"w+");
	}
}



/**
 * \brief Frees the count table of a profile.
 *
 * All rows of profile->prf and the row table itself are released. The
 * profile structure is not freed here.
 *
 * \param profile       The profile whose table is freed.
 * \param alphabet_size Number of rows in the table.
 */
void
free_fastal_profile(Fastal_profile* profile, int alphabet_size)
{
	int row = alphabet_size;

	//rows are released from the last one down to the first
	while (row-- > 0)
		vfree(profile->prf[row]);
	vfree(profile->prf);
}


/**
 * \brief Initialises the parameters.
 *
 * Stores the method name, the alphabet size (10 for DNA, 24 otherwise), the
 * position-to-character table, its inverse (indexed by letter - 'A', -1 for
 * letters outside the alphabet) and the matrix read from "dna_idmat" or
 * "blosum62mt".
 *
 * \param is_dna    Non-zero selects the DNA alphabet.
 * \param param_set Parameter set to fill.
 * \param method    Name of the alignment method.
 */
void
fill_parameters(int is_dna, Fastal_param *param_set, char *method)
{
	static const char dna_alphabet[] = {'A','C','G','T','N','R','Y','D','M','W'};
	static const char protein_alphabet[] = {'A','C','G','T','F','D','H','I','K','L','M','N','P','Q','R','S','E','V','W','Y','B','J','X','Z'};
	const char *alphabet;
	char *matrix_name;
	int pos, letter;

	sprintf(param_set->method,"%s",method);

	if (is_dna)
	{
		param_set->alphabet_size = 10;
		alphabet = dna_alphabet;
		matrix_name = "dna_idmat";
	}
	else
	{
		param_set->alphabet_size = 24;
		alphabet = protein_alphabet;
		matrix_name = "blosum62mt";
	}

	//the reverse table is derived from the alphabet
	for (letter = 0; letter < 26; ++letter)
		param_set->char2pos[letter] = -1;
	for (pos = 0; pos < param_set->alphabet_size; ++pos)
	{
		param_set->pos2char[pos] = alphabet[pos];
		param_set->char2pos[alphabet[pos]-'A'] = pos;
	}

	param_set->M = read_matrice(matrix_name);
}
/*********************************COPYRIGHT NOTICE**********************************/
/* Centro de Regulacio Genomica */
/*and */
/*Cedric Notredame */
/*Tue Oct 27 10:12:26 WEST 2009. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with Foobar; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*	*/
/*********************************COPYRIGHT NOTICE**********************************/
