//
//  Fragment_Banks.cpp
//
//
//  Created by Pierre Peterlongo on 25/10/2012
//
#include "Fragment_Bank.h"
extern int size_seeds;

Fragment_Bank::Fragment_Bank(){
    //seeds = hash_create(100000);  	test_alloc(seeds);
    
    seeds_count = hash_create_binarykey(100000);  	test_alloc(seeds_count);
}
Fragment_Bank::~Fragment_Bank(){
    //  hash_delete(seeds, list_of_generic_free);
    //free(seeds_count);
    //  hash_delete(seeds_count, list_of_generic_free);
    hash_free(seeds_count);
    all_fragments.clear(); // All the elements of the vector are dropped: their destructors are called, and then they are removed from the vector container, leaving the container with a size of 0.
    // free the array of couples:
    free(seed_table);
}


void Fragment_Bank::load_fragments(char * fragment_file){
    Bank * bank = new Bank(fragment_file);
    char * read;
    int size_read;
    while(bank->get_next_seq(&read,&size_read)){ // each fragment
        all_fragments.push_back(new Fragment_Starting(read));
    }
    delete bank;
}


int estimated_size_reads=256; // ATTENTION : this parameter is a source of problems... Take care it fits the maximal read size.
void Fragment_Bank::index_fragments_and_init_vector(){
    char * read;
    char seed [size_seeds+1];
    int size_fragment;
    int fragment_id=0;
    total_seeds = 0 ;
    
    long sum=0;
    
    // FIRST COUNT THE NUMBER OF OCCURRENCES OF EACH SEED
    for( int i=0; i<all_fragments.size();i++){
        read = all_fragments[i]->fragment_sequence;
        
        size_fragment=strlen(read);
        all_fragments[i]->starting_id_in_the_boolean_vector=sum+estimated_size_reads;
        sum+=size_fragment+estimated_size_reads;
        int stop = size_fragment-size_seeds+1;
        for (int i=0;i<stop;i++){ // each seed
            int validSeed=1;
            for(int j=0;j<size_seeds;j++) {
                seed[j]=read[i+j];// read the seed
                if(!valid_character(seed[j])){validSeed=0; break;}
            }
            if(validSeed) {
                seed[size_seeds]='\0';
                hash_incr_kmer_count(seeds_count,seed);
                total_seeds++;
            }
        } // end each seed
        fragment_id++;
    } // end each fragment
    printf("total seeds in fragments %lli  size %lli MB \n",total_seeds,total_seeds*sizeof(couple)/1024LL/1024LL);
    
    printf("sum = %ld\n", sum);
    bv = new BooleanVector(sum);
    
    fragment_id=0;
    seed_table = (couple *) calloc(total_seeds,sizeof(couple));
    test_alloc(seed_table);
    iterate_and_fill_offsets(seeds_count);
    
    // NOW FILL THE INFORMATION ABOUT EACH SEED
    for( int i=0; i<all_fragments.size();i++){
        read = all_fragments[i]->fragment_sequence;
        size_fragment=strlen(read);
        int stop = size_fragment-size_seeds+1;
        for (int i=0;i<stop;i++){ // each seed
            int validSeed=1;
            for(int j=0;j<size_seeds;j++) {
                seed[j]=read[i+j];// read the seed
                if(!valid_character(seed[j])){validSeed=0; break;}
            }
            if(validSeed) {
                seed[size_seeds]='\0';
                hash_fill_kmer_index(seeds_count,seed,seed_table, fragment_id, i);
            }
        } // end each seed
        fragment_id++;
    } // end each fragment
}



/**
 *  |<->|  pwi (may be negative)
 *       |<->| pseed (where seed mapped on fragment)
 *       --------------  fragment
 *           ||||||    seed
 *  *******************      read
 *
 *  Tests if the overlapping part between read and fragment do not have more than <code>subst_allowed</code> substitutions and if the overlap size is bigger or equal to <code>min_overlap</code>
 *  returns 1 if true between read and fragment, 0 else
 */
char read_coherent_enhanced(const int pwi, const int pseed, const char * fragment, const char * read, const int subst_allowed){
	int substitution_seen=0; // number of seen substitutions for now
	int pos_on_read, pos_on_fragment; // where to start
    
	/*
	 *  | pwi (negative)
	 *     --------------  fragment
	 *  *************      read
	 *     | we start here on the read
	 */
	if(pwi<0) {
		pos_on_fragment=0;
		pos_on_read=-pwi;
	}
    
	/*
	 *        | pwi (positive)
	 *     --------------  fragment
	 *        *************      read
	 *        | we start here on the read
	 */
	else{
		pos_on_fragment=pwi;
		pos_on_read=0;
        
	}
    
    
    
	// walk the read and the fragment together, detecting substitutions.
	// stop if the number of substitution is too high
	while(fragment[pos_on_fragment]!='\0' && read[pos_on_read]!='\0'){
        if(pos_on_fragment==pseed){ // we reached the seed, we can skip it, we know it contains no substitutions
#ifdef DEBUG_COHERENT
			printf("seeds (%d on fragment %d on read:\n", pseed, pseed-pwi);
			int z;
			for(z=pseed;z<pseed+size_seeds;z++) printf("%c", fragment[z]); printf("\n");
			for(z=pseed-pwi;z<pseed-pwi+size_seeds;z++) printf("%c", read[z]); printf("\n");
            
            
			// TEMP TODO: REMOVE THESE LINES ONCE SURE
			for(z=0;z<size_seeds;z++)
				if(fragment[z+pos_on_fragment]!=read[z+pos_on_read]){
					printf("fragment[%d]!=read[%d] (%c != %c)\n", z+pos_on_fragment, z+pos_on_read, fragment[z+pos_on_fragment],read[z+pos_on_read]);
					int space=30;
                    for(z=0;z<space;z++) printf(" "); printf("%s - pwi: %d - seed: %d\n", fragment, pwi, pseed);
                    for(z=0;z<space+pwi;z++) printf(" ");
                    printf("%s\n", read);
                    exit(1);
				}
			// END LAST TODO REMARQ
#endif
			pos_on_fragment+=size_seeds;
			pos_on_read+=size_seeds;
		}
		else{
            
			if(fragment[pos_on_fragment]!=read[pos_on_read]){ // one subsitution
				substitution_seen++;
				if(substitution_seen>subst_allowed) return 0;
			}
			pos_on_fragment++;
			pos_on_read++;
		}
	}
	return 1;
}


// checks if, for a fragment, a position mapped already (return 1) or none (return 0)
int exists_tuple_read_position(const vector<int> mapped_positions, const int pos){
    return find(mapped_positions.begin(), mapped_positions.end(), pos)!=mapped_positions.end();
}


// checks if, for a fragment, a tuple read/position mapped already a few position before of after (<min_dist) (return 0) or not (return 1)
char previous_mapping_position_far_enougth(const hash_t map, const char * read, const int pos){
	const int min_dist = 10;
	/***************
	 *  these two reads overlap on |read|-min_dist. Lets consider only one one them
	 *   +++++++++++++++++++++++++++++++++++++  fragment
	 *        ------------------------          R1
	 *           --------------------------     R2
	 *****************/
	list_ * read_coherent_positions;                 //for one couple (read/fragment) : list of coherent mapped positions (usually, one position)
	if(hash_entry_by_key(map, read, (void **) (&read_coherent_positions)) !=0){
		cell_mapsembler *c = ((list_ *)read_coherent_positions)->first;
		while(c != NULL){
			if((*(int *) (c->val)) != pos && // in case the same read exists twice in the read file, mapped twice at the same position.
               (*(int *) (c->val)) > pos - min_dist && (*(int *) (c->val)) < pos+min_dist)	{
# ifdef DEBUG_EXTENTIONS
				//				printf("pos %d to close to %d return 0\n", (*(int *) (c->val)), pos); // DEB
# endif
				return 0;
			}
			c = c->prox;
		}
	}
	return 1;
}

/**
 * test if a position has already been tested (for instance an anchoring position of a mapped read)
 */
bool Fragment_Bank::is_position_already_tested(const Fragment_Starting * fragment, int pos){
    return bv->is_boolean_vector_visited(fragment->starting_id_in_the_boolean_vector+pos);
}
/**
 * set a position has already been tested (for instance an anchoring position of a mapped read)
 */
void Fragment_Bank::set_position_tested(const Fragment_Starting * fragment, int pos){
    bv->set_boolean_vector_visited(fragment->starting_id_in_the_boolean_vector+pos);
}

void Fragment_Bank::map_reads(Bank * reads, const int substitutions_allowed){
    //////////////////////////////////////////////////////////////////////////
	/////////////// read all reads - storing those coherent with reads ///////
	//////////////////////////////////////////////////////////////////////////
    
    
	//char * fragment;
	//list * mapping_positions;             //offsets of seeds mapping positions for a read
	// working variables
	int pwi,  stop, read_coherence_value, extention_number=0;//
	//int read_id=0;
    uint64_t offset_seed;
    uint64_t nb_seeds;
    
    
    
    
	// book space for the revcomp read that going to be read.
	char * rev_comp_read = (char *)malloc(sizeof(char)*16384); //
	test_alloc(rev_comp_read);
    
	// read all the reads
	char seed [size_seeds+1];
	char * read;
	int size_read;
    
	while(reads->get_next_seq(&read,&size_read)){ // each read
        stop = size_read-size_seeds+1;
        // read all seeds present on the read:
        for(int direction=0;direction<2;direction++) { // 0 = forward, 1 = rev comp
            for (int i=0;i<stop;i++){
                int j;
                for(j=0;j<size_seeds;j++)
                    seed[j]=read[i+j]; seed[j]='\0';// read the seed
//                printf("query seed = %s\n", seed);
                // if the seed is indexed in the fragments:
                if(get_seed_info(seeds_count,seed,&offset_seed,&nb_seeds)){
                    //	    if(hash_entry_by_key(seeds, seed, (void **) (& mapping_positions)) !=0){
                    //	      cell_mapsembler * c = mapping_positions->first;
                    // for each occurrence of this seed on the fragment:
                    int ii;
                    for (ii=offset_seed; ii<offset_seed+nb_seeds; ii++) {
                        couple * value = &(seed_table[ii]);
                        Fragment_Starting* fragment = all_fragments[value->a];
                        
                        if(!fragment->Mapped){
                            fragment->Mapped=hash_create(1000);
                            test_alloc(fragment->Mapped);
                        }
                        
                        pwi = value->b-i; // starting position of the read on the fragment.
                        
//                        printf("with seed %s occurring pos %d on read and %d on fragment\n", seed, i, value->b);
                        
                        // overview of the situation:
                        
                        //        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;  fragment
                        //        <---------> b
                        //                   [--------]                     seed
                        //             ******************************       read
                        //             <----> i
                        //        <---> pwi
                        
                        // if the tuple read/fragment/position was not already mapped nor already detected as unmappable, try to map.
                        if(!is_position_already_tested(fragment, pwi)){// &&
                            //		   previous_mapping_position_far_enougth (fragment->Mapped, read, pwi)){
                            read_coherence_value = read_coherent_enhanced(pwi, value->b, fragment->fragment_sequence, read, substitutions_allowed);
                            //		  printf("test read coherence between %s and %s %d %d = %d\n", fragment->fragment_sequence, read, read_id, pwi, read_coherence_value);
                            //		  printf("with seed %s occurring pos %d on read and %d on fragment\n", seed, i, value->b);
                            if(read_coherence_value == 1){ // tuple read fragment->fragment_sequence position is read coherent
                                hash_add_int_to_list(fragment->Mapped, read, pwi);
                            } // end tuple read fragment->fragment_sequence position is read coherent
                            
                            set_position_tested(fragment, pwi);
                        } // end tuple read/fragment->fragment_sequence/position was not already mapped nor already detected as unmappable
                        //					else printf("%s %s %d %d already tested \n", fragment->fragment_sequence, read, read_id, pwi);
                        //		c=c->prox;
                    } // end for each occurrence of this seed on the fragment->fragment_sequence
                } // end if the seed is indexed in the fragment->fragment_sequences:
            } // end all seeds of the read
            
            bv->reinit();
            revcomp(read,size_read);
        } // end each direciton
    } // end each read
    
    
}

void Fragment_Bank::show_alignments(){
    
    //int print_fragment_cover(hash_t map, char * fragment, signed int left){
    int j,p, space=70;
    char *key;
    int *position_fragment;
    list_ *mapping_positions;
    hash_iter iterator;
    
    //  for(vector<Fragment>::iterator fragment_it=all_fragments.begin(); fragment_it!= all_fragments.end(); ++(fragment_it)){
    for(int i=0;i<all_fragments.size();i++){
        Fragment * fragment_it=all_fragments[i];
        char * fragment_seq = fragment_it->fragment_sequence;
        printf("\n");
        for(j=0;j<space;j++) printf(" "); printf("%s = Fragment", fragment_seq);
        printf("\n");
        if(fragment_it->Mapped==NULL){
            for(j=0;j<space;j++) printf(" "); printf("NO_MAPPED_READS\n");
            continue;
        }
        hash_t map = fragment_it->Mapped;
        iterator=hash_iterator_init(map); // get the first element of the Mapped hashmap
        // Go through the hashmap Mapped and for each fragment
        // get the reads that have seeds that match somewhere on this fragment
     
        if((long int)iterator!=-1){
            while(!hash_iterator_is_end(map, iterator)){     // go through all the mapped reads
                hash_iterator_return_entry(map, iterator, &key, (void**)&mapping_positions); // get key(read) and corresponding mapping positions (pwi)
                if(mapping_positions){
                    cell_mapsembler * c=mapping_positions->first;
                    //print mapping positions
                    while(c!=NULL){
                        //	    couple *value= (couple *) c->val;
                        position_fragment=(int *)c->val;
                        //position_fragment=(int) *(c->val);
                        for(j=0;j<space+(*position_fragment);j++) printf(" ");
                        for(p=0;p<(signed)strlen(key);p++){
                            if((*position_fragment)+p>=0 && (*position_fragment)+p<(signed)strlen(fragment_seq) && key[p] != fragment_seq[(*position_fragment)+p]) printf("%c", tolower(key[p]));
                            else printf("%c", key[p]);
                        }
                        printf(" %d\n", (*position_fragment));
                        
                        c=c->prox;
                    }
                }
                iterator=hash_iterator_next(map, iterator);
            }
        }
    }
}

void Fragment_Bank::output_mapped_reads(const char * output_file_name){
    
    //int print_fragment_cover(hash_t map, char * fragment, signed int left){
    int j,p;
    char *key;
    FILE * out = fopen(output_file_name, "w");
    if(out == NULL){
        fprintf(stderr, "Cannot open file %s for writing mapped reads, sorry\n", output_file_name);
        return;
    }
    int *position_fragment;
    list_ *mapping_positions;
    hash_iter iterator;
    
    //  for(vector<Fragment>::iterator fragment_it=all_fragments.begin(); fragment_it!= all_fragments.end(); ++(fragment_it)){
    for(int i=0;i<all_fragments.size();i++){
        Fragment * fragment_it=all_fragments[i];
        
        char * fragment_seq = fragment_it->fragment_sequence;
        if(fragment_it->Mapped==NULL){
            fprintf(out, ">Fragment %s NO_MAPPED_READS\n", fragment_seq);
            continue;
        }
        fprintf(out, ">Fragment %s\n", fragment_seq);
        
        
        hash_t map = fragment_it->Mapped;
        
        
        iterator=hash_iterator_init(map); // get the first element of the Mapped hashmap
        // Go through the hashmap Mapped and for each fragment
        // get the reads that have seeds that match somewhere on this fragment
    
        
        if((long int)iterator!=-1){
            while(!hash_iterator_is_end(map, iterator)){     // go through all the mapped reads
                hash_iterator_return_entry(map, iterator, &key, (void**)&mapping_positions); // get key(read) and corresponding mapping positions (pwi)
                if(mapping_positions){
                    cell_mapsembler * c=mapping_positions->first;
                    //print mapping positions
                    while(c!=NULL){
                        //	    couple *value= (couple *) c->val;
                        position_fragment=(int *)c->val;
                        //position_fragment=(int) *(c->val);
                        fprintf(out, ">read - occ %d\n", (*position_fragment));
                        for(p=0;p<(signed)strlen(key);p++){

                            if((*position_fragment)+p>=0 && (*position_fragment)+p<(signed)strlen(fragment_seq) && key[p] != fragment_seq[(*position_fragment)+p]) fprintf(out,"%c", tolower(key[p]));
                            else fprintf(out,"%c", key[p]);
                        }
                        fprintf(out,"\n");
                        
                        c=c->prox;
                    }
                }
                iterator=hash_iterator_next(map, iterator);
            }
        }
    }
    fclose(out);
}
