/*=======================================================================
 * All files in the distribution of the DPS system are Copyright
 * 1996 by the Computational Biology group in the Department of Biological
 * Sciences at Purdue University.  All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that this entire copyright notice is duplicated in all such
 * copies, and that any documentation, announcements, and other materials
 * related to such distribution and use acknowledge that the software was
 * developed by the Computational Biology group in the Department of
 * Biological Sciences at Purdue University, W. Lafayette, IN by Ingo
 * Steller and Michael G. Rossmann. No charge may be made for copies,
 * derivations, or distributions of this material without the express
 * written consent of the copyright holder.  Neither the name of the
 * University nor the names of the authors may be used to endorse or
 * promote products derived from this material without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY PARTICULAR PURPOSE.
 *======================================================================*/

/*=====================================================================*
 *                                                                     *
 *                         Data Processing Suit                        *
 *                                                                     *
 *                              dps_index                              *
 *                                                                     *
 *                        Written by Ingo Steller                      *
 *                                                                     *
 *                        File: ind_sort_merge.c                       *
 *                                                                     *
 *=====================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* #include <malloc.h> */
#include "util.h"
#include "index.h"
#include "ind_sort_merge.h"

/* t_vector_exchange(): Exchange two t_vectors				*/

void t_vector_exchange(struct tvector *a, struct tvector *b)

{
	struct tvector tmp;

	/* a to tmp */
	cp_vectorf(a->vec,&(tmp.vec));
	tmp.fit=a->fit;
	tmp.len=a->len;
	/* b to a */
	cp_vectorf(b->vec,&(a->vec));
	a->fit=b->fit;
	a->len=b->len;
	/* tmp to b */
	cp_vectorf(tmp.vec,&(b->vec));
	b->fit=tmp.fit;
	b->len=tmp.len;
}


/* sort_tvectors_on_l(): Sort a given list of tvectors on theirlength	*/

void sort_tvectors_on_l(int t_vectors, struct tvector t[])

{
	int i,j;		/* counters */

	/* Sort by exchange						*/
	for(i=0;i<= t_vectors;i++) {
		for(j=0; j<= t_vectors-1; j++) {
			if (( t[j].len/t[j].fit > t[j+1].len/t[j+1].fit ) ) {
				t_vector_exchange(&t[j],&t[j+1]);
			}
		}
	}


}

/* sort_tvestors_on_n(): Sort a given list of tvectors on the number	*
 *			 of fitting reflections				*/

void sort_tvectors_on_n(int t_vectors, struct tvector t[])

{
	int i,j;		/* counters */

	/* Sort by exchange						*/
	for(i=0;i<= t_vectors;i++) {
		for(j=0; j<= t_vectors-1; j++) {
			if (( t[j].fit < t[j+1].fit ) ) {
				t_vector_exchange(&t[j],&t[j+1]);
			}
		}
	}


}


/* sort_tvestors_on_n_and_len(): Sort a given list of tvectors on the 	*
 *			number of fitting reflections and the on the 	*
 *			length of the vector.				*/

void sort_tvectors_on_n_and_l(int t_vectors, struct tvector t[])

{
	int i,j;		/* counters */

	/* Sort by exchange						*/
	for(i=0;i<= t_vectors;i++) {
		for(j=0; j<= t_vectors-1; j++) {
			if (( t[j].fit < t[j+1].fit ) ||
			    ( ( t[j].fit == t[j+1].fit ) &&
			    ( t[j].len > t[j+1].len  ) ) ) {
				t_vector_exchange(&t[j],&t[j+1]);
			}
		}
	}


}
/* Harry's new version!!
 * merge_tvectors_same_n(): Merge a list of t_vectors in place.	*
 *		Two vectors are merged when they are (anti)parallel,	*
 *		i.e. the sine of the angle between them is not larger	*
 *		than SIN_CUTOFF, and when the length of the candidate	*
 *		lies within a factor DIFF_REL of the mean length of	*
 *		the vectors merged so far. Antiparallel vectors are	*
 *		flipped before they are added. Each merged vector is	*
 *		the fit-weighted mean of its members; its fit is the	*
 *		mean fit of the members.				*
 *									*
 *		t_vectors	highest valid index of t (the list	*
 *				holds t_vectors+1 entries)		*
 *		t		input list; on return t[0..result]	*
 *				hold the merged vectors. All other	*
 *				entries that took part in a merge have	*
 *				been zeroed.				*
 *									*
 *		Returns the highest valid index of the merged list	*
 *		(-1 if the list contained no non-zero vector).		*/

int merge_tvectors_same_n(int t_vectors, struct tvector t[])

{
	int i,j,k;		/* counters; k is the index of the merged
				   vector that is currently being built	*/
	struct tvector *res;	/* temporary array for the merged t_vectors:
				   vec holds the fit-weighted sum, fit the
				   sum of fits, len the sum of lengths	*/
	int *res_count;		/* res_count[k] is the number of t_vectors
				   merged into res[k]			*/
	float vec_scale,sin_vector,sign_vector;
	struct vector normvec[2];	/* unit vectors: [0] seed, [1] candidate */
	int res_vectors;	/* highest valid index of res and res_count */
	double len_ratio;	/* candidate length / mean merged length */

#define SIN_CUTOFF 0.1
#define DIFF_REL   1.05

#ifdef MERGE_TVECTORS_SAME_N_DEBUG
	printf(" MERGE_TVECTORS_SAME_N entered with %d t_vectors...\n", t_vectors);
#endif

	/* Allocate memory for the temporary array */

	res = dps_malloc("merge_tvectors_same_n", "res", sizeof(struct tvector)*(t_vectors+1));

	/* Allocate memory for count array */

	res_count = dps_malloc("merge_tvectors_same_n", "res_count", sizeof(*res_count)*(t_vectors+1));

	k = -1;

	/* Outer loop - every vector that has not been consumed yet becomes
	   the seed of a new merged vector and is then compared to each of
	   the following vectors in turn */

	for(i = 0; i <= t_vectors; i++) {

		/* A vector that has been consumed already was set to zero.
		   Only an all-zero vector counts as consumed: a vector with
		   a single zero component (e.g. one along an axis) is still
		   valid and must not be dropped. */

		if(t[i].vec.x == 0.0 && t[i].vec.y == 0.0 && t[i].vec.z == 0.0)
			continue;

		/* Start a new merged vector with the fit-weighted seed */

		++k;
		cp_vectorf(t[i].vec,&(res[k].vec));
		res[k].fit = t[i].fit;
		res[k].len = t[i].len;
		res[k].vec.x *= res[k].fit;
		res[k].vec.y *= res[k].fit;
		res[k].vec.z *= res[k].fit;
		res_count[k] = 1;

		/* Normalize first vector... */

		vec_scale = 1.0 / vec_lenf(t[i].vec);
		normvec[0].x = vec_scale * t[i].vec.x;
		normvec[0].y = vec_scale * t[i].vec.y;
		normvec[0].z = vec_scale * t[i].vec.z;

		/* Now zero this vector before it does any harm */

		t[i].vec.x = 0.0;
		t[i].vec.y = 0.0;
		t[i].vec.z = 0.0;
		t[i].len   = 0.0;
		t[i].fit   = 0.0;

		/* Inner loop across the remaining vectors */

		for(j = i + 1; j <= t_vectors; j++) {

			/* Skip vectors that have been consumed already */

			if(t[j].vec.x == 0.0 && t[j].vec.y == 0.0 && t[j].vec.z == 0.0)
				continue;

			/* Normalize second vector... */

			vec_scale = 1.0 / vec_lenf(t[j].vec);
			normvec[1].x = vec_scale * t[j].vec.x;
			normvec[1].y = vec_scale * t[j].vec.y;
			normvec[1].z = vec_scale * t[j].vec.z;

			/* Cosine of the angle between first and second vector */

			sign_vector = (normvec[0].x * normvec[1].x)+
			    (normvec[0].y * normvec[1].y)+
			    (normvec[0].z * normvec[1].z);

			/* Rounding can push the cosine of (anti)parallel unit
			   vectors marginally outside [-1,1]; acos() would then
			   return NaN and the vectors would never be merged. */

			if (sign_vector > 1.0)
				sign_vector = 1.0;
			else if (sign_vector < -1.0)
				sign_vector = -1.0;

			/* The vectors are considered non-linear if the sine of
			   the angle is bigger than the threshold */

			sin_vector = fabs(sin(acos(sign_vector)));
			if (!(sin_vector <= SIN_CUTOFF))
				continue;

			/* Check to see if they are the same length more-or-less;
			   res[k].len is a sum, so divide by the member count */

			len_ratio = t[j].len/(res[k].len/res_count[k]);
			if (!((len_ratio < DIFF_REL) && (len_ratio > 1.0/DIFF_REL)))
				continue;

			/* Add the fit-weighted vector; flip it first if it
			   points in the opposite direction */

			if(sign_vector>0){
				res[k].vec.x += (t[j].vec.x * t[j].fit);
				res[k].vec.y += (t[j].vec.y * t[j].fit);
				res[k].vec.z += (t[j].vec.z * t[j].fit);
			}
			else {
				res[k].vec.x -= (t[j].vec.x * t[j].fit);
				res[k].vec.y -= (t[j].vec.y * t[j].fit);
				res[k].vec.z -= (t[j].vec.z * t[j].fit);
			}
			res[k].fit += t[j].fit;
			res[k].len += t[j].len;

			/* Now zero _this_ vector before it does any harm */

			t[j].vec.x = 0.0;
			t[j].vec.y = 0.0;
			t[j].vec.z = 0.0;
			t[j].len   = 0.0;
			t[j].fit   = 0.0;
			++res_count[k];
		}
		/* End of inner loop around vectors */
	}
	/* End of outer loop around vectors */

	/* Now we have to loop through the temporary structure "res" to calculate
	   the weighted mean of each vector and its weight and length... */

	res_vectors = k;
	for(i = 0; i <= res_vectors; i++) {
		t[i].vec.x = res[i].vec.x / res[i].fit;
		t[i].vec.y = res[i].vec.y / res[i].fit;
		t[i].vec.z = res[i].vec.z / res[i].fit;
		t[i].len = vec_lenf(t[i].vec);
		t[i].fit = res[i].fit / res_count[i];
	}

	/* Free memory */

	free(res);
	free(res_count);

	/* return the new t_vector count */

#ifdef MERGE_TVECTORS_SAME_N_DEBUG
	printf(" MERGE_TVECTORS_SAME_N left with %d merged vectors...\n",res_vectors);
#endif
	return(res_vectors);
}


/* merge_tvectors_same_nm(): Merge a list of t_vectors in place. Merge	*
 *		vectors with same length and same or opposite direction *
 *		that have approx the same number of fitting reflections.*
 *		Every t_vector is compared with the first member	*
 *		(the reference) of each merged vector found so far.	*
 *		The allowed absolute and relative difference is given	*
 *		by MERGE_DIFF_ABS and MERGE_DIFF_REL.			*
 *									*
 *		t_vectors	highest valid index of t (the list	*
 *				holds t_vectors+1 entries)		*
 *		t		input list; on return t[0..result]	*
 *				hold the merged vectors (plain mean of	*
 *				the members, fit of the reference)	*
 *									*
 *		Returns the highest valid index of the merged list.	*
 *		A negative t_vectors (empty list) is returned		*
 *		unchanged.						*/

int merge_tvectors_same_nm(int t_vectors, struct tvector t[])

{
	int i,j;		/* counters */
	struct tvector *res;	/* temporary array for the merged t_vectors */
	struct res_count_struct {
		int count;
		struct tvector *ref;
	} 
	*res_count;		/* In this array the number of t_vectors
				   to merge is stored. e.g. res_count[3].count=5
				   means that in res[3] five vectors are
				   merged. with res_count[3].ref the original
				   t_vector can be found for comparison.     */
	struct tvector *seed;	/* reference t_vector of the jth merged vector */
	int found_slot;		/* Flag, whether a vector to merge was found 
				   =1 or not =0.			*/
	int res_vectors;	/* Number of merged vectors and index of
				   res and res_count			*/
	float diff_x, diff_y, diff_z, diff_len,diff_n;	/* temporary differences */

#ifdef MERGE_TVECTORS_SAME_N_DEBUG
	printf(" MERGE_TVECTORS_SAME_N entered with %d t_vectors...\n", t_vectors);
#endif

	/* Nothing to merge in an empty list; t[0] must not be touched */
	if (t_vectors < 0)
		return(t_vectors);

	/* Allocate memory for the temporary array */
	res = dps_malloc("merge_tvectors_same_n", "res", sizeof(struct tvector)*(t_vectors+1));

	/* Allocate memory for count array */
	res_count = dps_malloc("merge_tvectors_same_n", "res_count", sizeof(struct res_count_struct)*(t_vectors+1));

	/* Copy first vector into temporary array */
	cp_vectorf(t[0].vec,&(res[0].vec));
	res[0].fit=t[0].fit;
	res[0].len=t[0].len;
	res_vectors=0;
	res_count[0].count=1;
	res_count[0].ref=&t[0];

	/* Now look for every t_vector (without t[0] see above) for a 
	   fitting res vector. If we find one, add t_vector to res vector and
	   increase res_count of this vector. If we find no res vector, append
	   the t_vector to the res vector list and increase res_vectors. 
	   Initialize a new res_count record.				 */
	for(i=1;i<=t_vectors;i++) {
		/* No slot res vector found until now */
		found_slot=0;
		/* For all vectors in res get difference and compare until
		   a res vector is found or no more res vectors are 
		   available.						*/
		for(j=0;j<=res_vectors;j++) {
			seed=res_count[j].ref;
			/* Only merge for same n. Therefore check for n 	*/
			diff_n=fabsf(t[i].fit-seed->fit);
			if (diff_n<=fabsf(seed->fit*MERGE_DIFF_REL)) {
				/* Get the differences between ith t_vector and jth
				   merged vector.
				   NOTE(review): the z difference is deliberately
				   disabled here (always 0.0) although it is used
				   in the opposite-direction test below - confirm
				   whether this is intended.			*/
				diff_x=fabs(t[i].vec.x-seed->vec.x);
				diff_y=fabs(t[i].vec.y-seed->vec.y);
				diff_z=0.0;
				diff_len=fabs(t[i].len-seed->len);
				/* Decide if ith t_vector can be merged with jth merged vector.
				   For this all differences must be smaller than the relative
				   or absolute cutoff. The absolute cutoff will make problems
				   with smaller unit cells. Perhaps change to relative regarding
				   d_max????					
				   The length will not be copied, but recalculated afterwards	*/
				if  (((diff_x <= fabs(seed->vec.x*MERGE_DIFF_REL)) || 
				    (diff_x < MERGE_DIFF_ABS ))					&&
				    ((diff_y <= fabs(seed->vec.y*MERGE_DIFF_REL)) || 
				    (diff_y <  MERGE_DIFF_ABS ))					&&
				    ((diff_z <= fabs(seed->vec.z*MERGE_DIFF_REL)) || 
				    (diff_z <  MERGE_DIFF_ABS ))					&&
				    ((diff_len <= fabs(seed->len*MERGE_DIFF_REL)) || 
				    (diff_len <  MERGE_DIFF_ABS )) ) {
					res[j].vec.x=res[j].vec.x+t[i].vec.x;
					res[j].vec.y=res[j].vec.y+t[i].vec.y;
					res[j].vec.z=res[j].vec.z+t[i].vec.z;
					++res_count[j].count;
					found_slot=1;
#ifdef MERGE_TVECTORS_SAME_N_DEBUG
					printf(" t-vector %d merged into res vector %d \n",i,j);
#endif
					break;
				}
				/* Get the differences between ith t_vector and jth
				   merged vector in opposite direction.		*/
				diff_x=fabs(t[i].vec.x+seed->vec.x);
				diff_y=fabs(t[i].vec.y+seed->vec.y);
				diff_z=fabs(t[i].vec.z+seed->vec.z);
				diff_len=fabs(t[i].len-seed->len);
				/* Same decision as above for the flipped vector. Every
				   difference is compared against the relative cutoff of
				   its own reference component (y against y, z against z,
				   len against len), mirroring the test above.	*/
				if  (((diff_x <= fabs(seed->vec.x*MERGE_DIFF_REL)) || 
				    (diff_x < MERGE_DIFF_ABS ))					&&
				    ((diff_y <= fabs(seed->vec.y*MERGE_DIFF_REL)) || 
				    (diff_y <  MERGE_DIFF_ABS ))					&&
				    ((diff_z <= fabs(seed->vec.z*MERGE_DIFF_REL)) || 
				    (diff_z <  MERGE_DIFF_ABS ))					&&
				    ((diff_len <= fabs(seed->len*MERGE_DIFF_REL)) || 
				    (diff_len <  MERGE_DIFF_ABS )) ) {
					/* Flip the vector before adding it */
					res[j].vec.x=res[j].vec.x-t[i].vec.x;
					res[j].vec.y=res[j].vec.y-t[i].vec.y;
					res[j].vec.z=res[j].vec.z-t[i].vec.z;
					++res_count[j].count;
					found_slot=1;
#ifdef MERGE_TVECTORS_SAME_N_DEBUG
					printf(" t-vector %d merged into res vector %d \n",i,j);
#endif
					break;
				}

			}
		}
		/* Check if a slot was found...if not, add ith t_vector to
		   the merged vector list.				*/
		if (found_slot == 0) {
			res_vectors++;
			cp_vectorf(t[i].vec,&res[res_vectors].vec);
			res[res_vectors].fit=t[i].fit;
			res[res_vectors].len=t[i].len;
			res_count[res_vectors].count=1;
			res_count[res_vectors].ref=&t[i];
#ifdef MERGE_TVECTORS_SAME_N_DEBUG
			printf(" t-vector %d is new res vector %d \n",i,res_vectors);
#endif
		}
	}
	/* Now we have the summed up t_vectors in the merged record. 
	   In the next step they will be divided by the number of merged 
	   vectors and stored back in the t_vector array t. The references
	   in res_count are no longer needed at this point, so overwriting
	   t is safe.							*/
	for(i = 0; i <= res_vectors; i++) {
		t[i].vec.x=res[i].vec.x/res_count[i].count;
		t[i].vec.y=res[i].vec.y/res_count[i].count;
		t[i].vec.z=res[i].vec.z/res_count[i].count;
		t[i].len=vec_lenf(t[i].vec);
		t[i].fit=res[i].fit;
	}

	/* Free memory */
	free(res);
	free(res_count);

	/* return the new t_vector count */
#ifdef MERGE_TVECTORS_SAME_N_DEBUG
	printf(" MERGE_TVECTORS_SAME_N left with %d merged vectors...\n",res_vectors);
#endif
	return(res_vectors);
}

/* cut_tvector_list(): Cuts the tvector list at a sharp drop in n.	*
 *		A running sum is seeded with the fit of the first	*
 *		three vectors. For every further vector i the sum is	*
 *		updated to 2/3*sum + t[i].fit; the list is cut in	*
 *		front of the first vector for which this update changes	*
 *		the sum by more than sum*cut_crit.			*
 *									*
 *		t_vectors	highest valid index of t (the list	*
 *				holds t_vectors+1 entries)		*
 *		cut_crit	allowed relative change of the sum	*
 *		t		list of tvectors (not modified)		*
 *									*
 *		Returns the new highest valid index: i-1 if the list	*
 *		is cut at vector i, otherwise t_vectors unchanged.	*/

int cut_tvector_list(int t_vectors, float cut_crit, struct tvector t[])

{
	int i;
	float sum, new_sum;
	float third=1.0/3.0;
	float two_third=1.0-third;
#ifdef CUT_TVECTOR_LIST
	printf(" CUT_TVECTOR_LIST entered with %d t_vectors...\n", t_vectors);
	printf(" CUT_CRIT = %f\n",cut_crit);
#endif

	/* With three or fewer vectors there is nothing behind the seed to
	   test, and with fewer than three the seed itself would read past
	   the end of the list.						*/
	if (t_vectors < 3) {
#ifdef CUT_TVECTOR_LIST
		printf(" CUT_TVECTOR_LIST left with unchanged number of t_vectors ...\n");
#endif
		return(t_vectors);
	}

	/* Seed the running sum with the first three vectors */
	sum=(t[0].fit+t[1].fit+t[2].fit);

	for(i=3;i<=t_vectors;i++) {
		new_sum=two_third*sum+t[i].fit;
#ifdef CUT_TVECTOR_LIST
		printf("t[%d].fit %f sum: %f new_sum: %f\n", i, t[i].fit, sum, new_sum);
#endif
		if (fabsf(new_sum-sum) > sum*cut_crit) {
#ifdef CUT_TVECTOR_LIST
			printf(" CUT_TVECTOR_LIST left with %d t_vectors...\n", i);
#endif
			/* Vector i is the first one behind the drop */
			return(i-1);
		}
		else {
			sum=new_sum;
		}
	}
#ifdef CUT_TVECTOR_LIST
	printf(" CUT_TVECTOR_LIST left with unchanged number of t_vectors ...\n");
#endif
	return(t_vectors);
}
