#define VERSION "V0.1"

#include<math.h>
#include <stdlib.h>
#include<stdio.h>
#include <string.h>
#include<semaphore.h>
/* #include<ieeefp.h>
 */
#define ARG(a) (*a)

#ifdef LINUX
  #define _FILE_OFFSET_BITS  64
#endif

/*
 * C_HST_RECON_1OVER
 *
 * Filters every projection of a sinogram and back-projects it into SLICE
 * using a linearly over-sampled copy of the filtered projection.
 *
 * The routine is written to be entered concurrently by `nthreads` callers,
 * each with its own `thread` index (0 .. nthreads-1) and the same buffers:
 *
 *   - thread 0 clears SLICE once, and for each projection copies the
 *     sinogram row into WORK, zero-pads it to *dim_fft, optionally filters
 *     it (hst_ffa8__, multiply by FILTER, hst_ffs8__) and fills OVERSAMPLE
 *     by linear interpolation;
 *   - all threads then meet on `sema`, accumulate OVERSAMPLE into the rows
 *     of SLICE assigned to them (rows are dealt out in chunks of 100,
 *     round-robin over the threads), and meet again on `semb`.
 *
 * Filtering is skipped when FILTER[1] is negative.
 *
 * Compile-time switches visible here: FLOAT_ARI (float position arithmetic
 * instead of fixed point), LINUX, SOLARIS, NO_MANUAL_UNROLL.
 *
 * Parameters that are never referenced in this body: dim_over,
 * time_cpu_filter, time_elapse_filter, time_cpu_back, time_elapse_back.
 * `status` is only forwarded to the FFT routines.
 *
 * Returns 1 unconditionally.
 *
 * NOTE(review): the interpolation loop does nothing when *oversampling is 0,
 * so OVERSAMPLE would keep its previous contents; the "0 = Linear" mode
 * mentioned on the parameter looks unsupported here -- confirm with callers.
 */
int C_HST_RECON_1OVER(
   int *num_bins,        /* Number of bins in each sinogram  */
   int *num_projections, /* Number of projections in  each sinogram */
   int *oversampling,    /*  0 = Linear, 1 = Nearestpixel,
                             2, 3, etc. equal over-sampling factor  */ 
   int *start_x,         /* First X-pixel in region required by user    */
   int *start_y,         /* First Y-pixel in region required by user    */
   int *num_x,           /* Number of X-pixels in reconstruction region */
   int *num_y,           /* Number of Y-pixels in reconstruction region */
   int *y_start,         /* First row to process (the loop starts at *y_start - 1) */
   int *y_end,           /* Last row to process (the loop stops before *y_end)     */
   int *X_STARTS,   /* X_Start tells the limits for each y ( idem for X_ENDS) */
   int *X_ENDS,
   float* angle_offset,     /* Angle of projection 0, passed to cos()/sin() */
   float* angle_increment,  /* Angle step between consecutive projections   */
   float* axis_position,    /* Rotation axis position used in the geometry  */
   int *dim_exponts,        /* Forwarded to hst_ffa8__ / hst_ffs8__         */
   float  *EXPONENTS,       /* Forwarded to hst_ffa8__ / hst_ffs8__         */
   int    *dim_fft,         /* Length of the zero-padded projection in WORK */
   float  *FILTER,          /* *dim_fft multipliers; FILTER[1] < 0 disables filtering */
   int *dim_over,           /* Not referenced in this routine */
   float* SINOGRAMS ,       /* *num_projections rows of *num_bins floats */
   float *time_cpu_filter,     /* Not referenced in this routine */
   float *time_elapse_filter,  /* Not referenced in this routine */
   float *time_cpu_back,       /* Not referenced in this routine */
   float *time_elapse_back,    /* Not referenced in this routine */
   float *SLICE,            /* Output image, *num_x floats per row, accumulated in place */
   float *OVERSAMPLE,       /* Scratch: *num_bins * *oversampling interpolated values */
   float *WORK,             /* Scratch: indexed up to *dim_fft - 1 and at *num_bins   */
   int *status,             /* Forwarded to hst_ffa8__ / hst_ffs8__ */
   int nthreads,            /* Number of cooperating callers */
   int thread,              /* Index of this caller; 0 does the filtering */
   sem_t *sema,             /* Rendezvous before back-projection */
   sem_t *semb              /* Rendezvous after back-projection  */
   )
{


  int i,j;
  int True=1;
  float angle;
  float cos_angle ,sin_angle;

  /*
   * Position arithmetic configuration.
   *
   *   A_float      type used for positions and increments along a row
   *   ZOOM         scale factor appended to a position expression
   *   FLOAT_TO_INT converts a (scaled) position into an OVERSAMPLE index
   *
   * Note that the FLOAT_TO_INT expansions already end with ';'.
   */
#ifdef FLOAT_ARI

  #define A_float float

  /*
   * NOTE(review): "fistpl" stores a 32-bit integer while the destinations
   * used below are long_long variables -- confirm this is safe on targets
   * where long_long is wider than 32 bits.
   */
  #ifdef LINUX
    #define FLOAT_TO_INT(in,out)  \
      __asm__ __volatile__ ("fistpl %0" : "=m" (out) : "t" (in) : "st") ;
  #else
    #define FLOAT_TO_INT(in,out)  \
      out=in ;
  #endif

  #define ZOOM 

#else

  /*
   * Fixed-point arithmetic: positions are multiplied by ZOOM and shifted
   * back down by FLOAT_TO_INT. The scale is 2^n - 1 whereas the shift
   * divides by 2^n (n = 16 or 32).
   * long_long is defined further down; macros expand at the point of use.
   */
  #define A_float long_long


  #ifdef LINUX

    #define ZOOM *0XFFFF

    #define FLOAT_TO_INT(in,out)  \
      out=((in)>>16) ;

  #else
    /* NOTE(review): a 32-bit shift requires long_long to be wider than
       32 bits -- confirm for every non-LINUX target. */
    #define ZOOM *0XFFFFFFFF

    #define FLOAT_TO_INT(in,out)  \
      out=((in)>>32) ;
  #endif

#endif

  
#ifdef SOLARIS
  /*  c89 -c  -KPIC -Xt -fast -xarch=v8plusb -xtarget=ultra3 -xexplicitpar -D SOLARIS -D INT_ARI -D NO_MANUAL_UNROLL c_hst_recon_1over.c
   */
  #define long_long long long
#else
  #define long_long long
#endif
  
  /* Single definition for every configuration; all uses come after this
     line, so earlier per-branch copies were redundant (one of them
     conflicted with this replacement list). */
  #define FLOAT_TO_INT_R(a,b)  FLOAT_TO_INT(a,b)


  /* Per-pixel position step along a row, and its multiples 2..8 used by
     the unrolled loops. */
  A_float increment_position, increment_position_2, increment_position_3;
  A_float increment_position_4, increment_position_5, increment_position_6;
  A_float increment_position_7, increment_position_8;
  A_float slice_position;
  
  /* FFT routines defined outside this file; called once before and once
     after the multiplication by FILTER. */
  int hst_ffa8__(int* , int* , int* , float *, int* , float *, int *) ;
  int hst_ffs8__(int* , int* , int* , float *, int* , float *, int *) ;
  
  int y;
  int x_start, x_end,num_xelem ;
  long_long start_address;
  long_long  projection, address;
  long_long  bin,bin1,bin2,bin3;
  long_long  bin4,bin5,bin6,bin7;


#ifdef FLOAT_ARI
  float  fbin ,fbin1,fbin2,fbin3;
  float  fbin4,fbin5,fbin6,fbin7;
#else
  /* In fixed-point mode the "float" bins are the integer bins themselves,
     converted in place by FLOAT_TO_INT_R. */
  #define fbin bin
  #define fbin1 bin1
  #define fbin2 bin2
  #define fbin3 bin3
  #define fbin4 bin4
  #define fbin5 bin5
  #define fbin6 bin6
  #define fbin7 bin7
#endif

  int semvalue;


  printf(" c_hst_recon_1over modifie par Christian \n");
  
  printf(" sono in thread %d \n", thread);
  
  /* Only thread 0 clears the output. The pixel count is computed in size_t
     so that large regions cannot overflow int arithmetic. */
  if(thread==0) {
    memset(SLICE,0, (size_t)ARG(num_x) * (size_t)ARG(num_y) * sizeof(float) );
  }
  
  for(projection=0; projection < *num_projections; projection++) {
    /* Progress trace every 100 projections. projection is a long_long, so
       it is narrowed explicitly to match "%d"; it is bounded by the int
       *num_projections, hence the cast is lossless. */
    if(thread==0) {    
      if(projection%100==0)
        printf("%d\n",(int)projection);
    }
    
    angle = *angle_offset + projection * (*angle_increment);
    
    /* Thread 0: load this projection into WORK and zero-pad up to dim_fft. */
    if(thread==0) { 
      memcpy(WORK,SINOGRAMS + projection * ARG(num_bins),
             ARG(num_bins) * sizeof(float));
      memset(WORK + ARG(num_bins),0,
             (ARG(dim_fft) - ARG(num_bins)) * sizeof(float));
    }
    /* Thread 0: filter the projection unless FILTER[1] is negative. */
    if(thread==0 && FILTER[1] >= 0.) {   
      hst_ffa8__(dim_fft, dim_fft, dim_exponts, EXPONENTS, &True, WORK, status);
      /******************************************************
       * Multiply the Fourier transformed array with filter *
       ******************************************************/    
      for(i=0;i<ARG(dim_fft);i++) {
        WORK[i] = WORK[i] * FILTER[i];
      }
      
      /***************************************************
       * Inverse Fourier transform to give convolution   *
       ***************************************************/
      
      hst_ffs8__(dim_fft, dim_fft, dim_exponts, EXPONENTS, &True, WORK, status);
    }
    /* Thread 0: build the over-sampled projection by linear interpolation
       between neighbouring bins. WORK[num_bins] duplicates the last bin so
       that WORK[i+1] is defined for the final bin. */
    if(thread==0) { 
      WORK[ARG(num_bins)]=WORK[ARG(num_bins)-1];
      for(i=0; i<ARG(num_bins); i++) {
        for(j=0; j< ARG(oversampling); j++) {
          OVERSAMPLE[i*ARG(oversampling) + j] = 
            (WORK[i]*(ARG(oversampling)-j) + WORK[i+1]*j) / (ARG(oversampling));
        }
      }
    }
    
/*  !     Set extrapolated values */
/*          Do bin = (num_bins - 1) * oversampling + 1, num_bins * oversampling */
/*             OVERSAMPLE(bin) = upper_value */
/*          End Do */


    /* Every thread computes its own copy of the per-projection geometry. */
    cos_angle = cos(angle);
    sin_angle = sin(angle);
    increment_position = (A_float)( cos_angle * (*oversampling) ZOOM);
    increment_position_2 =(A_float) ( increment_position*2);
    increment_position_3 =(A_float) ( increment_position*3);
    increment_position_4 = (A_float)( increment_position*4);
    increment_position_5 = (A_float)( increment_position*5);
    increment_position_6 = (A_float)( increment_position*6);
    increment_position_7 = (A_float)( increment_position*7);
    increment_position_8 = (A_float)( increment_position*8);

    /**********************/
    /* all must meet here */
    /* Threads other than 0 take `sema` twice. Thread 0 posts it
       2*nthreads-2 times, spinning on sem_getvalue until the count reads 0
       before each post. The initial value of the semaphore is set by the
       caller and is not visible here. */
    if(thread!=0) {
      sem_wait(sema);
      sem_wait(sema);
    } else {
      for(i=0; i<2*nthreads-2; i++) {
        while(1) {
          sem_getvalue(sema,&semvalue);
          if(semvalue==0)
            break;
        }
        sem_post(sema);
      }
    }
    /*****************************/

    for(y = *y_start - 1; y < *y_end; y++) {
      /* Rows are assigned in chunks of 100, round-robin over the threads. */
      if(((y / 100 ) % nthreads) != thread)
        continue;
      /* NOTE(review): with num_xelem = x_end - x_start the pixel at index
         X_ENDS[y]-1 is not updated -- confirm whether X_ENDS is meant to be
         inclusive. */
      x_start = X_STARTS[y]-1;
      x_end = X_ENDS[y]-1;
      num_xelem = x_end - x_start;
 
      /* Position, in over-sampled bins (scaled by ZOOM in fixed-point
         mode), of the first pixel of this row on the current projection. */
      slice_position = (A_float) ( (*oversampling) * ((*axis_position) + 
        ((x_start + *start_x  - 1) + 0.5 - *axis_position) * cos_angle - 
        ((y + *start_y - 1) + 0.5 - *axis_position) * sin_angle -0.5 )  ZOOM);




/*  !        Calculate position of back-projected 1st element */
/*             slice_position = Real(oversampling) * (axis_position + & */
/*               (Real(x_start + start_x - 1) - 0.5 - axis_position) * cos_angle - & */
/*               (Real(y + start_y - 1) - 0.5 - axis_position) * sin_angle - 0.5) & */
/*               + 0.99999 */


  /* start_address = (y - 1) * (*num_x) + x_start; */

      /* Linear index into SLICE of the first pixel of this row segment. */
      start_address = (y ) * (*num_x) + x_start;
      address = start_address ; 
      j=0;

#ifndef NO_MANUAL_UNROLL

      /* Eight pixels per iteration. */
      for(; j < num_xelem - 7; j += 8) {

        fbin = slice_position;
        fbin1 = slice_position+ increment_position;
        fbin2 = slice_position+ increment_position_2;
        fbin3 = slice_position+ increment_position_3;

        FLOAT_TO_INT_R(fbin,bin);
        FLOAT_TO_INT_R(fbin1,bin1);
        FLOAT_TO_INT_R(fbin2,bin2);
        FLOAT_TO_INT_R(fbin3,bin3);
     
        SLICE[address  ] = SLICE[address  ] + OVERSAMPLE[bin]; 
        SLICE[address+1] = SLICE[address+1] + OVERSAMPLE[bin1]; 
        SLICE[address+2] = SLICE[address+2] + OVERSAMPLE[bin2]; 
        SLICE[address+3] = SLICE[address+3] + OVERSAMPLE[bin3];
     

        fbin4 = slice_position+ increment_position_4;
        fbin5 = slice_position+ increment_position_5;
        fbin6 = slice_position+ increment_position_6;
        fbin7 = slice_position+ increment_position_7;

        FLOAT_TO_INT_R(fbin4,bin4);
        FLOAT_TO_INT_R(fbin5,bin5);
        FLOAT_TO_INT_R(fbin6,bin6);
        FLOAT_TO_INT_R(fbin7,bin7);

 

        SLICE[address+4  ] = SLICE[address+4] + OVERSAMPLE[bin4]; 
        SLICE[address+5  ] = SLICE[address+5] + OVERSAMPLE[bin5]; 
        SLICE[address+6  ] = SLICE[address+6] + OVERSAMPLE[bin6]; 
        SLICE[address+7  ] = SLICE[address+7] + OVERSAMPLE[bin7];
     
        slice_position = slice_position + increment_position_8;
        address += 8;
      }      
      /* Four pixels per iteration for what is left. */
      for(; j < num_xelem - 3; j += 4) {
        FLOAT_TO_INT(slice_position,bin);
        FLOAT_TO_INT(slice_position+ increment_position,bin1);
        SLICE[address] = SLICE[address] + OVERSAMPLE[bin]; 
        SLICE[address+1] = SLICE[address+1] + OVERSAMPLE[bin1]; 
        FLOAT_TO_INT(slice_position+ increment_position_2,bin2);
        FLOAT_TO_INT(slice_position+ increment_position_3,bin3);
        SLICE[address+2] = SLICE[address+2] + OVERSAMPLE[bin2]; 
        SLICE[address+3] = SLICE[address+3] + OVERSAMPLE[bin3];
        slice_position = slice_position + increment_position_4;
        address+=4;
      }      

#endif

      /* Remaining pixels (or all of them with NO_MANUAL_UNROLL). */
      for(; j < num_xelem; j += 1) {
        FLOAT_TO_INT(slice_position,bin);
        SLICE[address] = SLICE[address] + OVERSAMPLE[bin]; 

        slice_position = slice_position + increment_position;
        address += 1;

      }   
    }
    /**********************/
    /* all must meet here */
    /* Same protocol as above, on `semb`, before thread 0 starts
       overwriting WORK and OVERSAMPLE for the next projection. */
    if(thread!=0) {
      sem_wait(semb);
      sem_wait(semb);
    } else {
      for(i = 0; i < 2 * nthreads - 2; i++) {
        while(1) {
          sem_getvalue(semb, &semvalue);
          if(semvalue==0)
            break;
        }
        sem_post(semb);
      }
    }
    /*****************************/    

  }

  return 1;
}



