/***************************************************************************
                          Sparsa.cc  -  description
                             -------------------
    begin                : Tue Feb 1 2000
    copyright            : (C) 2000 by Alessandro MIRONE
    email                : mirone@lure.u-psud.fr
 ***************************************************************************/


#include<string.h>
#include<string.h>
#include<stdio.h>
#include <stdlib.h>
#include<iostream.h>
#include<math.h>
#include<complex.h>
#include<nMatrix/nMatrix.h>
#include<nMatrix/nLapack.h>
#include"Sparsa.h"

// #define DEBUG(a)
#define DEBUG(a) a

// Destructor of the MatriceSparsa base class; this definition performs no cleanup.
MatriceSparsa::~MatriceSparsa() {}

// Base-class fallback for Moltiplica. Reaching it is reported as a problem
// and the program is terminated; both arguments are ignored.
void MatriceSparsa::Moltiplica(double *ris, double *vect  )
{
	cout << " Problema :MatriceSparsa::Moltiplica\n";
	// Fatal path: report failure to the caller of the program.  The previous
	// exit(0) made the run look successful.
	exit(EXIT_FAILURE);
}

// Base-class fallback for solveCGSym. Reaching it is reported as a problem
// and the program is terminated; all arguments are ignored.
void MatriceSparsa::solveCGSym( double *x, double *b, double tol, int MAXIT, double *initial=0)
{
	cout << " Problema :MatriceSparsa::solveCGSym\n";
	// Fatal path: report failure to the caller of the program.  The previous
	// exit(0) made the run look successful.
	exit(EXIT_FAILURE);
}

// Base-class fallback for trasforma. Reaching it is reported as a problem
// and the program is terminated; both arguments are ignored.
void MatriceSparsa::trasforma( double x, double b)
{
	cout << " Problema :MatriceSparsa::trasforma\n";
	// Fatal path: report failure to the caller of the program.  The previous
	// exit(0) made the run look successful.
	exit(EXIT_FAILURE);
}
	
// Base-class fallback for copiasu. Reaching it is reported as a problem
// and the program is terminated; m is left untouched.
void MatriceSparsa::copiasu(MatriceSparsa * & m)
{
	cout << " Problema :MatriceSparsa::copiasu\n";
	// Fatal path: report failure to the caller of the program.  The previous
	// exit(0) made the run look successful.
	exit(EXIT_FAILURE);
}


// Default constructor: no shifted copy exists yet and no matrix is attached.
Lanczos::Lanczos()
{
	shiftata=0;
	matrice=0;
}

// Releases the shifted copy held in shiftata (filled in by cerca()), if any.
Lanczos::~Lanczos()
{
	// delete on a null pointer does nothing, so no explicit test is required.
	delete shiftata;
}

// Runs the restarted Lanczos search on a transformed copy of `matrice`.
//
//   nd    : number of converged values to wait for; the basis has m = 4*nd steps
//   shift : passed as the second argument of trasforma(1.0, shift) on the copy
//           (for Sparsa3A this adds `shift` to every diagonal coefficient)
//   tole  : threshold handed to LanczosSteps::lineaestnulla
//
// Returns the value of k reached when the loop stops, i.e. either when at
// least nd values are reported converged by convergeds(), or when the
// coupling line of the projected matrix is found to be null.
//
// `matrice` must have been set before the call; it is dereferenced
// without a check.
int Lanczos::cerca(int nd, double shift, double tole) {

	// Work on a private copy so that `matrice` itself is not modified.
	if(shiftata) {
		delete shiftata;
		shiftata=0;          // never keep a dangling pointer around
	}
	matrice->copiasu(shiftata);
	shiftata->trasforma(1.0,shift);
	steps.setMatrice(shiftata,4*nd);

	int m=4*nd;

	// Random start vector.
	// NOTE(review): random() is scaled by RAND_MAX, which is the constant of
	// rand(); the two ranges coincide on glibc but this is not guaranteed.
	double *iniziale=new double[steps.dim];
	for(int j=0; j< steps.dim; j++) {
		iniziale[j]= random()*1.0/RAND_MAX;
	}

	int nc=0;
	int k=0;
	steps.setNoOldDiags();
	DEBUG(cout << " passeggio \n";)
	steps.passeggia(k,m,iniziale);
	while(nc<nd) {
		DEBUG(cout << "diagonalizzo \n";)
		steps.diagonalizza(k,m);
		DEBUG(cout << "diagonalizzo OK\n";)
		nc=steps.convergeds(m, 2.0e-16);

		DEBUG(cout << " per ora nc = " << nc << endl;)
		if(k){
			if( steps.lineaestnulla(k, tole) ) break;
		}
		// Keep the converged values plus 2*nd more, but always leave at
		// least one step to walk.
		if((nc+2*nd)>=m)  k=m-1;
		else k=nc+2*nd;
		steps.ricipolla(k,m);
		DEBUG(cout << " passeggio di nuovo  \n";)
		steps.passeggia(k,m,steps.q[m]);
	}
	DEBUG(cout << " soddisfatto\n";)
	// The buffer came from new[]: it must be released with delete[].
	delete [] iniziale;
	return k;
}

// Restart step: compresses the current m-step basis down to k vectors built
// from the eigenvectors in evect, so that passeggia() can continue from
// step k.
//
//   k : number of vectors kept
//   m : current number of steps (q[0..m] are in use)
//
// Effects visible in this routine:
//   - alpha[0..k-1] take the first k diagonal entries of eval;
//   - beta[0..k-1] become beta[m-1]*evect(m-1,i);
//   - q[0..k-1] are replaced by the first k dim-long slices of b*E, where b
//     holds q[0..m-1] back to back, and q[k] receives the old q[m];
//   - the first k rows/columns of omega are rewritten from products of omega
//     with evect.
// NOTE(review): the exact row/column layout of uMatrix and the meaning of
// the three-argument resize (presumably "wrap this storage") are defined in
// nMatrix and cannot be confirmed from this file.
void LanczosSteps::ricipolla(int k, int m) {

	for(int i=0; i<k;i++) {
		alpha[i]= eval(i,i) ;
	}

	// Uses the old beta[m-1]; valid for i == m-1 too, because k <= m-1 in the
	// visible caller (cerca) so beta[m-1] is never overwritten here.
	for(int i=0; i<k;i++) {
		beta  [i]=beta[m-1]*evect( m-1,i );
		// cout <<"beta " << beta  [i] << endl;
	}
	
	// Static scratch matrices, reused across calls.
	static uMatrix<double> a,b,E,o;
	
	a.resize(dim,k);
	b.resize(dim,m);
	E.resize(m,k, evect.address());
	// e.resize(dim,k ,evect.address());

	// Pack q[0..m-1] contiguously into b.
	for(int i=0; i<m;i++) {
		memcpy( b.address()+ i*dim, q[i], dim*sizeof(double) );
	}
	// cout << " m = " << m << endl;
	// cout << " k = " << k<< endl;
	// cout << " dim = " << dim<< endl;
	a= b*E ;
	
	// Unpack the first k slices of the product back into q.
	for(int i=0; i<k;i++) {
		memcpy(  q[i],   a.address()+ i*dim,   dim*sizeof(double) );
	}
	memcpy(  q[k],q[m],   dim*sizeof(double) );
	// shows the scalar products of q[k] with q[0..k]
	DEBUG(for(int i=0; i<=k ; i++) { cout << " scalare " << scalare(q[k],q[i], dim)<< endl;})
	// !! handling of the omegas !!
	o.resize(m,m);
	for(int i=0; i<m;i++) {
		memcpy( o.address()+ i*m,  omega[i] ,  m*sizeof(double)  );
	}
	o= o*evect ;
	for( int i=0; i<k;i++) {
		omega[i][k]=omega[k][i]=o(i,k);
	}	
	
	o=hTranspose(evect)* o ;
	for(int i=0; i<k;i++) {
		memcpy(omega[i] ,   o.address()+ i*m,  k*sizeof(double)  );
	}
	DEBUG(cout << "  *************************** \n" << endl;)
}

int  LanczosSteps::convergeds(int m, double tole)
{
	static uMatrix<double> a,b;
	a.resize(m);
	b.resize(m);
	
	for(int j=1;j<m; j++)
		{
			for(int i=0; i<m-j; i++)
				{
					if(fabs(eval(i,i))<fabs(eval(i+1,i+1)) ) {
					    DEBUG(cout << i << " " << j << endl;)
						double dum=eval(i,i);
						eval(i,i)=eval(i+1,i+1);
						eval(i+1,i+1)=dum;
						memcpy(a.address(), evect.address()+i*m, m*sizeof(double));
						memcpy(b.address(), evect.address()+(i+1)*m, m*sizeof(double) );
						
						memcpy(evect.address()+i*m,b.address(),  m*sizeof(double) );
						memcpy(evect.address()+(i+1)*m, a.address(),  m*sizeof(double) );
					}
				}
		}
	DEBUG(for(int j=1;j<m; j++){		cout << alpha[j]<< endl;}	)
	int res=0;
	if(olddiags) {
		while(res<m) {
			DEBUG(cout << " eval  " <<eval(res,res)<< " old " << oldalpha[res] << endl;)
			if( fabs( eval(res,res)-oldalpha[res]  ) /  fabs( oldalpha[res] ) >tole ) break;
			res++;
		}
	}

	for(int j=0;j<m; j++)
	{
		oldalpha[j]=eval(j,j);
	}	
	olddiags=1;
	return res;
}


// Removes from z its components along q[0..n-1], one vector at a time.
// The whole sweep is repeated four times.
void  LanczosSteps::GramSchmidt(double* z, int n )
{
	const int passate=4;
	for(int giro=0; giro<passate; giro++) {
		for(int i=0; i<n; i++) {
			const double proiezione=scalare(z,q[i],dim);
			somma(z,q[i],-proiezione,dim);
		}
	}
}

// Tells whether the entries A(k,0..k-1) are all within tole of zero.
// Returns 1 if they are, 0 as soon as one entry exceeds tole in magnitude.
int LanczosSteps::lineaestnulla(int k, double tole)
{
	for(int i=0; i<k; i++) {
		DEBUG(cout << " **************** " << A(k,i) << endl;)
		if( fabs(A(k,i))>tole) return 0;
	}
	return 1;
}

// Assembles the m x m projected matrix A from alpha and beta and hands it to
// uSymEig(A,evect,eval).
//
// Layout of A:
//   - diagonal        : alpha[0..m-1]
//   - row/column k    : beta[0..k-1] in positions (k,i) and (i,k), i < k
//   - from k onwards  : beta[i] on the two first off-diagonals, k <= i < m-1
void LanczosSteps::diagonalizza(int k, int m)
{
	A.resize(m,m);
	evect.resize(m,m);
	eval.resize(m,m);

	A=0.0;

	for(int d=0; d<m; d++) A(d,d)=alpha[d];

	for(int c=0; c<k; c++) {
		A(c,k)=beta[c];
		A(k,c)=beta[c];
	}

	for(int t=k; t+1<m; t++) {
		A(t+1,t)=beta[t];
		A(t,t+1)=beta[t];
	}

	// The decomposition is computed here; the other methods of this class
	// read the values from the diagonal of eval and the vectors from evect.
	DEBUG(cout << " matrice A \n" <<A << endl;)
	uSymEig(A,evect,eval);
	DEBUG(cout << " matrice evect \n" <<evect << endl;)
	DEBUG(cout << " matrice eval    \n" <<eval << endl;)
}


// Performs Lanczos steps k .. m-1, filling alpha[i], beta[i] and q[i+1].
//
//   k        : first step to perform (0 for a fresh start)
//   m        : one past the last step; must not exceed nsteps
//   iniziale : start vector, used only when k == 0; it is normalised in
//              place and copied into q[0].  For k > 0 it is not read (the
//              corresponding branch is commented out).
//
// Each step applies the operator to q[i] (Moltiplica when inversione == 0,
// otherwise solveCGSym with tol/maxit), removes the components along the
// previous vectors using alpha and beta, and stores the normalised result in
// q[i+1].  Alongside, the table omega is updated with a recurrence; when the
// sum of squares of row i+1 exceeds eu, or beta[i] is exactly zero, the
// vectors are explicitly re-orthogonalised with GramSchmidt.
// NOTE(review): omega presumably estimates the loss of orthogonality between
// the q vectors (partial re-orthogonalisation) — confirm against the
// reference the recurrence was taken from.
//
// Prints " alpha <value>" for every step, unconditionally.
// On invalid k/m it prints a message and calls exit(0).
void LanczosSteps::passeggia(int k , int m,  double * iniziale  ) {
	
	if(k<0 || m>nsteps) {
		cout << " problema in passeggia (k<0 || m>nsteps)  \n";
		exit(0);
	}	

	double sn= sqrt(1.0*dim);
	double eu = 1.1e-16;     // unit round-off used by the omega recurrence
	double eq= sqrt(eu);     // eq*eq (== eu) is the threshold tested on max below
	if(k==0) {
		double norma= scalare( iniziale,iniziale,dim);
		norma=sqrt(norma);
		normalizza(iniziale, norma, dim );
	
		memcpy(p , iniziale, dim*sizeof(double) );
	
		memcpy(q[0] , p, dim*sizeof(double) );
	} else{
		/*
		for(int i=0; i<k; i++) {
			memset(p , 0, dim*sizeof(double) );
			if(inversione==0) {
				matrice->Moltiplica(p, q[i] );
			} else {
				matrice->solveCGSym(p,q[i],tol,maxit);
			}
			// cout << scalare(p,q[k],dim)<< endl;
		 }
		*/
	}
	
	for(int i=k; i<m; i++) {
		// p is zeroed first (Sparsa3A::Moltiplica accumulates into its result).
		memset(p, 0,dim*sizeof(double) );
		if(inversione==0) {
				matrice->Moltiplica(p, q[i] );
		} else {
				matrice->solveCGSym(p,q[i],tol,maxit);
		}
		
		alpha[i]= scalare( p, q[i],dim);
	  cout << " alpha " << alpha[i] << endl		;
		if(i==k) {
			// First step after a restart: q[k] is coupled to all of q[0..k-1]
			// through beta[0..k-1].
			somma(p, q[k],- alpha[k],dim);
			for(int l=0; l<k; l++) {
					somma(p, q[l],- beta[l] , dim);
			}				
		}	else    {
			// Ordinary three-term recurrence.
			somma(p,q[i], - alpha[i], dim  );
			somma(p,q[i-1], - beta[i-1], dim  );
		}
		double norma=  scalare( p,p,dim) ;
		beta[i]=sqrt(norma);
		
		omega[i][i]=1;
		double max=0.0;   // accumulates the sum of squares of omega[i+1][0..i]
		if( beta[i]!=0) {
			for(int j=0; j<i+1; j++) {
				omega[i+1][j] = eu*sn;
				if(j<k) {
					double add= 2*  eu*sn   ;
					add += fabs (  alpha[j] -alpha[i]     )*omega[i][j];
					if(i != k ) add +=beta[j]*omega[i][k];
					if(i>0) {
							if(j != i-1) add += beta[i-1]*omega[i-1][j] ;
					}
					// cout << " a " <<add<< " " <<beta[i] << endl;
					omega[i+1][j] += add /beta[i] ;  											
				} else if (j==k) {
					double add= 2*  eu*sn   ;
					add += fabs (  alpha[j] -alpha[i]     )*omega[i][j];
					for(int w=0; w<k;w++) {
						  add += beta[w]*omega[i][w];
					}
					if(i !=(k+1)) add+=beta[k]*omega[i][k+1];
					if(i>0) {
							if(i !=(k+1))  add += beta[i-1]*omega[i-1][k] ;
					}
					// cout << " qua qua 0 " <<add<< " " <<beta[i] << endl;
					omega[i+1][j] += add /beta[i] ;  																
				} else if(j<i ) {
					double add= 2*  eu*sn   ;
					add += fabs (  alpha[j] -alpha[i]     )*omega[i][j];
					if( i != j+1 )  add +=beta[j]*omega[i][j+1];
					if(i>0 && j>0)   {
							add += beta[j-1]*omega[i-1][j-1];
					}
					if(i>0) {
							if( i != j+1 ) add += beta[i-1]*omega[i-1][j] ;
					}
					
					// cout << " qua  1 " <<add<< " " <<beta[i] << endl;
					omega[i+1][j] += add /beta[i] ;  	
				}	else  {
					double add=   eu*sn   ;
					if(i>0) add+= beta[i-1]*omega[i][i-1] ;
					// cout << " 2 " <<add<< " " <<beta[i] << endl;
					omega[i+1][j]+= add /beta[i] ;  				
				}
				omega[j][i+1]=omega[i+1][j];
				max=max+ omega[i+1][j]*omega[i+1][j]         ;
				// if(max<omega[i+1][j]) max=omega[i+1][j];
			}
		}
		// cout << " omegas done \n" ;
		if( 	beta[i]==0  ||  max> eq*eq) {
			DEBUG(cout << " max " << max << endl;)
			DEBUG(cout << " beta " << beta[i] << endl;)

			if(i>0) {
					// Clean q[i] against q[0..i-1], renormalise it and redo the
					// step with the cleaned vector.
					// cout << " gram \n";
					GramSchmidt( q[i],  i  );
					// cout << " gram OK \n";
					double d  =		scalare( q[i],q[i] ,dim)    ;
					d =sqrt(d);
        			normalizza( q[i] ,  d ,  dim);
        			memset(p, 0,dim*sizeof(double) );
					if(inversione==0) {
							matrice->Moltiplica(p, q[i] );
					} else {
							matrice->solveCGSym(p,q[i],tol,maxit);
					}
					alpha[i]= scalare( p, q[i],dim);
					for(int j=0; j< dim; j++) {
						p[j]=p[j] - alpha[i]*q[i][j] - beta[i-1]*q[i-1][j];
					}	
			}
			
			// cout << " gram \n";
			GramSchmidt( p,i  );
			// cout << " gram OK \n";
			double d=  scalare( p,p,dim);
			beta[i]=sqrt(d);	

			// If what is left of p is below the round-off level of the terms
			// it was built from, replace it with a random vector orthogonal
			// to q[0..i-1] and record beta[i] = 0.
			if( 	beta[i]<
			          (      (i>0) ?      ( eu*sqrt(dim*(alpha[i]*alpha[i]+beta[i-1]*beta[i-1])) )   :      (   eu*sqrt(dim*(alpha[i]*alpha[i])    )))        ) {
				DEBUG(cout << " ripartire con un vettore perpendicolare a caso\n";		)	
				beta[i]=0;
				
				for(int j=0; j< dim; j++) {
					p[j]= random()*1.0/RAND_MAX;
				}	
				
			  GramSchmidt( p,i  );
				double d=  scalare( p,p,dim);
				d=sqrt(d);	
	                     normalizza( p,d,dim);		
			} else{
	                    normalizza( p,beta[i],dim);		
			}
				// cout << " adjusting the omegas\n";			
			// After an explicit re-orthogonalisation rows/columns i and i+1
			// of omega are reset to zero.
			for(int l=0; l<m; l++) {
				omega[i][l]=omega[l][i]=omega[i+1][l]=omega[l][i+1]=0.0;
			}
		} else {
		// cout << "normalising \n" ;

		  normalizza( p,beta[i],dim);		
		}
	  memcpy(q[i+1] , p, dim*sizeof(double) );
	}
};


// Frees every buffer through liberamemoria() and then prints " OK ".
LanczosSteps::~LanczosSteps()
{
	liberamemoria();
	cout << " OK \n";
}

// Default constructor: no matrix attached, no storage allocated, all
// counters and flags cleared.  assicuramemoria() does the allocation later.
LanczosSteps::LanczosSteps()
{
	matrice=0;

	// Buffers (all null until assicuramemoria() is called).
	q=omega=0;
	alpha=oldalpha=beta=p=0;

	// Sizes and flags.
	dim=nsteps=0;
	olddiags=0;
	inversione=0;
}

// Makes sure the work buffers match vectors of length d and ns steps.
// When the sizes already match nothing happens; otherwise everything is
// released and reallocated, zero-filled:
//   alpha, oldalpha, beta : ns entries each
//   p                     : d entries
//   q                     : ns+1 vectors of d entries
//   omega                 : (ns+1) x (ns+1) table
void LanczosSteps::assicuramemoria(int d, int ns)
{
	if(d==dim && ns==nsteps) return;

	liberamemoria();

	dim=d;
	nsteps=ns;

	const size_t bytepassi = nsteps*sizeof(double);
	const size_t bytevett  = dim*sizeof(double);
	const int    righe     = nsteps+1;

	alpha = new double [nsteps];
	memset(alpha, 0, bytepassi);

	oldalpha = new double [nsteps];
	memset(oldalpha, 0, bytepassi);

	beta = new double [nsteps];
	memset(beta, 0, bytepassi);

	p = new double [dim];
	memset(p, 0, bytevett);

	q     = new double * [righe];
	omega = new double * [righe];

	for(int r=0; r<righe; r++) {
		q[r] = new double [dim];
		memset(q[r], 0, bytevett);
	}
	for(int r=0; r<righe; r++) {
		omega[r] = new double [righe];
		memset(omega[r], 0, righe*sizeof(double));
	}
}

// Releases every buffer allocated by assicuramemoria() and resets the sizes
// and pointers, so that the object is back in its just-constructed state.
// Safe to call when nothing is allocated.
void LanczosSteps::liberamemoria() {

	// Everything below was obtained with new[], so delete[] is required
	// (plain delete on an array is undefined behaviour).  delete[] on a
	// null pointer is a no-op.
	delete [] alpha;
	delete [] oldalpha;
	delete [] beta;
	delete [] p;

	// q and omega are handled independently, so neither is leaked if only
	// one of them happens to be set.  Both have nsteps+1 rows.
	if(q) {
		for(int i=0; i<nsteps+1; i++) {
			delete [] q[i];
		}
		delete [] q;
	}
	if(omega) {
		for(int i=0; i<nsteps+1; i++) {
			delete [] omega[i];
		}
		delete [] omega;
	}

	nsteps=0;
	dim=0;
	alpha=0;
	oldalpha=0;
	beta=0;
	p=0;
	q=0;
	omega=0;
}


#define _UNROLL_

// Builds an empty matrix in triplet form with room for 1000 entries and no
// Gershgorin data.
Sparsa3A::Sparsa3A()
{
  nsize=1000;
  coeff=new double[nsize];
  col  =new int[nsize];
  row  =new int[nsize];

  // No entries stored yet.
  dim=0;
  n=0;

  // gohersch() has not been run.
  Gmax=0;
  Gmin=0;
  nG=0;
}


// Empties the matrix: drops the current triplet storage and starts again
// with a fresh capacity of 1000 entries, n = 0 and dim = 0.
// The Gershgorin data (nG, Gmin, Gmax) is not touched.
void Sparsa3A::pulisci() {
  // The arrays come from new[]: they must be released with delete[].
  delete [] coeff;
  delete [] col;
  delete [] row;

  n=0;
  dim=0;
  nsize=1000;

  coeff=new double[nsize];
  col=new int[nsize]   ;
  row=new int[nsize]   ;
}

void Sparsa3A::gohersch()
{
  if(nG) {
		delete Gmin;
		delete Gmax;
  }
	nG=n;
	Gmin= new double[nG];		
	Gmax= new double[nG];	
	
	for(int i=0; i<nG;i++) {
		Gmin[i]=Gmax[i]=0.0;
	}
	
	for(int i=0; i<n;i++) {
		if(row[i]==col[i]) {
			Gmin[col[i]]=Gmax[col[i]]=coeff[i];
		}
	}
	for(int i=0; i<n;i++) {
		if(row[i]!=col[i]) {
			Gmin[col[i]]-=fabs( coeff[i] );
			Gmax[col[i]]+=fabs( coeff[i] );
		}
	}
}

// Smallest entry of Gmin[0..nG-1]; 0 when nG is not positive.
double Sparsa3A::goherschMin()
{
	if(nG<=0) return 0;

	double minimo=Gmin[0];
	for(int i=1; i<nG; i++) {
		if(minimo>Gmin[i]) minimo=Gmin[i];
	}
	return minimo;
}

// Largest entry of Gmax[0..nG-1]; 0 when nG is not positive.
double Sparsa3A::goherschMax()
{
	if(nG<=0) return 0;

	double massimo=Gmax[0];
	for(int i=1; i<nG; i++) {
		if(massimo<Gmax[i]) massimo=Gmax[i];
	}
	return massimo;
}

// Releases the triplet storage and, when gohersch() has been run (nG != 0),
// the Gershgorin arrays.
Sparsa3A::~Sparsa3A() {
	// All of these come from new[], so delete[] is required.
	if(nsize) {
		delete [] coeff;
		delete [] col;
		delete [] row;
	}
	if(nG) {
		delete [] Gmin;
		delete [] Gmax;
	}
}

// Writes the matrix into the dense dim x dim matrix a: a is resized, set to
// zero, and every stored coefficient is assigned to a(row,col).
void Sparsa3A::ottieniuM(uMatrix<double> &a)
{
	a.resize(dim,dim);
	a=0.0;

	int k=0;
	while(k<n) {
		a(row[k],col[k])= coeff[k];
		++k;
	}
}

// Replaces the content of the matrix with the entries read from `file`,
// each coefficient multiplied by fact.
void Sparsa3A::inizializza(char *file, double fact=1)
{
  pulisci();            // start from an empty matrix
  somma(file,fact);     // then accumulate the file into it
}

// Writes the matrix to `file`: first the number of stored entries, then one
// line "coefficient row column" per entry.  Exits with status -1 when the
// file cannot be opened.
void Sparsa3A::scrivi(char *file)
{
  FILE *uscita=fopen(file,"w");
  if(uscita==0) {
    printf("problemi per l'aperture del file %s nella routine  Sparsa3A::scrivi(char *file)\n", file);
    exit(-1);
  }

  fprintf(uscita,"%d\n", n);
  int k=0;
  while(k<n) {
    fprintf(uscita,"%24.15e  %d %d\n", coeff[k],row[k],col[k]);
    k++;
  }

  fclose(uscita);
}


// Same output as scrivi(char*), sent to standard output: the number of
// stored entries followed by one "coefficient row column" line per entry.
void Sparsa3A::scrivi()
{
  if(!stdout) {
    printf("problemi per stdout nella routine  Sparsa3A::scrivi()\n");
    exit(-1);
  }

  // printf writes to stdout, exactly like fprintf(stdout, ...).
  printf("%d\n", n);
  int k=0;
  while(k<n) {
    printf("%24.15e  %d %d\n", coeff[k],row[k],col[k]);
    k++;
  }
}

// Adds to this matrix the entries read from `file`, each coefficient
// multiplied by fact.
//
// File format (the one produced by scrivi): an integer count, followed by
// that many "coefficient row column" triples.
//
// Exits with status -1 when the file cannot be opened or when it is shorter
// or otherwise different from what the count announces.  The read results
// used to be ignored, so a truncated file silently re-added the last triple
// (or used uninitialised values).
void Sparsa3A::somma(char *file, double fact) {
  FILE *f=fopen(file,"r");
  if(!f) {
    printf("problemi per l'aperture del file %s nella routine  Sparsa3A::somma(char *file, double fact)\n", file);
    exit(-1);
  }

  int nc=0;
  if(fscanf(f,"%d",  &nc)!=1) {
    printf("problemi per la lettura del file %s nella routine  Sparsa3A::somma(char *file, double fact)\n", file);
    fclose(f);
    exit(-1);
  }

  for(int i=0; i<nc; i++) {
    double a;
    int r,c;
    // fscanf returns the number of items converted: all three are needed.
    if(fscanf(f,"%le %d  %d",&a, &r,&c)!=3) {
      printf("problemi per la lettura del file %s nella routine  Sparsa3A::somma(char *file, double fact)\n", file);
      fclose(f);
      exit(-1);
    }
    aggiungiElemento(a*fact,r,c);
  }
  fclose(f);
}

// Makes this matrix a copy of a: same n, dim and capacity, same triplets.
// The Gershgorin data (nG, Gmin, Gmax) is not copied.
void Sparsa3A::inizializza( Sparsa3A &a ) {
  // Copying onto itself would free the very arrays that are then read.
  if(&a==this) return;

  // Build the copy first, release the old storage afterwards.
  double *nuovicoeff=new double[a.nsize];
  int    *nuovecol  =new int[a.nsize];
  int    *nuoverow  =new int[a.nsize];

  // Only the first a.n slots hold data; the rest of the capacity is unused.
  memcpy(nuovicoeff, a.coeff, a.n*sizeof(double) );
  memcpy(nuovecol,   a.col,   a.n*sizeof(int) );
  memcpy(nuoverow,   a.row,   a.n*sizeof(int) );

  // The arrays come from new[]: they must be released with delete[].
  delete [] coeff;
  delete [] col;
  delete [] row;

  coeff=nuovicoeff;
  col=nuovecol;
  row=nuoverow;

  n=a.n;
  dim=a.dim;
  nsize=a.nsize;
}

// Replaces the content of this matrix with fact times the entries of a.
// NOTE(review): this matrix is emptied before a is read, so passing the
// object itself as a yields an empty matrix.
void Sparsa3A::inizializza( Sparsa3A &a , double fact)
{
	pulisci();          // start from an empty matrix
	somma(a,fact);      // then accumulate fact*a into it
}

// Adds fact times every stored entry of a to this matrix, one entry at a
// time through aggiungiElemento.
void Sparsa3A::somma( Sparsa3A &a , double fact)
{
	int k=0;
	while(k<a.n) {
		aggiungiElemento(a.coeff[k]*fact, a.row[k], a.col[k] );
		k++;
	}
}


// Allocates a new Sparsa3A holding a copy of this matrix and stores its
// address in m.  The previous value of m is overwritten, not freed; the
// caller owns the new object.
void Sparsa3A::copiasu(MatriceSparsa * & m)
{
	Sparsa3A *copia=new Sparsa3A;
	copia->inizializza(*this );
	m=copia;
}


// Doubles the matrix: with A the current dim x dim matrix and I the
// identity, the new 2*dim x 2*dim matrix is
//
//        |   A      -imag*I |
//        | imag*I     A     |
//
// The original entries are kept, a shifted copy fills the lower-right
// block, and 2*dim entries carry +/- imag in the off-diagonal blocks.
// On return n, dim and nsize reflect the new matrix.
void Sparsa3A::raddoppia(double imag) {
  // Room for two copies of the current capacity plus the 2*dim new entries.
  const int nuovasize=2*nsize+2*dim;

  double *newcoeff=new double [nuovasize];
  int    *newcol  =new int    [nuovasize];
  int    *newrow  =new int    [nuovasize];

  // Upper-left block: the entries as they are.
  memcpy(newcoeff, coeff, n*sizeof(double));
  memcpy(newcol,   col  , n*sizeof(int));
  memcpy(newrow,   row  , n*sizeof(int));

  // Lower-right block: same coefficients, indices shifted by dim.
  memcpy(newcoeff+n, coeff, n*sizeof(double));
  for(int i=0; i<n; i++) {
	newcol[i+n]=dim+col[i];
	newrow[i+n]=dim+row[i];
  }

  // Lower-left block: +imag on its diagonal.
  for(int k=0; k<dim;k++) {
	int pos=2*n+k;
	newrow[pos]=k+dim;
	newcol[pos]=k;
	newcoeff[pos] =imag;
  }
  // Upper-right block: -imag on its diagonal.
  for(int k=0; k<dim;k++) {
	int pos=2*n+dim+k;
	newrow[pos]=k;
	newcol[pos]=k+dim;
	newcoeff[pos] =-imag;
  }

  // The old arrays come from new[]: they must be released with delete[].
  delete [] col;
  delete [] row;
  delete [] coeff;

  col = newcol;
  row = newrow;
  coeff = newcoeff;

  nsize = nuovasize;
  n=2*n+2*dim;
  dim=2*dim;
}

// Grows the triplet storage by 1000 entries, keeping the current content.
void Sparsa3A::aggiungimemoria() {
  const int nuovasize=nsize+1000;

  double *newcoeff=new double [nuovasize];
  int    *newcol  =new int    [nuovasize];
  int    *newrow  =new int    [nuovasize];

  memcpy(newcoeff, coeff, nsize*sizeof(double));
  memcpy(newcol,   col  , nsize*sizeof(int));
  memcpy(newrow,   row  , nsize*sizeof(int));

  // The old arrays come from new[]: they must be released with delete[].
  delete [] col;
  delete [] row;
  delete [] coeff;

  col = newcol;
  row = newrow;
  coeff = newcoeff;

  nsize = nuovasize;
}

// Replaces every stored coefficient c by fattore*c, and additionally adds
// addendo to the coefficients stored on the diagonal.  Diagonal positions
// with no stored entry are not created.
void Sparsa3A::trasforma(double fattore, double addendo)
{
	for(int k=0; k<n; k++) {
		double valore=fattore*coeff[k];
		if(row[k]==col[k]) valore+=addendo;
		coeff[k]=valore;
	}
}
// Returns the coefficient stored at (i,j), or 0 when no such entry exists.
// The stored entries are scanned linearly and the first match is returned.
double  Sparsa3A::getelement(int i,int j)
{
	int k=0;
	while(k<n && !(row[k]==i && col[k]==j)) k++;

	if(k<n) return coeff[k];
	return 0;
}

/*
void Sparsa3A::cholesky(int *ordine, Sparsa3A & chol) {
	int fatto[dim];
	memset(fatto,0, dim*sizeof(int));
	chol.azzera();
	while() {
		int posmax;
		double max=-1.0e12,dum;
		for(int i=0; i<dim; i++) {
			if(  !fatto[i] &&   (dum=getelement(i,i)) > max) {
				max=dum;
				posmax=i;
			}
		}
		for(int i=0; i<dim; i++) {
			if(fatto[i]) {
				double f= getelement(posmax,i);
				if(f)				dum -= f*f;
			}
		}
		dum=sqrt(dum);
		if(dum) chol.aggiungielemento(dum,posmax,posmax);
		fatto[posmax]=1;
		for(int i=0; i<dim; i++) {
			if(!fatto[i]) {
				
			}
		}
		
		
			
		}
	
	
	}
	

}

*/

// Sorts the stored entries by increasing row and, within a row, by
// increasing column (bubble sort on the three parallel arrays).
//
// The inner loop used to increment i instead of j: j stayed at 0, so only
// the first two entries were ever compared and the arrays were left
// essentially unsorted.
void Sparsa3A::riordina() {
  for(int i=0; i<n-1; i++)
    {
      for(int j=0; j<n-1-i; j++)
	{
	  const bool fuoriordine =
	    row[j]>row[j+1] || (row[j]==row[j+1] && col[j]>col[j+1] );
	  if(!fuoriordine) continue;

	  // Exchange entries j and j+1 in all three arrays.
	  const double dum=coeff[j+1];
	  coeff[j+1]=coeff[j];
	  coeff[j]= dum;

	  int dumi=col[j+1];
	  col[j+1]=col[j];
	  col[j]= dumi;

	  dumi=row[j+1];
	  row[j+1]=row[j];
	  row[j]= dumi;
	}
    }
}

#define EPS 1.0e-14
// Biconjugate-gradient iteration for A*x = b, without preconditioning.
//
//   x     : in  - starting guess;  out - last iterate
//   b     : right-hand side
//   tol   : the iteration stops when err <= tol
//   itmax : at most itmax+1 iterations are performed
//   iter  : out - number of iterations performed
//   err   : out - Euclidean norm of the residual r (not divided by |b|)
//
// Products with the transposed matrix are obtained by calling transpose()
// before and after Moltiplica.  When the iteration limit is passed a message
// is printed and the routine returns normally.
// NOTE(review): err is not assigned when the loop body never runs
// (itmax < 0).
void Sparsa3A::solveBCG( double *x,double *b, double tol, int itmax,int & iter,double &err)
{
	// Vector length.  (A local called n used to shadow the member n, the
	// number of stored coefficients.)
	const int lung=dim;
	const size_t nbyte=lung*sizeof(double);

	double *p = new double [lung];
	double *pp= new double [lung];
	double *r = new double [lung];
	double *rr= new double [lung];
	double *z = new double [lung];
	double *zz= new double [lung];

	iter=0;

	// r = b - A*x ; rr starts equal to r.  Moltiplica accumulates into its
	// first argument, hence the zero fill.
	memset(r,0,nbyte);
	Moltiplica(r,x);
	for(int j=0; j<lung; j++) {
		r[j]=b[j]-r[j];
		rr[j]=r[j];
	}

	// No preconditioner: z is a plain copy of r.
	memcpy(z,r,nbyte);

	double bkden=0;
	while (iter <= itmax) {
		++iter;

		memcpy(zz,rr,nbyte);

		double bknum=0.0;
		for(int j=0; j<lung; j++) bknum += z[j]*rr[j];

		if (iter == 1) {
			for(int j=0; j<lung; j++) {
				p[j]=z[j];
				pp[j]=zz[j];
			}
		} else {
			const double bk=bknum/bkden;
			for(int j=0; j<lung; j++) {
				p[j]=bk*p[j]+z[j];
				pp[j]=bk*pp[j]+zz[j];
			}
		}
		bkden=bknum;

		// z = A*p
		memset(z,0,nbyte);
		Moltiplica(z,p);

		double akden=0.0;
		for(int j=0; j<lung; j++) akden += z[j]*pp[j];
		const double ak=bknum/akden;

		// zz = transpose(A)*pp
		transpose();
		memset(zz,0,nbyte);
		Moltiplica(zz,pp);
		transpose();

		for(int j=0; j<lung; j++) {
			x[j] += ak*p[j];
			r[j] -= ak*z[j];
			rr[j] -= ak*zz[j];
		}
		memcpy(z,r,nbyte);

		err= sqrt(scalare(r,r,lung));
		// cout << " iter " << iter << "  err " << err << endl;
		if (err <= tol) break;
	}
	if(iter>itmax) cout << " ITMAX superato in BCG\n";

	// All work vectors come from new[]: release them with delete[].
	delete [] p;
	delete [] pp;
	delete [] r;
	delete [] rr;
	delete [] z;
	delete [] zz;
}
#undef EPS

// Conjugate-gradient solution of A*x = b.
//
//   x       : out - solution (dim entries)
//   b       : right-hand side
//   tol     : the iteration stops when |r| / |b| <= tol
//   MAXIT   : iteration limit; when it is exceeded a message is printed and
//             the program is terminated with a failure status
//   initial : optional starting guess; without it the first unit vector
//             is used
//
// One progress line is printed per iteration.
void Sparsa3A::solveCGSym( double *x, double *b, double tol, int MAXIT, double *initial=0)
{
	const size_t nbyte=dim*sizeof(double);

	double *r0 = new double [dim];
	double *d0 = new double [dim];
	double *x0 = new double [dim];
	double *r1 = new double [dim];
	double *d1 = new double [dim];
	double *spaziodum = new double [dim];

	// Starting point.
	if (initial ) {
		memcpy(x0, initial, nbyte);
	} else {
		memset(x0, 0, nbyte );
		x0[0]=1.0;
	}

	// r0 = b - A*x0 ; the first search direction is the residual itself.
	memset(r0, 0, nbyte );
	Moltiplica(r0, x0);
	reqmrpbCG( r0, b);
	memcpy(d0, r0, nbyte );

	double r0r0=0;
	for(int i=0; i<dim; i++) {
		r0r0+=r0[i]*r0[i];
	}

	double error=1;
	double dener=0;
	int count=0;

	for(int i=0; i<dim; i++) dener  += b[i]*b[i];

	while( error> tol ) {
		cout <<"count ************************** " <<   count<< " err " << error  << endl;
		if(count> MAXIT) {
			cout << " MAXIT superato in Sparsa3A::solveCGSym\n";
			// Non-convergence is a failure: do not report success.
			exit(EXIT_FAILURE);
		}

		// Step length; dAd leaves A*d0 in spaziodum.
		const double g= r0r0/dAd( d0, spaziodum );
		for(int i=0; i<dim; i++) {
			r1[i] = r0[i] - g*spaziodum[i] ;
		}

		double r1r1=0;
		double r0r1=0;
		for(int i=0; i<dim; i++) {
			r1r1+=r1[i]*r1[i];
			r0r1+=r0[i]*r1[i];
		}

		const double be= (r1r1- r0r1)/r0r0 ;

		error=0.0;
		for(int i=0; i<dim; i++) {
			x0[i] = x0[i] + g*d0[i] ;
			d1[i] = r1[i] + be*d0[i] ;
			error  +=(r1[i])*(r1[i]);
		}
		error= sqrt(error/dener);

		memcpy(r0,r1, nbyte );
		memcpy(d0,d1, nbyte );
		r0r0=r1r1;
		count++;
	}

	memcpy(x,x0, nbyte);

	// All work vectors come from new[]: release them with delete[].
	delete [] x0;
	delete [] r0;
	delete [] d0;
	delete [] r1;
	delete [] d1;
	delete [] spaziodum;
}


// Conjugate-gradient solution of ( transpose(A)*A + imag ) * x = ba, with a
// diagonal scaling of the unknowns.
//
//   x       : out - solution (dim entries)
//   ba      : right-hand side (not modified; a scaled copy is used)
//   imag    : scalar added on the diagonal of transpose(A)*A
//   tol     : the iteration stops when |r| / |b_scaled| <= tol
//   MAXIT   : iteration limit is abs(MAXIT).  When exceeded a message is
//             printed; then MAXIT > 0 terminates the program with a failure
//             status, while MAXIT <= 0 stops iterating and returns the
//             current iterate.
//   initial : optional starting guess; without it the first unit vector
//             is used
//
// Scaling: preco[k] = sqrt( imag + sum of squares of the coefficients in
// row k ).  During the solve the coefficients are divided by
// preco[column]; the unknown actually iterated is preco*x.
//
// The coefficients are restored before returning.  Previously the matrix
// was left scaled, so any later use of the object (including a second call
// of this routine) worked on a different matrix.
void Sparsa3A::solveCGSym2menoImag( double *x, double *ba, double imag, double tol, int MAXIT, double *initial=0)
{
	const size_t nbyte=dim*sizeof(double);

	double *r0   = new double [dim];
	double *rdum = new double [dim];
	double *d0   = new double [dim];
	double *x0   = new double [dim];
	double *r1   = new double [dim];
	double *d1   = new double [dim];
	double *preco= new double [dim];
	double *b    = new double [dim];
	double *spaziodum = new double [dim];

	// Untouched copy of the coefficients, put back at the end.
	// NOTE(review): assumes the paired transpose() calls below leave the
	// order of coeff unchanged — confirm against transpose().
	double *coeffsalvati = new double [n];
	memcpy(coeffsalvati, coeff, n*sizeof(double));

	memcpy(b,ba,nbyte);

	// Scaling factors.
	for(int k=0; k<dim; k++) {
		preco[k]=imag;
	}
	for(int k=0; k<n; k++) {
		preco[ row[k] ] += coeff[k]*coeff[k] ;
	}
	for(int k=0; k<dim; k++) preco[k]=sqrt(preco[k]);
	// for(int k=0; k<dim; k++) preco[k]=1;

	// Scale the right-hand side and the columns of the matrix.
	for(int k=0; k<dim; k++) {
		b[k]=b[k]/preco[k];
	}
	for(int k=0; k<n; k++) {
		coeff[k] /= preco[col[k]];
	}

	memset(r0, 0, nbyte );
	memset(rdum, 0, nbyte );

	// Starting point, expressed in the scaled unknown.
	if (initial ) {
		memcpy(x0, initial, nbyte);
	} else {
		memset(x0, 0, nbyte );
		x0[0]=1.0;
	}
	for(int k=0; k<dim; k++) {
		x0[k] *= preco[k];
	}

	// r0 = b - ( transpose(A)*A*x0 + imag*x0/preco^2 )
	Moltiplica(rdum, x0);
	transpose();
	Moltiplica(r0, rdum);
	transpose();
	for(int k=0; k<dim; k++) r0[k]  =(r0[k]+ imag*x0[k] /preco[k] /preco[k] ) ;

	reqmrpbCG( r0, b);

	memcpy(d0, r0, nbyte );

	double r0r0=0;
	for(int i=0; i<dim; i++) {
		r0r0+=r0[i]*r0[i];
	}

	double error=100;
	double dener=0;
	int count=0;

	for(int i=0; i<dim; i++) dener  += b[i]*b[i];

	while( error> tol ) {
		if(count> abs(MAXIT) ) {
			cout << " MAXIT superato in Sparsa3A::solveCGSym2menoImag\n";
			// Non-convergence with MAXIT > 0 is fatal: report failure.
			if(MAXIT >0) exit(EXIT_FAILURE);
			else break;
		}

		// Step length; dA2menoImag leaves A*d0 in spaziodum.
		const double g= r0r0/dA2menoImag( d0, spaziodum,  imag , preco);

		// spaziodum = transpose(A)*A*d0 + imag*d0/preco^2
		memset(rdum,0, nbyte);
		transpose();
		Moltiplica(rdum, spaziodum);
		transpose();
		for(int i=0; i<dim; i++) spaziodum[i] = (rdum[i] + imag*d0[i]/preco[i]/preco[i] );

		for(int i=0; i<dim; i++) {
			r1[i] = r0[i] - g*spaziodum[i] ;
		}

		double r1r1=0;
		double r0r1=0;
		for(int i=0; i<dim; i++) {
			r1r1+=r1[i]*r1[i];
			r0r1+=r0[i]*r1[i];
		}

		const double be= (r1r1- r0r1)/r0r0 ;

		error=0.0;
		for(int i=0; i<dim; i++) {
			x0[i] = x0[i] + g*d0[i] ;
			d1[i] = r1[i] + be*d0[i] ;
			error  +=(r1[i])*(r1[i]);
		}
		error= sqrt(error/dener);

		memcpy(r0,r1, nbyte );
		memcpy(d0,d1, nbyte );
		r0r0=r1r1;
		count++;
	}

	// Back from the scaled unknown to x.
	memcpy(x,x0, nbyte);
	for(int i=0; i<dim; i++) x[i]/=preco[i];

	// Undo the in-place scaling of the matrix.
	memcpy(coeff, coeffsalvati, n*sizeof(double));

	// All work vectors come from new[]: release them with delete[].
	delete [] coeffsalvati;
	delete [] x0;
	delete [] r0;
	delete [] rdum;
	delete [] d0;
	delete [] r1;
	delete [] d1;
	delete [] spaziodum;
	delete [] preco;
	delete [] b;
}



void Sparsa3A::solveCGSym( double *x, double *y,
												   double *b, double *d,
												   double tol, int MAXIT, double *initial=0)
{
	complex<double>  be;
	complex<double> g;
	double *r0,*d0,*x0;
	double *r1,*d1 ;
	
	double *ri0,*di0,*xi0;
	double *ri1,*di1 ;
	
	double *spaziodum, *spaziodumi;
	
	r0 = new double [dim];
	d0 = new double [dim];
	x0 = new double [dim];

	r1 = new double [dim];
	d1 = new double [dim];
	
	ri0 = new double [dim];
	di0 = new double [dim];
	xi0 = new double [dim];

	ri1 = new double [dim];
	di1 = new double [dim];

	spaziodum  = new double [dim];
	spaziodumi = new double [dim];
	
	
	if (initial ) {
		memcpy(x0, initial, dim*sizeof(double));
	} else {
		memset(x0, 0, dim*sizeof(double) );
		x0[0]=1.0;
	}
	
	memset(xi0, 0, dim*sizeof(double) );
		
	memset(r0, 0, dim*sizeof(double) );
	memset(ri0, 0, dim*sizeof(double) );
	
	Moltiplica(r0, x0);
	Moltiplica(ri0, xi0);
	
	for(int i=0; i<dim; i++) {
		r0 [i]  =-r0[i] +b[i] + xi0[i]*d[i];
		ri0[i]  =  -ri0[i] - x0 [i]*d[i];
	}
	
	memcpy(d0 ,  r0, dim*sizeof(double) );
	memcpy(di0, ri0, dim*sizeof(double) );
	
	complex<double> r0r0,  r1r1,r0r1,dumc;
	r0r0=0.0;

	
	for(int i=0; i<dim; i++) {
		dumc=complex<double> ( r0[i] , ri0[i] );
		r0r0  +=  dumc*dumc ;
	}	
	
	double error=1;
	double dener=0;
	int count=0;
	
	for(int i=0; i<dim; i++) dener  += b[i]*b[i];

	while( error> tol ) {
		if(count> abs(MAXIT)) {
			cout << " MAXIT superato in Sparsa3A::solveCGSym\n";
			if(MAXIT>0) exit(0);
			else break;
		}
		
	
		
		g=r0r0/dAd( d0,di0,d , spaziodum, spaziodumi  );
		
		double gr,gi;
		
		gr= 		g.real();
		gi=		g.imag();
		
		for(int i=0; i<dim; i++) {
			r1[i]  =  r0[i] - gr*spaziodum [i]  + gi * spaziodumi[i] ;
			ri1[i] = ri0[i] - gr*spaziodumi[i]  -gi * spaziodum [i] ;
		}
	
		r1r1=0.0;
		r0r1=0.0;
		double add11I=0, add01I=0, add11R=0, add01R=0;
		for(int i=0; i<dim; i++) {
			
			add11R+=r1[i]*r1[i]- ri1[i]* ri1[i];
			add11I +=2*r1[i]*ri1[i];
			
			add01R+=r1[i]*r0[i]- r0[i]* ri1[i];
			add01I +=r0[i]*ri1[i]+r1[i]*ri0[i]  ;
			
			// dumc=complex<double> ( r1[i] , ri1[i] );
			// r1r1+=dumc*dumc;
			// r0r1+=dumc*complex<double> ( r0[i]  , ri0[i] );
		}	
		
		r1r1=complex<double>(add11R,add11I);
		r0r1=complex<double>(add01R,add01I);
		
		be= (r1r1- r0r1)/r0r0 ;
	    double ber,bei;
	    ber=be.real();
	    bei=be.imag();
		
		error=0.0;
		for(int i=0; i<dim; i++) {
			x0[i]  =  x0[i] + gr*d0[i] - gi * di0[i] ;
			xi0[i] = xi0[i] + gr*di0[i] +  gi * d0[i]     ;
			d1[i] = r1[i] + ber*d0[i] -  bei*di0[i];
			di1[i] = ri1[i] + bei*d0[i]+ ber*di0[i]  ;
			error  +=(r1[i])*(r1[i])+ (ri1[i])*(ri1[i]);
		}
		
		error= sqrt(error/dener);
		
		
		memcpy(r0,r1, dim*sizeof(double) );
		memcpy(d0,d1, dim*sizeof(double) );
		
		memcpy(ri0,ri1, dim*sizeof(double) );
		memcpy(di0,di1, dim*sizeof(double) );
		
		r0r0=r1r1;
		count++;
	}
  dAd( x0,xi0,d , spaziodum, spaziodumi  );	
	memcpy(x,x0, dim*sizeof(double));
	memcpy(y,xi0, dim*sizeof(double));
	delete x0;
	delete r0;
	delete d0;
	
	delete xi0;
	delete ri0;
	delete di0;
	
	delete r1;
	delete d1;
	
	delete ri1;
	delete di1;
	delete spaziodum;
};



// Stores A*d in dum and returns
//     sum_i ( dum[i]^2 + Imag * d[i]^2 / preco[i]^2 ).
double  Sparsa3A::dA2menoImag( double *d, double *dum ,double Imag, double * preco)
{
	// Moltiplica accumulates into its result, so dum is cleared first.
	memset(dum,0, dim*sizeof(double));
	Moltiplica(dum, d );

	double totale=0;
	int i=0;
	while(i<dim) {
		totale += dum[i]*dum[i]+Imag*d[i]*d[i]/preco[i]/preco[i] ;
		i++;
	}
	return totale;
}

// Stores A*d in dum and returns the scalar product of d with it.
double  Sparsa3A::dAd( double *d, double *dum )
{
	// Moltiplica accumulates into its result, so dum is cleared first.
	memset(dum,0, dim*sizeof(double));
	Moltiplica(dum, d );

	double totale=0;
	int i=0;
	while(i<dim) {
		totale += d[i]*dum[i];
		i++;
	}
	return totale;
}

// Complex counterpart of dAd(d,dum) for the operator A + i*diag(d) applied
// to the vector d0 + i*di0 (unconjugated products throughout).
//
//   d0, di0 : real and imaginary part of the vector
//   d       : diagonal of the imaginary part of the operator
//   dum     : out - real part of the operator applied to the vector,
//             A*d0 - d*di0
//   dumi    : out - imaginary part, A*di0 + d*d0
//
// Returns (d0 + i*di0)^T (A + i*diag(d)) (d0 + i*di0).
complex<double>  Sparsa3A::dAd(double *d0, double *di0, double *d, double *dum,
															double *dumi ) {
	double denor=0, denoi=0;
	memset(dum,0, dim*sizeof(double));
	Moltiplica(dum, d0 );
	
	for(int i=0; i<dim; i++) {
		// Here dum[i] still holds (A*d0)[i]; it is used for the sums before
		// being turned into the real part of the result.
		denor +=    d0[i]*dum[i];
		denoi += 2*di0[i]*dum[i];
		
		dum[i]+=	-di0[i]*d[i] ;
	
	}	
	
	memset(dumi,0, dim*sizeof(double));
	Moltiplica(dumi, di0 );
	
	for(int i=0; i<dim; i++) {
		// Likewise dumi[i] is (A*di0)[i] until the last statement of the loop.
		denor -=    di0[i]*dumi[i] + 2*di0[i]*d0[i]*d[i] ;
		denoi +=    (d0[i]*d0[i]-di0[i]*di0[i])*d[i];	
		
		dumi[i]+=	 d0[i]*d[i] ;
	
	}	
	
	return complex<double> (denor, denoi) ;
}


// In place, for the first dim entries: r = -r + b.
void Sparsa3A::reqmrpbCG( double *r, double *b)
{
	int i=0;
	while(i<dim) {
		r[i] = -r[i] + b[i] ;
		i++;
	}
}
// a[i] += s*b[i] for i in [0,n).
// With _UNROLL_ defined the loop is unrolled by four: the four inputs of a
// group are read and scaled before any of the four results is stored, and a
// tail loop deals with the n%4 leftover elements.
void somma      (double *a ,    double * b,  double s ,     int n)
{
#ifdef _UNROLL_
  const int gruppi=n/4;
  int base=0;

  for(int g=0; g<gruppi; g++, base+=4)
    {
      const double t0=s*b[base];
      const double t1=s*b[base+1];
      const double t2=s*b[base+2];
      const double t3=s*b[base+3];

      a[base]   +=t0;
      a[base+1] +=t1;
      a[base+2] +=t2;
      a[base+3] +=t3;
    }

  for(int i=4*gruppi; i<n; i++) a[i] += s*b[i];
#else
  int i=0;
  while(i<n)
    {
      a[i] += s*b[i];
      ++i;
    }
#endif
}

// Scalar product of the first n entries of a and b.
// With _UNROLL_ defined the products are formed four at a time and added
// to the running sum in index order; a tail loop handles the n%4 leftover
// elements.
double scalare(double * a, double *b, int n)
{
	double ris=0.0;
#ifdef _UNROLL_
  const int gruppi=n/4;
  int base=0;

  for(int g=0; g<gruppi; g++, base+=4)
    {
      const double p0=a[base]  *b[base];
      const double p1=a[base+1]*b[base+1];
      const double p2=a[base+2]*b[base+2];
      const double p3=a[base+3]*b[base+3];

      ris=ris+p0+p1+p2+p3;
    }

  for(int i=4*gruppi; i<n; i++) ris += a[i]*b[i];
#else
  int i=0;
  while(i<n)
    {
      ris += a[i]*b[i];
      ++i;
    }
#endif
	return ris;
}


// Divides the first n entries of a by b, in place.
// With _UNROLL_ defined the loop is unrolled by four, with a tail loop for
// the n%4 leftover elements.
void normalizza(double * a, double b, int n)
{
#ifdef _UNROLL_
  const int gruppi=n/4;
  int base=0;

  for(int g=0; g<gruppi; g++, base+=4)
    {
      a[base]  =a[base]  /b;
      a[base+1]=a[base+1]/b;
      a[base+2]=a[base+2]/b;
      a[base+3]=a[base+3]/b;
    }

  for(int i=4*gruppi; i<n; i++) a[i]=a[i]/b;
#else
  int i=0;
  while(i<n)
    {
      a[i]=a[i]/b;
      ++i;
    }
#endif
}


// Adds the matrix-vector product to ris:
//     ris[row[k]] += coeff[k] * vect[col[k]]   for every stored entry k.
// ris is NOT cleared here; callers that want the plain product zero it
// before the call.
// With _UNROLL_ defined the entries are processed four at a time (all four
// products are formed before the first accumulation), with a tail loop for
// the n%4 leftover entries.
void Sparsa3A::Moltiplica(double *ris, double *vect  )
{
#ifdef _UNROLL_
  const int gruppi=n/4;
  int base=0;

  for(int g=0; g<gruppi; g++, base+=4)
    {
      const double x0=vect[col[base  ]];
      const double x1=vect[col[base+1]];
      const double x2=vect[col[base+2]];
      const double x3=vect[col[base+3]];

      const double t0=coeff[base  ]*x0;
      const double t1=coeff[base+1]*x1;
      const double t2=coeff[base+2]*x2;
      const double t3=coeff[base+3]*x3;

      ris[row[base  ]]+=t0;
      ris[row[base+1]]+=t1;
      ris[row[base+2]]+=t2;
      ris[row[base+3]]+=t3;
    }

  for(int k=4*gruppi; k<n; k++) ris[row[k]] += coeff[k]*vect[col[k]];
#else
  int k=0;
  while(k<n)
    {
      ris[row[k]] += coeff[k]*vect[col[k]];
      ++k;
    }
#endif
}




// Adds a to the coefficient at (r,c).
// If an entry for (r,c) is already stored, a is accumulated into it;
// otherwise a new entry is appended (growing the storage when it is full)
// and dim is raised so that it covers both r and c.
void Sparsa3A::aggiungiElemento(double a, int r, int c)
{
  // Linear scan for an existing entry at (r,c).
  for(int pos=0; pos<n; pos++)
    {
      if( row[pos]!=r || col[pos]!=c) continue;
      coeff[pos]+=a;
      return;
    }

  // Not there yet: append.
  if(n==nsize) aggiungimemoria();
  row[n]=r;
  col[n]=c;
  coeff[n]=a ;

  if(r>dim-1) dim=r+1;
  if(c>dim-1) dim=c+1;

  n++;
}








// Builds the compact symmetric storage from the triplet matrix A.
//
// Layout produced:
//   coeff[0..n-1]          : diagonal coefficients (0 where none is stored)
//   coeff[n..n+nhors-1]    : coefficients with column > row, in the order
//                            in which A holds them after riordina()
//   ncol [n..n+nhors-1]    : their column indices
//   ncol [r-1], r = 1..n-1 : position in coeff/ncol that had been reached
//                            when the diagonal entry of row r was met
// Entries with column < row are ignored.
// NOTE(review): ncol[n-1] is never assigned; the consumer of this layout
// (presumably old Fortran routines, see SparsaS::Moltiplica) is not visible
// here, so the format is kept exactly as the original code builds it.
//
// Side effects on A: a zero diagonal entry is added for every row that has
// none, and A is reordered with riordina().
//
// Fixes with respect to the previous version:
//   - the fill pass restarts its counter from 0; it used to continue from
//     the total count and wrote past the end of coeff/ncol;
//   - the member nhors is now set (a local of the same name hid it);
//   - n is raised when an index equals n (">" missed that case);
//   - arrays are released with delete[].
void SparsaS::inizializza(Sparsa3A &A) {
  if(ncol)  delete [] ncol;
  if(coeff) delete [] coeff;
  ncol=0;
  coeff=0;

  // Pass 1: matrix size and number of entries above the diagonal.
  n=0;
  int sopra=0;
  for(int j=0; j<A.n; j++)
    {
      if( A.col[j]>A.row[j]) sopra++;
      if(A.col[j]>=n) n= A.col[j]+1;
      if(A.row[j]>=n) n= A.row[j]+1;
    }
  nhors=sopra;

  // Make sure every diagonal entry exists, then sort by (row, column) so
  // that each diagonal entry precedes the other kept entries of its row.
  for(int i=0; i<n; i++) A.aggiungiElemento(0.0,i,i);

  A.riordina();

  coeff= new double [n+nhors];
  ncol = new int    [n+nhors];

  for(int i=0; i<n; i++) coeff[i]=0.0;

  // Pass 2: fill.
  int messi=0;
  for(int j=0; j<A.n; j++)
    {
      if( A.col[j]>A.row[j]) {
	coeff[n+messi]= A.coeff[j];
	ncol [n+messi]= A.col  [j];
	messi++;
      }
      else if( A.col[j]== A.row[j]) {
	coeff[A.row[j]] = A.coeff[j];
	if(A.row[j]>0 ) ncol [A.row[j]-1] = n+messi ;
      }
    }
}



// Makes this object a copy of A: same n and nhors, and a private copy of
// the n+nhors entries of coeff and ncol.
void SparsaS::inizializza(SparsaS &A) {
  // Copying onto itself would free the very arrays that are then read.
  if(&A==this) return;

  // The arrays come from new[]: they must be released with delete[].
  if(ncol)  delete [] ncol;
  if(coeff) delete [] coeff;

  n=A.n        ;
  nhors=A.nhors;
  coeff= new double [n+nhors];
  ncol = new int    [n+nhors];
  memcpy(coeff, A.coeff, (n+nhors)*sizeof(double) );
  memcpy(ncol , A.ncol , (n+nhors)*sizeof(int   ) );
}


// Releases coeff and ncol and clears the members.
SparsaS::~SparsaS() {
  // The arrays come from new[]: they must be released with delete[].
  if(ncol)  delete [] ncol;
  if(coeff) delete [] coeff;
  n=0     ;
  nhors=0 ;
  coeff=0 ;
  ncol=0  ;
}


// Intentionally left empty: ris and vect are not touched.
// SparsaS will probably only be needed to make some old Fortran routines
// work, so no product is implemented for it.
void SparsaS::Moltiplica(double *ris, double *vect  )
{
}


 SparsaS::SparsaS( double *x, double *b, double tol, int MAXIT, double *initial=0)
{
	cout << " Problema SparsaS::SparsaS \n";
	exit(0);
};	



// Copying is not implemented for SparsaS: reaching this routine is reported
// as a problem and the program is terminated; m is left untouched.
void SparsaS::copiasu(MatriceSparsa * & m)
{
	cout << " Problema :SparsaS::copiasu\n";
	// Fatal path: report failure to the caller of the program.  The previous
	// exit(0) made the run look successful.
	exit(EXIT_FAILURE);
}



