#ifndef SBCLIB_SQUAREMATRIX
// Operations for square matrices (multiplication, exponentiation, inversion)
#include <array_2d.c>
#include <array_1d.c>
#include <math.h>
#include <rnd/Maxi.c>
#include <rnd/Mini.c>

REAL **matrixid(const int m)
{ // Returns the square m-by-m identity matrix
	int i,j; REAL **ret=array_2d(m,m);
	for (i=m-1;i>=0;i--) for (j=m-1;j>=0;j--) ret[i][j]=(i==j);
	return ret;
}

REAL **matrixdiag(const REAL *d)
{
	int n,m=array_1d_m(d); REAL **ret=array_2d_zeroed(m,m);
	for (n=m-1;n>=0;n--) ret[n][n]=d[n];
	return ret;
}

REAL **matrixmul(const REAL * const *a,const REAL * const *b)
{ // Constructs AB (also works for compatible oblong matrices)
	int i,j,k,mi=array_2d_xs(a),mj=array_2d_ys(b),mk=array_2d_ys(a);
	REAL t,**ret=array_2d(mi,mj);
	for (i=mi-1;i>=0;i--) for (j=mj-1;j>=0;j--)
	{
		t=0;
		for (k=mk-1;k>=0;k--) t+=a[i][k]*b[k][j];
		ret[i][j]=t;
	}
	return ret;
}

void matrixmulinto(REAL **ret,const REAL * const *a,const REAL * const *b)
{ // Writes AB into ret (also works for compatible oblong matrices)
	int i,j,k,mi=array_2d_xs(a),mj=array_2d_ys(b),mk=array_2d_ys(a); REAL t;
	for (i=mi-1;i>=0;i--) for (j=mj-1;j>=0;j--)
	{
		t=0;
		for (k=mk-1;k>=0;k--) t+=a[i][k]*b[k][j];
		ret[i][j]=t;
	}
}

void matrixmulby(REAL **a,const REAL * const *b)
{ // Writes AB overwriting A (only works for square B compatible with A)
	int i,j,k,mi=array_2d_xs(a),mj=array_2d_ys(a); REAL t,*a_i=array_1d(mj);
	for (i=mi-1;i>=0;i--)
	{
		memcpy(a_i,a[i],mj*sizeof(REAL));
		for (j=mj-1;j>=0;j--)
		{
			t=0;
			for (k=mj-1;k>=0;k--) t+=a_i[k]*b[k][j];
			a[i][j]=t;
		}
	}
	array_1d_free(a_i);
}

void matrixmulay(const REAL * const *a,REAL **b)
{ // Writes AB overwriting B (only works for square A compatible with B)
	int i,j,k,mi=array_2d_xs(b),mj=array_2d_ys(b); REAL t,*btj=array_1d(mi);
	for (j=mj-1;j>=0;j--)
	{
		for (i=mi-1;i>=0;i--) btj[i]=b[i][j];
		for (i=mi-1;i>=0;i--)
		{
			t=0;
			for (k=mi-1;k>=0;k--) t+=a[i][k]*btj[k];
			b[i][j]=t;
		}
	}
	array_1d_free(btj);
}

REAL *matrixmulvector(const REAL * const *a,const REAL *b)
{ // Constructs a vector result, works for rectangular A too, but b must be at least long enough for A
	int i,j,mi=array_2d_xs(a),mj=array_2d_ys(a); REAL *ret=array_1d(mi),t;
	for (i=mi-1;i>=0;i--)
	{
		t=0;
		for (j=mj-1;j>=0;j--) t+=a[i][j]*b[j];
		ret[i]=t;
	}
	return ret;
}

REAL *matrixtmulvector(const REAL * const *a,const REAL *b)
{ // Constructs a vector result (Atb) 
	int i,j,mi=array_2d_ys(a),mj=array_2d_xs(a); REAL *ret=array_1d(mi),t;
	for (i=mi-1;i>=0;i--)
	{
		t=0;
		for (j=mj-1;j>=0;j--) t+=a[j][i]*b[j];
		ret[i]=t;
	}
	return ret;
}

void matrixmulvectorby(const REAL * const *a,REAL *b)
{ // A must be square and same size as b
	int i,j,m=array_2d_xs(a); REAL *c=array_1d(m),t;
	for (i=m-1;i>=0;i--)
	{
		t=0;
		for (j=m-1;j>=0;j--) t+=a[i][j]*b[j];
		c[i]=t;
	}
	array_1d_eq(b,c); array_1d_free(c);
}

void matrixvectormulby(REAL *b,const REAL * const *a)
{ // A must be square and same size as b (equivalent to matrixmulvectorby with A^t)
	int i,j,m=array_2d_xs(a); REAL *c=array_1d(m),t;
	for (i=m-1;i>=0;i--)
	{
		t=0;
		for (j=m-1;j>=0;j--) t+=b[j]*a[j][i];
		c[i]=t;
	}
	array_1d_eq(b,c); array_1d_free(c);
}

void matrixtranspose(REAL **a)
{ // In-place, works for square matrices only
	int m=array_2d_xs(a),i,j; REAL t;
	for (i=m-1;i>0;i--) for (j=i-1;j>=0;j--)
		{t=a[i][j]; a[i][j]=a[j][i]; a[j][i]=t;}
}

REAL matrixsupnorm(const REAL * const *a)
{ // Works for any size
	int i,j,mi=array_2d_xs(a),mj=array_2d_ys(a); REAL ret=0;
	for (i=mi-1;i>=0;i--) for (j=mj-1;j>=0;j--) ret=Max(ret,fabs(a[i][j]));
	return ret;
}

REAL matrixsupmetric(const REAL * const *a,const REAL * const *b)
{ // A and B have to be the same size
	int i,j,mi=array_2d_xs(a),mj=array_2d_ys(a); REAL ret=0;
	for (i=mi-1;i>=0;i--) for (j=mj-1;j>=0;j--) ret=Max(ret,fabs(a[i][j]-b[i][j]));
	return ret;
}

REAL matrixtrace(const REAL * const *a)
{
	REAL ret=0;
	for (int n=array_2d_xs(a)-1;n>=0;n--) ret+=a[n][n];
	return ret;
}

REAL matrixdet(const REAL * const *a)
{ // Gaussian elimination keeping track of row swaps
	int n,i,j,m=array_2d_xs(a); REAL **b=array_2d_copy(a); REAL ret=1,t;
	for (n=0;n<m;n++)
	{
//array_2d_print(b);getchar();
		j=n; // Determine the biggest parrot
		for (i=n+1;i<m;i++) if (fabs(b[i][n])>fabs(b[j][n])) j=i;
		if (j!=n) {ret*=-1; for (i=n;i<m;i++) {t=b[n][i]; b[n][i]=b[j][i]; b[j][i]=t;}} // Move parrot into place	
//array_2d_print(b);
//printf("ret = %lg\n",ret);getchar();
		if (b[n][n]!=0) for (i=n+1;i<m;i++) // Now subtract multiples of the parrot off the other rows
		{
			t=b[i][n]/b[n][n];
			for (j=n;j<m;j++) b[i][j]-=t*b[n][j];
		}
//array_2d_print(b);getchar();
	}
	for (n=m-1;n>=0;n--) ret*=b[n][n];
//printf("ret = %lg\n",ret);getchar();
	array_2d_free(b);
	return ret;
}

int invertmatrix(REAL **);

REAL **matrixpow(const REAL * const *a,const int p)
{ // Constructs A^p for integer p (requires squareness)
	if (p<0)
	{
		REAL **ret=matrixpow(a,-p);
		invertmatrix(ret);
		return ret;
	}
	else
	{
		int m=array_2d_xs(a);
		if (p==0) return matrixid(m);
		else
		{
			REAL **ret=array_2d_copy(a),**tmp=array_2d(m,m); int n;
			for (n=8*sizeof(int)-1;!(p>>n);n--);
			for (n--;n>=0;n--)
			{
				matrixmulinto(tmp,ret,ret);
				if ((p>>n)&1) matrixmulinto(ret,tmp,a);
				else array_2d_eq(ret,tmp);
			}
			array_2d_free(tmp);
			return ret;
		}
	}
}

REAL **matrixexp(const REAL * const *a)
{ // Constructs e^A (A has to be square this time!)
	int m=array_2d_xs(a);
	REAL **ret=matrixid(m),**p=array_2d_copy(a),**q=array_2d(m,m); int n,i,j;
	for (i=m-1;i>=0;i--) for (j=m-1;j>=0;j--) ret[i][j]+=p[i][j];
	for (n=2;matrixsupnorm(p)>matrixsupnorm(ret)*1e-16 && n<1000;n++)
	{
		matrixmulinto(q,p,a); array_2d_scale(q,(REAL)1.0/n);
		array_2d_eq(p,q);
		for (i=m-1;i>=0;i--) for (j=m-1;j>=0;j--) ret[i][j]+=p[i][j];
	}
	array_2d_free(p); array_2d_free(q);
	if (n>=1000) {array_2d_free(ret); return array_2d_zeroed(m,m);}
	else return ret;
}

REAL maxeigenvalue(const REAL * const *a,REAL **ev=NULL)
{ // Work out the asymptotic increase factor...
	int m=array_2d_xs(a);
	REAL ret=1,p=1,**b=array_2d_copy(a),**c=array_2d(m,m),t;
	while (p>1e-12)
	{
		t=matrixsupnorm(b);
		if (t==0) {array_2d_free(b); array_2d_free(c); return 0;}
		array_2d_scale(b,1.0/t);
		ret*=pow(t,p);
		matrixmulinto(c,b,b); array_2d_eq(b,c);
		p*=0.5;
	}
	array_2d_free(c);
	if (ev) // Work out eigenvector too
	{
		int n,i,bn; double bt=-1;
		for (n=m-1;n>=0;n--)
		{
			t=0;
			for (i=m-1;i>=0;i--) t=Max(t,fabs(b[i][n]));
			if (t>bt) {bt=t; bn=n;}
		}
		t=0;
		for (n=m-1;n>=0;n--) t+=b[n][bn]*b[n][bn];
		t=1.0/sqrt(t);
		*ev=array_1d(m);
		for (n=m-1;n>=0;n--) (*ev)[n]=b[n][bn]*t;
	}
	array_2d_free(b);
	return ret;
}

// Matrix inversion stuff (used to be <invertmatrix.c>)

int invertmatrix(REAL **a)
{ // (squareness again necessary)
	int m=array_2d_xs(a);
	int i,j,k; REAL x,**ai=array_2d(m,m);
	for (i=m-1;i>=0;i--) for (j=m-1;j>=0;j--) ai[i][j]=(i==j);
	for (i=0;i<m;i++)
	{
		x=0;
		for (j=i;j<m;j++) if (fabs(a[j][i])>=x)
			{k=j; x=fabs(a[j][i]);}
		for (j=i;j<m;j++)
			{x=a[i][j]; a[i][j]=a[k][j]; a[k][j]=x;}
		for (j=m-1;j>=0;j--)
			{x=ai[i][j]; ai[i][j]=ai[k][j]; ai[k][j]=x;}
		if (a[i][i]==0) return 0;
		for (j=i+1;j<m;j++)
		{
			x=-a[j][i]/a[i][i];
			for (k=0;k<m;k++)
				{a[j][k]+=x*a[i][k]; ai[j][k]+=x*ai[i][k];}
			a[j][i]=0;
		}
		x=1.0/a[i][i];
		for (j=0;j<m;j++) {a[i][j]*=x; ai[i][j]*=x;}
		a[i][i]=1;
	}
	for (i=m-2;i>=0;i--) for (j=i+1;j<m;j++)
	{
		x=-a[i][j];
		for (k=m-1;k>=0;k--) ai[i][k]+=x*ai[j][k];
	}
	for (i=m-1;i>=0;i--) for (j=m-1;j>=0;j--) a[i][j]=ai[i][j];
	array_2d_free(ai);
	return 1;
}

int invertmatrix_long(REAL **a_ext)
{ // Useful for checking for rounding errors with the above
	int i,j,k,m=array_1d_m(a_ext); long double x,
		**a=(long double **)malloc(m*sizeof(long double *)),
		**ai=(long double **)malloc(m*sizeof(long double *));
	for (i=m-1;i>=0;i--)
	{
		a[i]=(long double *)malloc(m*sizeof(long double));
		ai[i]=(long double *)malloc(m*sizeof(long double));
		for (j=m-1;j>=0;j--) {a[i][j]=a_ext[i][j]; ai[i][j]=(i==j);}
	}
	for (i=0;i<m;i++)
	{
		x=0;
		for (j=i;j<m;j++) if (fabs(a[j][i])>=x)
			{k=j; x=fabs(a[j][i]);}
		for (j=i;j<m;j++)
			{x=a[i][j]; a[i][j]=a[k][j]; a[k][j]=x;}
		for (j=m-1;j>=0;j--)
			{x=ai[i][j]; ai[i][j]=ai[k][j]; ai[k][j]=x;}
		if (a[i][i]==0) return 0;
		for (j=i+1;j<m;j++)
		{
			x=-a[j][i]/a[i][i];
			for (k=0;k<m;k++)
				{a[j][k]+=x*a[i][k]; ai[j][k]+=x*ai[i][k];}
			a[j][i]=0;
		}
		x=1.0/a[i][i];
		for (j=0;j<m;j++) {a[i][j]*=x; ai[i][j]*=x;}
		a[i][i]=1;
	}
	for (i=m-2;i>=0;i--) for (j=i+1;j<m;j++)
	{
		x=-a[i][j];
		for (k=m-1;k>=0;k--) ai[i][k]+=x*ai[j][k];
	}
	for (i=m-1;i>=0;i--)
	{
		for (j=m-1;j>=0;j--) a_ext[i][j]=ai[i][j];
		free(a[i]); free(ai[i]);
	}
	free(a); free(ai);
	return 1;
}

#ifdef SBCLIB_NZQ
void printQmatrix(const Q **a,const int m)
{
	int i,j;
	for (i=0;i<m;i++)
	{
		for (j=0;j<m;j++) printf("%s\t",Qstr(a[i][j]));
		printf("\n");
	}
	getchar();
}

int invertmatrix_exact(Q **a,const int m)
{ // Exactly the same as invertmatrix but using arithmetic on Q
	int i,j,k,t; Q x,xx,de,**ai=(Q **)malloc(m*sizeof(Q *));
	for (i=m-1;i>=0;i--)
	{
		ai[i]=(Q *)malloc(m*sizeof(Q));
		for (j=m-1;j>=0;j--) ai[i][j]=Qint(i==j);
	}
	for (i=0;i<m;i++)
	{
		x=Qint(-1);
		for (j=i;j<m;j++)
		{
			t=a[j][i].p.neg; a[j][i].p.neg=0;
			if (Qgt(a[j][i],x)) {k=j; Qfree(x); x=Qcopy(a[j][i]);}
			a[j][i].p.neg=t;
		}
		Qfree(x);
		for (j=i;j<m;j++)
			{x=a[i][j]; a[i][j]=a[k][j]; a[k][j]=x;}
		for (j=m-1;j>=0;j--)
			{x=ai[i][j]; ai[i][j]=ai[k][j]; ai[k][j]=x;}
		de=Qcopy(a[i][i]);
		if (Qiszero(de)) {printf("!!!!!!\n"); return 0;}
		for (j=i+1;j<m;j++)
		{
			xx=Qdiv(a[j][i],de);
			for (k=i+1;k<m;k++)
				{x=Qmul(xx,a[i][k]); Qsubeq(a[j][k],x); Qfree(x);}
			for (k=m-1;k>=0;k--)
				{x=Qmul(xx,ai[i][k]); Qsubeq(ai[j][k],x); Qfree(x);}
			Qfree(xx);
			Qfree(a[j][i]); a[j][i]=Qint(0);
		}
		for (j=0;j<m;j++) {Qdiveq(a[i][j],de); Qdiveq(ai[i][j],de);}
		Qfree(de);
	}
	for (i=m-2;i>=0;i--) for (j=i+1;j<m;j++)
	{
		for (k=m-1;k>=0;k--)
		{
			x=Qmul(a[i][j],ai[j][k]);
			Qsubeq(ai[i][k],x); Qfree(x);
		}
	}
	for (i=m-1;i>=0;i--)
	{
		for (j=m-1;j>=0;j--) {Qfree(a[i][j]); a[i][j]=ai[i][j];}
		free(ai[i]);
	}
	free(ai);
	return 1;
}
#endif

REAL *invertsolvepolish(const REAL * const *a,const REAL *b,const char ep=0)
{ // Constructs the solution to a system the "stupid" way by inverting the matrix, but also does root polishing
	int n,i,m=array_1d_m(b); REAL **ai=array_2d_copy(a);
	(ep?invertmatrix_long:invertmatrix)(ai); // ep is for "extra precision"
	REAL *r=array_1d(m),*ret=array_1d_zeroed(m),rr=-1,orr;
	do
	{
		orr=rr; rr=0;
		for (n=m-1;n>=0;n--)
		{
			r[n]=b[n];
			for (i=m-1;i>=0;i--) r[n]-=a[n][i]*ret[i];
			rr+=r[n]*r[n];
		}
		for (n=m-1;n>=0;n--) for (i=m-1;i>=0;i--) ret[n]+=ai[n][i]*r[i];
	}
	while (orr<0 || rr>0 && orr/rr>=1.1);
	array_2d_free(ai); array_1d_free(r);
	return ret;
}

int invertuppermatrix(REAL **a)
{ // A must be a square upper-triangular matrix (a_ij=0 unless i<=j)
	int i,j,k,m=array_2d_xs(a); REAL t,**ai=array_2d_zeroed(m,m);
	for (i=m-1;i>=0;i--) for (j=i;j<m;j++)
	{
		t=(i==j);
		for (k=j-1;k>=0;k--) t-=ai[i][k]*a[k][j];
		if (a[j][j]==0) return 0;
		ai[i][j]=t/a[j][j];
	}
	for (i=m-1;i>=0;i--) for (j=m-1;j>=0;j--) a[i][j]=ai[i][j];
	array_2d_free(ai);
	return 1;
}

// END matrix inversion stuff

int cholesky(REAL **a)
{ // Turns (hopefully SPD matrix) A into upper triangular U such that UtU=A
	int n,i,j,m=array_2d_xs(a),ret=1; REAL q;
	for (n=0;n<m;n++)
	{
		q=a[n][n];
		if (q<0) {q=-q; ret=-1;}
		else if (q==0) return 0;
		q=sqrt(q);
		a[n][n]=q; q=1.0/q;
		for (i=n+1;i<m;i++) {a[i][n]=0; a[n][i]*=q;}
		for (i=n+1;i<m;i++) for (j=n+1;j<m;j++) a[i][j]-=a[n][i]*a[n][j];
	}
	return ret;
} // There is another "direct" way of doing this, calculating the entries of U in turn, too

REAL **matrixata(REAL **a)
{ // Constructs the square (symmetric) matrix AtA; A can be oblong
	int i,j,k,xs=array_2d_xs(a),ys=array_2d_ys(a); REAL **ret=array_2d(ys,ys),t;
	for (i=ys-1;i>=0;i--) for (j=i;j>=0;j--)
	{
		t=0;
		for (k=xs-1;k>=0;k--) t+=a[k][i]*a[k][j];
		ret[i][j]=ret[j][i]=t;
	}
	return ret;
}

REAL **matrixaat(REAL **a)
{ // Constructs the square (symmetric) matrix AAt; A can be oblong
	int i,j,k,xs=array_2d_xs(a),ys=array_2d_ys(a); REAL **ret=array_2d(xs,xs),t;
	for (i=xs-1;i>=0;i--) for (j=i;j>=0;j--)
	{
		t=0;
		for (k=ys-1;k>=0;k--) t+=a[i][k]*a[j][k];
		ret[i][j]=ret[j][i]=t;
	}
	return ret;
}

void matrixqr(const REAL * const *a,REAL **q,REAL **r)
{ // Gives Q,R such that (square) A=QR, QtQ=I (orthonormal) and R is upper ("right") triangular
	int n,i,j,m=array_2d_xs(a); REAL np,dp;
	array_2d_eq(q,a);
	for (i=m-1;i>=0;i--) for (j=m-1;j>=0;j--) r[i][j]=(i==j);
	for (n=0;n<m;n++)
	{ // We orthonormalise Q column-by-column, storing the operations in R
		for (i=0;i<n;i++)
		{
			np=0; dp=0;
			for (j=m-1;j>=0;j--) {np+=q[j][n]*q[j][i]; dp+=q[j][i]*q[j][i];}
			dp=np/dp;
			for (j=m-1;j>=0;j--) {q[j][n]-=dp*q[j][i]; r[i][j]+=dp*r[n][j];}
		}
		dp=0;
		for (j=m-1;j>=0;j--) dp+=q[j][n]*q[j][n];
		np=sqrt(dp); dp=1.0/np;
		for (j=m-1;j>=0;j--) {q[j][n]*=dp; r[n][j]*=np;}
	}
}

int sympair(const int x,const int y)
{ // 00 -> 0, 01 or 10 -> 1, 11 -> 2, 02 or 20 -> 3, etc.
	int a=Maxi(x,y);
	return a*(a+1)/2+Mini(x,y);
}

REAL *sympack(const REAL * const *a)
{ // Assume a was symmetric (or lower triangular) to start with
	int n,r=array_2d_xs(a); REAL *ret=array_1d(sympair(r,0)),*p=ret;
	for (n=0;n<r;n++) {memcpy(p,a[n],(n+1)*sizeof(REAL)); p+=n+1;}
	return ret;
}

REAL **getinvarquad(const REAL * const *a)
{ // Returns an "invariant quadratic" Q such that Q=AtQA (Q=Qt, q00=1), or NULL if unsuccessful
	int i,j,ij,k,l,kl,m=array_2d_xs(a),sm=m*(m+1)/2-1;
	REAL **s=array_2d(sm,sm),*se=array_1d(sm);
	for (i=m-1;i>0;i--) for (j=i;j>=0;j--)
	{
		se[ij=sympair(i,j)-1]=a[0][i]*a[0][j];
		for (k=m-1;k>0;k--)
		{
			s[ij][sympair(k,k)-1]=-a[k][i]*a[k][j];
			for (l=k-1;l>=0;l--)
				s[ij][sympair(k,l)-1]=-(a[k][i]*a[l][j]+a[l][i]*a[k][j]);
		}
		s[ij][ij]++;
	}
	if (!invertmatrix(s)) return NULL;
	REAL **ret=array_2d(m,m),t;
	ret[0][0]=1;
	for (i=m-1;i>0;i--) for (j=i;j>=0;j--)
	{
		t=0; ij=sympair(i,j)-1;
		for (kl=sm-1;kl>=0;kl--) t+=s[ij][kl]*se[kl];
		ret[i][j]=ret[j][i]=t;
	}
	array_2d_free(s); array_1d_free(se);
	return ret;
}

#define SBCLIB_SQUAREMATRIX
#endif
