#ifndef SBCLIB_SVD
// Singular-value decomposition of a possibly-rectangular matrix A = UDV where U,V are orthogonal
// Would be faster with shifting/"Wilkinson's Shift" but that requires a more explicit version of bidiagonal_sweep
#include <squarematrix.c>
#include <rnd/sq.c>

void hhmulleft(REAL **a,const REAL *u)
{ // Replaces A by (I-2uu^t)A = A - u(2u^tA), O(N^2 or XY)
	int x,y,X=array_2d_xs(a),Y=array_2d_ys(a);
	REAL *ua=array_1d(Y),t;
	for (y=Y-1;y>=0;y--)
	{
		t=0;
		for (x=X-1;x>=0;x--) t+=u[x]*a[x][y];
		ua[y]=2.0*t;
	}
	for (x=X-1;x>=0;x--) for (y=Y-1;y>=0;y--) a[x][y]-=u[x]*ua[y];
	array_1d_free(ua);
}

void hhmulright(REAL **a,const REAL *v)
{ // Replaces A by A(I-2vv^t) = A - (2Av)v^t, O(N^2 or XY)
	int x,y,X=array_2d_xs(a),Y=array_2d_ys(a);
	REAL *av=array_1d(X),t;
	for (x=X-1;x>=0;x--)
	{
		t=0;
		for (y=Y-1;y>=0;y--) t+=a[x][y]*v[y];
		av[x]=2.0*t;
	}
	for (x=X-1;x>=0;x--) for (y=Y-1;y>=0;y--) a[x][y]-=av[x]*v[y];
	array_1d_free(av);
}

void hhstepleft(REAL **a,const int sx,const int sy,REAL ** const b=NULL)
{
	int x,X=array_2d_xs(a);
	if (sx+1>=X) return; // You need to check sy<Y when calling though
	REAL d=0,x1=a[sx][sy];
	for (x=sx;x<X;x++) d+=sq(a[x][sy]);
	d=sqrt(d);
	if (x1==d) return; // Already pointing in the right direction, nothing to do!
	REAL *h=array_1d(X),k=1.0/sqrt(2.0*d*(d-x1));
	for (x=0;x<sx;x++) h[x]=0;
	h[sx]=k*(x1-d);
	for (x=sx+1;x<X;x++) h[x]=k*a[x][sy];
	hhmulleft(a,h);
	if (b) hhmulright(b,h);
	array_1d_free(h);
}

void hhstepright(REAL **a,const int sx,const int sy,REAL ** const b=NULL)
{
	int y,Y=array_2d_ys(a);
	if (sy+1>=Y) return; // You need to check sx<X when calling though
	REAL d=0,x1=a[sx][sy];
	for (y=sy;y<Y;y++) d+=sq(a[sx][y]);
	d=sqrt(d);
	if (x1==d) return;
	REAL *h=array_1d(Y),k=1.0/sqrt(2.0*d*(d-x1));
	for (y=0;y<sy;y++) h[y]=0;
	h[sy]=k*(x1-d);
	for (y=sy+1;y<Y;y++) h[y]=k*a[sx][y];
	hhmulright(a,h);
	if (b) hhmulleft(b,h);
	array_1d_free(h);
}

void bidiagonalise(REAL **a,REAL ***pu,REAL ***pv)
{ // O(N^3)
	int X=array_2d_xs(a),Y=array_2d_ys(a);
	*pu=matrixid(X); *pv=matrixid(Y);
	for (int n=0;n<X && n<Y;n++)
	{
		hhstepleft(a,n,n,*pu); 
		hhstepright(a,n,n+1,*pv);
	}
}

void givensrot(const REAL a,const REAL b,REAL *pc,REAL *ps,REAL *pr)
{ // Demmel & Kahan algorithm for a matrix [c s][-s c] that rotates (a,b) to (r,0)
	if (a==0) {*pc=0; *ps=1; *pr=b; return;}
	REAL t,t1;
	if (fabs(a)>=fabs(b))
	{
		t=b/a; t1=sqrt(t*t+1);
		*pc=1.0/t1; *ps=t/t1; *pr=a*t1;
	}
	else
	{
		t=a/b; t1=sqrt(t*t+1);
		*ps=1.0/t1; *pc=t/t1; *pr=b*t1;
	}
}

void givensmulleft(REAL **a,const int i,const int j,const REAL cth,const REAL sth)
{ // A -> GA where G is [cth -sth][sth cth] in rows/cols i,j
	int n,Y=array_2d_ys(a); REAL ain;
	for (n=Y-1;n>=0;n--)
	{
		ain=a[i][n];
		a[i][n]=cth*ain-sth*a[j][n];
		a[j][n]=sth*ain+cth*a[j][n];
	}
}

void givensmulright(REAL **a,const int i,const int j,const REAL cth,const REAL sth)
{ // A -> AG where G is [cth -sth][sth cth] in rows/cols i,j
	int n,X=array_2d_xs(a); REAL ani;
	for (n=X-1;n>=0;n--)
	{
		ani=a[n][i];
		a[n][i]=ani*cth+a[n][j]*sth;
		a[n][j]=ani*-sth+a[n][j]*cth;
	}
}

void bidiagonal_sweep(REAL *d,REAL *e,REAL **u,REAL **v)
{ // O(N^2) step
	int n,m=array_1d_m(d);
	REAL os,oc=1,c=1,s,r;
	for (n=0;n+1<m;n++)
	{
		givensrot(c*d[n],e[n],&c,&s,&r);
		givensmulleft(v,n,n+1,c,-s);
		if (n>0) e[n-1]=r*os;
		givensrot(oc*r,d[n+1]*s,&oc,&os,&d[n]);
		givensmulright(u,n,n+1,oc,os);
	}
	if (array_2d_xs(v)>array_2d_xs(u))
	{
		givensrot(c*d[m-1],e[m-1],&c,&s,&r);
		givensmulleft(v,m-1,m,c,-s);
		e[m-2]=r*os;
		d[m-1]=r*oc;
		e[m-1]=0;
	}
	else
	{
		e[m-2]=c*d[m-1]*os;
		d[m-1]=c*d[m-1]*oc;
	}
}

void bidiagonal_vectors(const REAL * const *a,REAL **pd,REAL **pe)
{ // Extracts the diagonal and superdiagonal as vectors
	int n,m=Mini(array_2d_xs(a),array_2d_ys(a));
	*pd=array_1d(m); *pe=array_1d(m);
	for (n=m-1;n>=0;n--) (*pd)[n]=a[n][n];
	for (n=m-2;n>=0;n--) (*pe)[n]=a[n][n+1];
	if (m<array_2d_ys(a)) (*pe)[m-1]=a[m-1][m];
	else (*pe)[m-1]=0;
}

REAL *svd(const REAL * const *a0,REAL ***pu,REAL ***pv)
{ // Returns the "diagonal" (vector of singular values), also constructs U, V matrices
	REAL **a=array_2d_copy(a0);
	bidiagonalise(a,pu,pv);
	REAL *ret,*e;
	bidiagonal_vectors(a,&ret,&e);
	array_2d_free(a);
	while (array_1d_supnorm(e)>1e-15) bidiagonal_sweep(ret,e,*pu,*pv);
	array_1d_free(e);
	return ret;
}

void svdfree(REAL *s,REAL **u,REAL **v) {array_1d_free(s); array_2d_free(u); array_2d_free(v);}

REAL *svdsolve_inner(const REAL *s,const REAL * const *u,const REAL * const *v,const REAL *b,const REAL lambda=0)
{ // If you've already calculated the SVD, use this O(N^2) routine
	REAL ll=lambda*lambda,*utb=array_1d_copy(b); int um=array_1d_m(utb);
	matrixvectormulby(utb,u);
	int n,m=array_2d_xs(v); REAL *x=array_1d(m);
	for (int n=m-1;n>=0;n--)
		if (n>=um) x[n]=0;
		else if (ll>0 || s[n]!=0) x[n]=s[n]*utb[n]/(s[n]*s[n]+ll);
		else x[n]=0;
	array_1d_free(utb);
	matrixvectormulby(x,v);
	return x;
}

REAL *svdsolve(const REAL * const *a,const REAL *b,const REAL lambda=0)
{ // Ax~=b; lambda->0 for exact solution, lambda->infinity for tiny steepest descent step
	REAL **u,**v,*s=svd(a,&u,&v),
		*ret=svdsolve_inner(s,u,v,b,lambda);
	array_1d_free(s); array_2d_free(u); array_2d_free(v);
	return ret;
}

#define SBCLIB_SVD
#endif
