#################################################################################
## constrained non-parametric additive count least-squares model
##
##  mu_i = f_1(x_1) + ... + f_L(x_L) + Z*beta 
##
##  y_i = mu_i + e_i
##
##  Variable Selection:  chooses variables and shapes.
##        for each x_l, can be increasing, decreasing, convex, concave
## 
##        there are (5^L)(2^K) models to choose from
##
##  Input xmat -- columns are x_1,...x_L
##        zmat -- parametrically modeled covariates (full col rank -- does NOT contain one-vector)
##                    if no parametrically modeled covariates, choose zmat=0
##        y -- response vector of zeros and ones 
##
##  Output: etahat -- constr ML estimate of eta vector
##          muhat -- estimate of probability vector (computed from etahat)
##          betahat -- coefficients for parametrically modeled predictors
##          fhat -- matrix of (possibly non-unique) constrained components
##
##   (sum of columns of fhat) + (zmat %*%betahat) = etahat
##
##  Also gives CIC and E_0(D) for each model
##
#################################################################################
constrlsqcic=function(y,xmat,zmat){
	n=length(y)
	sm=1e-6
	capl=length(xmat)/n
	if(round(capl,8)!=round(capl,1)){print("Incompatible dimensions")}
	capk=length(zmat)/n
	if(capk<1){capk=0}
	if(round(capk,8)!=round(capk,1)){print("Incompatible dimensions")}
#  make the matrix of models
	nm=0
	mdl1=matrix(0,nrow=5^capl,ncol=capl)
	nums=0:4
	for(l in 1:5^capl){
		nm=nm+1
		base5=l
		for(d in capl:1){
			mult=5^(d-1)
			digit=max(nums[mult*nums<base5])
			base5=base5-mult*digit
			mdl1[nm,capl-d+1]=digit
		}
	}	
	if(capk==0){
		mdl=mdl1
	}else{
		mdl=matrix(0,nrow=2^capk*nm,ncol=capl+capk)
		for(k in 1:(2^capk)){
			mdl[((k-1)*nm+1):(k*nm),1:capl]=mdl1
			for(i in 1:nm){
				mdl[(k-1)*nm+i,(capl+1):(capl+capk)]=getbin(k,capk)
			}
		}
	}
	nm=2^capk*5^capl
##
## now loop through models
##
	edf0=1:nm
	cic=1:nm
	llh=1:nm
#   start with null model
	mu0=mean(y)
	edf0[1]=1
	llh[1]=log(sum((mu0-y)^2))
	cic[1]=llh[1]+log(1+2/(n-1))
	for(imod in 2:nm){
#  get variables for the model
		xin=1:(capl+capk)<0
		xin[1:capl]=mdl[imod,1:capl]>0
		cin=xin[1:capl]
		shapes=mdl[imod,xin]
		if(sum(cin)==0){xm=0}else{xm=xmat[,cin]}
		if(capk>0){zin=mdl[imod,(capl+1):(capl+capk)]>0}else{zin=FALSE}
		if(sum(zin)==0){zm=0}else{zm=zmat[,zin]}
		if(sum(zin)==1){zm=matrix(zm,ncol=1)}
		if(sum(cin)==1){xm=matrix(xm,ncol=1)}
#  fit the model
		ans=	constrlsq(y,xm,zm,shapes,1000)
		edf0[imod]=ans$edf0
		llh[imod]=ans$llh
		cic[imod]=ans$llh+log(1+2*edf0[imod]/(n-ans$d0-1.5*(edf0[imod]-ans$d0)))
		print(imod);print(cic[imod])
	}


}

########################################################################
###  fit the constrained GAM & get edf0
########################################################################
constrlsq=function(y,xmat,zmat,shapes,nsim){
	n=length(y)
	capl=length(xmat)/n
	if(capl<1){capl=0}
	if(round(capl,8)!=round(capl,1)){print("Incompatible dimensions")}
	capk=length(zmat)/n
	if(capk<1){capk=0}
	if(round(capk,8)!=round(capk,1)){print("Incompatible dimensions")}
# get basis functions for the constrained components
	if(capl>=1){
		del1=makedelta(xmat[,1],shapes[1])
		if(capl==1){delta=del1;m1=length(del1)/n;varlist=1:m1*0+1}else{
			m1=length(del1)/n
			var1=1:m1*0+1
			for(i in 2:capl){
			 	del2=makedelta(xmat[,i],shapes[i])
			 	m2=length(del2)/n
			 	delta=rbind(del1,del2)
			 	varlist=1:(m1+m2)*0
			 	varlist[1:m1]=var1
			 	varlist[(m1+1):(m1+m2)]=(1:m2)*0+i
			 	var1=varlist
			 	m1=m1+m2
			 	del1=delta
			}
		}
		capm=length(delta)/n
# make constraint matrix 
		amat=matrix(0,nrow=capm,ncol=capm+capk)
		for(i in 1:capm){amat[i,capk+i]=1}
		if(sum(shapes>2)>0&capk>0){
			bigmat=rbind(1:n*0+1,t(zmat),t(xmat[,shapes>2]),delta)
			np=1+capk+sum(shapes>2)
		}else if(sum(shapes>2)>0&capk==0){
			bigmat=rbind(1:n*0+1,t(xmat[,shapes>2]),delta)
			np=1+sum(shapes>2)
		}else if(sum(shapes>2)==0&capk>0){
			bigmat=rbind(1:n*0+1,t(zmat),delta)
			np=capk+1
		}else if(sum(shapes>2)==0&capk==0){
			bigmat=rbind(1:n*0+1,delta)
			np=1
		}else{print("error in capk,shapes")}
	}else{
		if(capk>0){bigmat=rbind(1:n*0+1,t(zmat));capm=0;np=1+capk}
	}
	if(capl+capk>0){
		ans=hingep(y,bigmat,np)
		etahat=t(bigmat)%*%ans$coefs
		zcoefs=ans$coefs[1:np]
		if(capl>0){
			dcoefs=ans$coefs[(np+1):(capm+np)]
			thvecs=matrix(nrow=capl,ncol=n)
			ncon=1
			for(i in 1:capl){
				thvecs[i,]=t(delta[varlist==i,])%*%dcoefs[varlist==i]
				if(shapes[i]>2){ncon=ncon+1;thvecs[i,]=thvecs[i,]+zcoefs[ncon]*xmat[,i]}
			}
		}
		llh=log(sum((y-etahat)^2))
		etakeep=etahat
### get edf0
		if(capl>0){
			dfs=1:nsim
			for(isim in 1:nsim){
				ysim=rnorm(n)
				ans=hingep(ysim,bigmat,np)
				dfs[isim]=sum(abs(ans$coefs)>0)
			}
			dfmean=mean(dfs)
		}else{dfmean=capk}
	}else{dfmean=1;etahat=1:n*0+mean(y)}
	ccnt=new.env()
	ccnt$etahat=etakeep
	ccnt$edf0=dfmean
	ccnt$llh=llh
	ccnt$d0=np
	ccnt$etacomps=thvecs
	ccnt$zcoefs=ans$coefs[1:np]
	ccnt
}


#######################################################
# cone projection code: hinge algorithm in R
# the linear space contained in the constraint set
# is spanned by first p rows of sigma, other 
#  rows of sigma are generators of the cone
#   special version with possibility of NO constraints!
######################################################
hingep=function(y,sigma,p){
	n=length(y)
	vmat=t(sigma[1:p,])
	m=length(sigma)/n-p
	if(p==1){vmat=matrix(vmat,ncol=1)}
	if(m>0){
		sm=1e-4
		h=1:(m+p)<0
		h[1:p]=TRUE
		obs=1:(m+p)
		check=0
		a=solve(t(vmat)%*%vmat)%*%t(vmat)%*%y
		theta=vmat%*%a
		b2=sigma%*%(y-theta)
		if(max(b2)>sm){
			i=min(obs[b2==max(b2)])
			h[i]=TRUE
#			print(max(b2));print(i)
		}
		if(max(b2)<=sm){
			check=1
		}
		nrep=0
		while(check==0&nrep<500){
#			print(nrep)
#			print(obs[h])
#			print(p)
			nrep=nrep+1
			xmat=sigma[h,]
			if(length(xmat)==n){xmat=matrix(sigma[h,],ncol=n)}
			a=solve(xmat%*%t(xmat))%*%xmat%*%y
			if(length(a)>p){
			if(min(a[(p+1):length(a)])<(-sm)){
				avec=1:(m+p)*0
		 		avec[h]=a
				i=max(obs[avec==min(avec[(p+1):(p+m)])])
				h[i]=FALSE  #;print("removing");print(i)
				check=0
			}
			if(min(a[(p+1):length(a)])>(-sm)) {
				check=1
				theta=t(xmat)%*%a
				b2=sigma%*%(y-theta)
#			obs=1:m
				if(max(b2)>sm){
					i=min(obs[b2==max(b2)])
					check=0
					h[i]=TRUE
#					print(max(b2))
				}
			}}
		}
	}else{
		avec=solve(t(vmat)%*%vmat)%*%t(vmat)%*%y
		theta=vmat%*%avec
		nrep=1
	}
	ans=new.env()
	ans$yhat=theta
	if(m>0){
		avec=1:(m+p)*0
		avec[h]=a
	}
	ans$coefs=avec
	ans$nrep=nrep
	ans
}

#######################################################
#  make cone edges
#######################################################
makedelta=function(x,sh){
	n=length(x)
	xt=(x-min(x))/(max(x)-min(x))
	xt=round(xt,10)
	xs=sort(xt)
# find unique x values
	xu=unique(xs)
	nu=length(xu)
	if(sh==1|sh==2){
		delta=matrix(nrow=nu-1,ncol=n)
		for(i in 1:(nu-1)){
			delta[i,xt<=xu[i]]=0;delta[i,xt>xu[i]]=1
			delta[i,]=delta[i,]-mean(delta[i,])
		}
		if(sh==2){delta=-delta}
	}else if(sh==3|sh==4){
		delta=matrix(nrow=nu-2,ncol=n)
		for(i in 1:(nu-2)){
			delta[i,xt<=xu[i+1]]=0
			delta[i,xt>xu[i+1]]=(xt[xt>xu[i+1]]-xu[i+1])
		}
		xp=cbind(1:n*0+1,x)
		pp=xp%*%solve(t(xp)%*%xp)%*%t(xp)
		delta=delta-delta%*%pp
		if(sh==4){delta=-delta}
	}
	delta
}


###################################################
#  function to find binary representation of number                   #
##################################################
getvars=function(num){
	i=num
	digits=0
	power=0
	while(digits==0){
		if(i<2^power){digits=power}
		power=power+1
	}
	binry=1:digits*0
	if(num>0){binry[1]=1}
	i=i-2^(digits-1)
	power=digits-2
	for(p in power:0){
		if(i>=2^p){
			i=i-2^p
			binry[digits-p]=1
		}
	}
	binry
}
getbin=function(num,capl){
	br=getvars(num-1)
	digits=length(br)
	binrep=1:capl*0
	binrep[(capl-digits+1):capl]=br
	binrep
}