\name{bdglm.fit}
\alias{bdglm.fit}
\title{Fit a generalized linear model}
\description{
Fits a generalized linear model (glm) to the data. The function is configurable to accommodate distributed data.
}
\usage{
bdglm.fit(x, y, family = binomial("logit"), weights = NULL, start, etastart = NULL, mustart = NULL, intercept = T, matfun = list(), minfun = "bdcg", control = list(...), ...)
}
\arguments{
  \item{x}{
	data object. It can be a matrix, or a filename in a distributed filesystem.  
	It is never interpreted, but passed to matrix-vector operations.  
	}
  \item{y}{
	response vector. Expected to contain numerical binary data.  
	}
  \item{family}{ 
	glm family defining the type of data and link function. 
	Follows the standard established by \code{\link{glm}}. 
	(See \code{\link{family}} for family functions).  
	}
  \item{weights}{
	vector of weights, optional. If set, expected to contain numerical, non-negative data.  
	}
  \item{start}{
	starting values for the optimization problem. 
	The length of the starting vector is used to derive the number of parameters in the model.  
	}
  \item{etastart}{
	starting values for linear predictors \eqn{\eta}. 
	Preserved for compatibility with \code{\link{glm}}.  
	}
  \item{mustart}{
	starting values for the response mean \eqn{\mu}. 
	Preserved for compatibility with \code{\link{glm}}.  
	}
  \item{intercept}{
	include the intercept in the model.  
	}
  \item{matfun}{
	a list of matrix-vector functions: xtyfun: \eqn{X^TWv}, xvfun: \eqn{Xv}, xtxvfun: \eqn{X^TWXv}, xtxfun: \eqn{X^TWX}.
	Each function takes parameters, \code{x} passed as argument to bdglm.fit, 
	vector v and vector w - a diagonal of matrix W. 
	The return is a vector representing the result of the operation.  
	}
  \item{minfun}{
	The method used to perform the \eqn{L_2} minimization executed in each step of iteratively reweighted least squares (IRLS).
	Defaults to conjugate-gradient method \code{\link{bdcg}}.  
	}
  \item{control}{
	a list of control parameters, epsilon, maxit, etc. 
	See \code{\link{bdglm.control}} for details.  
	}
}
\details{
}
\value{
\code{bdglm.fit} returns an object of class 'bdglm' which inherits from 'glm'. The function \code{\link{summary}} can be called on that object.

An object of class 'bdglm' is a list containing the following components:
  \item{coefficients}{a vector of coefficients}
  \item{residuals}{model residuals}
  \item{fitted.values}{model fitted values}
  \item{rank}{model rank}
  \item{family}{family used to fit the model}
  \item{linear.predictors}{linear predictors}
  \item{null.deviance}{deviance for the null model}
  \item{deviance}{deviance for the fitted model}
  \item{aic}{model Akaike's Information Criterion (AIC)}
  \item{iter}{number of \code{irls} iterations}
  \item{weights}{final weights}
  \item{prior.weights}{original weights}
  \item{df.residual}{degrees of freedom for the fitted model}
  \item{df.null}{degrees of freedom for the null model}
  \item{y}{response used}
  \item{x}{data used}
  \item{converged}{\code{TRUE} if the desired tolerance was reached; \code{FALSE} if the maximum number of iterations was reached first.}
  \item{call}{original call}
  \item{formula}{simplified model formula}
  \item{terms}{simplified model terms}
  \item{xtvfun}{used \eqn{X^TWv} function}
  \item{xvfun}{used \eqn{Xv} function}
  \item{xtxvfun}{used \eqn{X^TWXv} function}
}
\references{
[1] McCullagh, P. and Nelder, J. A. (1989) Generalized Linear Models. London: Chapman and Hall.

[2] Akaike, H. (1973). Information theory and an extension of the maximum likelihood principle. In Proceedings of the 2nd International Symposium on Information Theory, pages 267--281.
}

\author{
Ela Sienkiewicz
}
\note{
}

\seealso{
\code{\link{summary.bdglm}}, \code{\link{predict.bdglm}};
\code{\link{glm}} for fitting a glm model to traditional data;
\code{bigglm} in package \pkg{biglm} for an alternative way of fitting a glm model to big data.
}
\examples{
library(bdglm)

# Toy data: binary response y and two numeric predictors x1 and x2.
y <- c(0,0,1,0,0,1,1,1,0,1,1,0,0,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0)
x1 <- c(32,45,60,53,25,68,82,38,67,92,72,21,26,40,33,45,61,16,18,22,27,35,40,10,24,15,23,19,22,61,21,32,17)
x2 <- c(3,2,2,1,4,1,2,5,2,2,3,5,3,4,3,1,2,3,4,6,3,3,3,4,3,4,3,5,2,2,3,5,1)

# Predictor matrix without an intercept column.
x <- cbind(x1,x2)

dimnames(x) <- list(NULL,c("x1","x2"))

# Fit using the default matrix functions.
# The length of start sets the number of model parameters:
# here one for the intercept and one for each of the two predictors.
start <- c(0,0,0)
res.bdglm <- bdglm.fit(x,y,family=binomial("probit"),start=start,intercept=TRUE, trace=FALSE)
summary(res.bdglm)

xtyfun <- function(x,y,weights) {
	# Weighted cross-product of the design matrix with y,
	# where the design matrix is x with a leading column of ones.
	design <- cbind(1,x)
	weighted.y <- weights*y
	t(design) \%*\% weighted.y
}

xvfun <- function(x, b) {
	# Use x as is when b has one entry per column of x;
	# otherwise prepend a column of ones before multiplying.
	design <- if (ncol(x) == length(b)) x else cbind(1,x)
	design \%*\% b
}

xtxvfun <- function(x,b,weights) {
	# Weighted Gram matrix of the design matrix (x with a leading
	# column of ones), applied to the vector b.
	design <- cbind(1,x)
	gram <- t(design) \%*\% (weights*design)
	gram \%*\% b
}

xtxfun <- function(x,weights) {
	# Weighted Gram matrix of the design matrix,
	# where the design matrix is x with a leading column of ones.
	design <- cbind(1,x)
	weighted.design <- weights*design
	t(design) \%*\% weighted.design
}

xjfun <- function(x,ind) {
	# Return one column of the design matrix, addressed by the first entry of ind:
	# position 1 is the intercept column of ones, positions 2 and up map to
	# the columns of x shifted by one, anything beyond that yields NA.
	j <- ind[1]
	if (j==1) {
		rep(1,nrow(x))
	} else if (j <= ncol(x)+1) {
		x[,j-1]
	} else {
		c(NA)
	}
}

# Fit again, this time supplying the user-defined matrix functions.
matfun <- list(xtyfun=xtyfun, xvfun=xvfun, xtxvfun=xtxvfun, xtxfun=xtxfun)

res.bdglm <- bdglm.fit(x, y, family=binomial("probit"), start=start, matfun=matfun, intercept=TRUE, trace=FALSE)
summary(res.bdglm)
plot(res.bdglm)

# Lasso fit: same functions plus the column accessor xjfun,
# started from the coefficients of the fit above.
matfun <- list(xtyfun=xtyfun, xvfun=xvfun, xtxvfun=xtxvfun, xtxfun=xtxfun, xjfun=xjfun)
lambda <- c(0,10,2)
res.lasso <- bdglm.fit.lasso(x, y, family=binomial("probit"), lambda=lambda,
	start=res.bdglm$coefficients, matfun=matfun, intercept=TRUE, hasmle=TRUE)

summary(res.lasso)

}
\keyword{ glm }
\keyword{ big data }

