\name{geese}
\alias{geese}
\alias{geese.fit}
\alias{print.geese}
\alias{summary.geese}
\alias{print.summary.geese}
\title{Function to solve a Generalized Estimating Equation Model}
\description{
  Produces an object of class `geese' which is a Generalized Estimating
  Equation fit of the data.
}
\usage{
geese(formula = formula(data), sformula = ~1, id, waves = NULL,
      data = parent.frame(), subset = NULL, na.action = na.omit,
      contrasts = NULL, weights = NULL, zcor = NULL, corp = NULL,
      control = geese.control(...), b = NULL, alpha = NULL, gm = NULL,
      family = gaussian(), mean.link = NULL, variance = NULL,
      cor.link = "identity", sca.link = "identity", link.same = TRUE,
      scale.fix = FALSE, scale.value = 1, corstr = "independence", ...)

geese.fit(x, y, id, offset = rep(0, N), soffset = rep(0, N),
          weights = rep(1,N), waves = NULL, zsca = matrix(1, N, 1),
          zcor = NULL, corp = NULL, control = geese.control(...),
          b = NULL, alpha = NULL, gm = NULL, family = gaussian(),
          mean.link = NULL, variance = NULL, cor.link = "identity",
          sca.link = "identity", link.same = TRUE, scale.fix = FALSE,
          scale.value = 1, corstr = "independence", ...)
}
\arguments{
  \item{formula}{a formula expression as for \code{glm}, of the form
    \code{response ~ predictors}. See the documentation of lm and
    formula for details. As for glm, this specifies the linear predictor
    for modeling the mean. A term of the form \code{offset(expression)}
    is allowed.
  }
  \item{sformula}{a formula expression of the form \code{ ~ predictor},
    the response being ignored. This specifies the linear predictor for
    modeling the dispersion. A term of the form
    \code{offset(expression)} is allowed.
  }
  \item{id}{a vector which identifies the clusters.  The length of `id'
    should be the same as the number of observations.  Data are assumed
    to be sorted so that observations on a cluster are contiguous rows
    for all entities in the formula.
  }
  \item{waves}{an integer vector which identifies components in
    clusters. The length of \code{waves} should be the same as the
    number of observation. components with the same \code{waves} value
    will have the same link functions. 
  }
  \item{data}{
    an optional data frame in which to interpret the variables occurring
    in the \code{formula}, along with the \code{id} and \code{n} variables.
  }
  \item{subset}{
    expression saying which subset of the rows of the data should be used
    in the fit.  This can be a logical vector (which is replicated to have
    length equal to the number of observations), or a numeric vector
    indicating which observation numbers are to be included, or a
    character vector of the row names to be included.
    All observations are included by default.
  }
  \item{na.action}{
    a function to filter missing data.  For \code{gee} only \code{na.omit}
    should be used here.
  }
  \item{contrasts}{
    a list giving contrasts for some or all of the factors appearing
    in the model formula.  The elements of the list should have the
    same name as the variable and should be either a contrast matrix
    (specifically, any full-rank matrix with as many rows as there are
    levels in the factor), or else a function to compute such a matrix
    given the number of levels.
  }
  \item{weights}{an optional vector of weights to be used 
    in the fitting process. The length of \code{weights} should be the
    same as the number of observations. This weights is not (yet) the
    weight as in sas proc genmod, and hence is not recommended to use. 
  }
  \item{zcor}{a design matrix for correlation parameters.}
  \item{corp}{known parameters such as coordinates used for correlation
    coefficients.
  }
  \item{control}{a list of iteration and algorithmic constants. See
    \code{\link{geese.control}} for their names and default
    values. These can also be set as arguments to \code{geese} itself.
  }
  \item{b}{an initial estimate for the mean parameters.}
  \item{alpha}{an initial estimate for the correlation parameters.}
  \item{gm}{an initial estimate for the scale parameters.}
  \item{family}{a description of the error distribution and link
    function to be used in the model, as for \code{\link{glm}}.
  }
  \item{mean.link}{a character string specifying the link function for
    the means. The following are allowed:
    \code{"identity"}, \code{"logit"}, \code{"probit"},
    \code{"cloglog"}, \code{"log"}, and \code{"inverse"}.
    The default value is determined from family.
  }
  \item{variance}{a character string specifying the variance function
    in terms of the mean. The following are allowed:
    \code{"gaussian"}, \code{"binomial"}, \code{"poisson"}, and
    \code{"gamma"}. The default value is determined from family.
  }
  \item{cor.link}{a character string specifying the link function for
    the correlation coefficients. The following are allowed:
    \code{"identity"}, and \code{"fisherz"}.
  }
  \item{sca.link}{a character string specifying the link function for
    the scales. The following are allowed:
    \code{"identity"}, and \code{"log"}.
  }
  \item{link.same}{a logical indicating if all the components in a
    cluster should use the same link.
  }
  \item{scale.fix}{
    a logical variable; if true, the scale parameter is fixed at
    the value of \code{scale.value}.
  }
  \item{scale.value}{
    numeric variable giving the value to which the scale parameter
    should be fixed; used only if \code{scale.fix == TRUE}.
  }
  \item{corstr}{
    a character string specifying the correlation structure.
    The following are permitted:
    \code{"independence"},
    \code{"exchangeable"},
    \code{"ar1"},
    \code{"unstructured"},
    \code{"userdefined"}, and
    \code{"fixed"}
  }
  \item{x, y}{
    \code{x} is a design matrix of dimension \code{n * p}, and \code{y}
    is a vector of observations of length \code{n}.
  }
  \item{offset, soffset}{
    vector of offset for the mean and for the scale, respectively.
  }
  \item{zsca}{
    a design matrix of dimension \code{n * r} for the scales.
  }
  \item{\dots}{further arguments passed to or from other methods.}
}
\value{
  An object of class \code{"geese"} representing the fit.
}
\details{
  when the correlation structure is \code{fixed}, the specification
  of \code{Zcor} should be a vector of length
  \code{sum(clusz * (clusz - 1)) / 2.}
}
\references{
  Yan, J. and J.P. Fine (2004)
  Estimating Equations for Association Structures.
  \emph{Statistics in Medicine}, \bold{23}, 859--880. 
  
}
\author{Jun Yan \email{jyan.stat@gmail.com}}
\seealso{
  \code{\link{glm}}, \code{\link{lm}}, \code{\link{ordgee}}.
}
\examples{
data(seizure)
## Diggle, Liang, and Zeger (1994) pp166-168, compare Table 8.10
seiz.l <- reshape(seizure,
                  varying=list(c("base","y1", "y2", "y3", "y4")),
                  v.names="y", times=0:4, direction="long")
seiz.l <- seiz.l[order(seiz.l$id, seiz.l$time),]
seiz.l$t <- ifelse(seiz.l$time == 0, 8, 2)
seiz.l$x <- ifelse(seiz.l$time == 0, 0, 1)
m1 <- geese(y ~ offset(log(t)) + x + trt + x:trt, id = id,
            data=seiz.l, corstr="exch", family=poisson)
summary(m1)
m2 <- geese(y ~ offset(log(t)) + x + trt + x:trt, id = id,
            data = seiz.l, subset = id!=49,
            corstr = "exch", family=poisson)
summary(m2)
## Using fixed correlation matrix
cor.fixed <- matrix(c(1, 0.5, 0.25, 0.125, 0.125,
                      0.5, 1, 0.25, 0.125, 0.125,
                      0.25, 0.25, 1, 0.5, 0.125,
                      0.125, 0.125, 0.5, 1, 0.125,
                      0.125, 0.125, 0.125, 0.125, 1), 5, 5)
cor.fixed
zcor <- rep(cor.fixed[lower.tri(cor.fixed)], 59)
m3 <- geese(y ~ offset(log(t)) + x + trt + x:trt, id = id,
            data = seiz.l, family = poisson,
            corstr = "fixed", zcor = zcor)
summary(m3)

data(ohio)
fit <- geese(resp ~ age + smoke + age:smoke, id=id, data=ohio,
             family=binomial, corstr="exch", scale.fix=TRUE)
summary(fit)
fit.ar1 <- geese(resp ~ age + smoke + age:smoke, id=id, data=ohio,
                 family=binomial, corstr="ar1", scale.fix=TRUE)
summary(fit.ar1)

###### simulated data
## a function to generate a dataset
gendat <- function() {
  id <- gl(50, 4, 200)
  visit <- rep(1:4, 50)
  x1 <- rbinom(200, 1, 0.6) ## within cluster varying binary covariate
  x2 <- runif(200, 0, 1)   ## within cluster varying continuous covariate
  phi <- 1 + 2 * x1         ## true scale model
  ## the true correlation coefficient rho for an ar(1)
  ## correlation structure is 0.667.
  rhomat <- 0.667 ^ outer(1:4, 1:4, function(x, y) abs(x - y))
  chol.u <- chol(rhomat)
  noise <- as.vector(sapply(1:50, function(x) chol.u \%*\% rnorm(4)))
  e <- sqrt(phi) * noise
  y <- 1 + 3 * x1 - 2 * x2 + e
  dat <- data.frame(y, id, visit, x1, x2)
  dat
}

dat <- gendat()
fit <- geese(y ~ x1 + x2, id = id, data = dat, sformula = ~ x1,
             corstr = "ar1", jack = TRUE, j1s = TRUE, fij = TRUE)
summary(fit)


#### create user-defined design matrix of unstrctured correlation.
#### in this case, zcor has 4*3/2 = 6 columns, and 50 * 6 = 300 rows
zcor <- genZcor(clusz = rep(4, 50), waves = dat$visit, "unstr")
zfit <- geese(y ~ x1 + x2, id = id, data = dat, sformula = ~ x1,
              corstr = "userdefined", zcor = zcor,
              jack = TRUE, j1s = TRUE, fij = TRUE)
summary(zfit)

#### Now, suppose that we want the correlation of 1-2, 2-3, and 3-4
#### to be the same. Then zcor should have 4 columns.
z2 <- matrix(NA, 300, 4)
z2[,1] <- zcor[,1] + zcor[,4] + zcor[,6]
z2[,2:4] <- zcor[, c(2, 3, 5)]
summary(geese(y ~ x1 + x2, id = id, data = dat, sformula = ~ x1,
              corstr = "userdefined", zcor = z2,
              jack = TRUE, j1s = TRUE, fij = TRUE))

#### Next, we introduce non-constant cluster sizes by
#### randomly selecting 60 percent of the data
good <- sort(sample(1:nrow(dat), .6 * nrow(dat))) 
mdat <- dat[good,]

summary(geese(y ~ x1 + x2, id = id, data = mdat, waves = visit,
              sformula = ~ x1, corstr="ar1",
              jack = TRUE, j1s = TRUE, fij = TRUE))

}
\keyword{nonlinear}
\keyword{models}
