diff --git a/DESCRIPTION b/DESCRIPTION index 1dd82c7..89c4468 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,8 +23,9 @@ Imports: stats4, graphics, utils, - SummarizedExperiment(>= 1.5.3), - progress + SummarizedExperiment (>= 1.5.3), + progress, + arm Depends: SingleCellExperiment (>= 1.2.0), R(>= 3.5) @@ -73,7 +74,6 @@ Collate: 'ZlmFit-bootstrap.R' 'ZlmFit-logFC.R' 'ZlmFit.R' - 'bayesglm.R' 'convertMASTClassic.R' 'ebayes-helpers.R' 'filterEval.R' @@ -88,7 +88,7 @@ Collate: 'thresholdSCRNA.R' 'zeroinf.R' 'zlmHooks.R' -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 LazyData: true biocViews: GeneExpression, DifferentialExpression, GeneSetEnrichment, RNASeq, Transcriptomics, SingleCell diff --git a/NAMESPACE b/NAMESPACE index ef4dc93..104e604 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand S3method(plot,thresholdSCRNACountMatrix) -S3method(predict,ZlmFit) S3method(print,summaryThresholdSCRNA) S3method(print,summaryZlmFit) S3method(summary,thresholdSCRNACountMatrix) @@ -51,6 +50,7 @@ export(melt.SingleCellAssay) export(numexp) export(partialScore) export(plotSCAConcordance) +export(predict.ZlmFit) export(primerAverage) export(read.fluidigm) export(stat_ell) @@ -86,6 +86,8 @@ importFrom(SummarizedExperiment,'rowData<-') importFrom(SummarizedExperiment,assay) importFrom(SummarizedExperiment,assayNames) importFrom(SummarizedExperiment,assays) +importFrom(arm,bayesglm.fit) +importFrom(arm,invlogit) importFrom(data.table,"%like%") importFrom(data.table,":=") importFrom(data.table,.N) @@ -127,50 +129,8 @@ importFrom(plyr,ldply) importFrom(plyr,raply) importFrom(plyr,rbind.fill) importFrom(reshape2,colsplit) -importFrom(stats,"contrasts<-") -importFrom(stats,.checkMFClasses) -importFrom(stats,as.formula) -importFrom(stats,binomial) -importFrom(stats,cov2cor) -importFrom(stats,delete.response) -importFrom(stats,deviance) -importFrom(stats,df.residual) -importFrom(stats,dt) -importFrom(stats,ecdf) -importFrom(stats,family) -importFrom(stats,fitted) -importFrom(stats,formula) -importFrom(stats,gaussian) -importFrom(stats,glm.control) -importFrom(stats,glm.fit) importFrom(stats,influence) -importFrom(stats,lm.fit) -importFrom(stats,median) -importFrom(stats,model.frame) -importFrom(stats,model.matrix.default) -importFrom(stats,na.pass) -importFrom(stats,napredict) -importFrom(stats,naresid) -importFrom(stats,nobs) -importFrom(stats,offset) -importFrom(stats,optim) -importFrom(stats,optimize) -importFrom(stats,p.adjust) -importFrom(stats,pchisq) -importFrom(stats,predict) -importFrom(stats,pt) -importFrom(stats,qnorm) -importFrom(stats,qt) -importFrom(stats,quantile) -importFrom(stats,relevel) importFrom(stats,rstandard) -importFrom(stats,setNames) -importFrom(stats,sigma) -importFrom(stats,t.test) -importFrom(stats,terms) -importFrom(stats,terms.formula) -importFrom(stats,update.formula) -importFrom(stats,weighted.residuals) importFrom(utils,read.csv) importMethodsFrom(Biobase,fData) importMethodsFrom(Biobase,featureData) diff --git a/R/bayesglm.R b/R/bayesglm.R.old similarity index 97% rename from R/bayesglm.R rename to R/bayesglm.R.old index ebc9a3e..97487d0 100644 --- a/R/bayesglm.R +++ b/R/bayesglm.R.old @@ -1,1105 +1,1105 @@ -## from ARM version 1.7-7 (2015-04-07) -## code originally by Gelman and Su -## http://cran.r-project.org/web/packages/arm/index.html -##' @importFrom stats .checkMFClasses as.formula binomial contrasts<- cov2cor delete.response deviance -##' @importFrom stats df.residual dt ecdf -##' @importFrom stats median na.pass napredict 
naresid nobs offset optim -##' @importFrom stats optimize p.adjust pchisq predict pt qnorm qt quantile relevel setNames sigma t.test -##' @importFrom stats terms terms.formula weighted.residuals -##' @importFrom stats family fitted formula gaussian glm.control glm.fit lm.fit update.formula model.frame model.matrix.default - - -.bayesglm.fit <- function (x, y, weights = rep(1, nobs), start = NULL, - etastart = NULL, mustart = NULL, offset = rep(0, nobs), - family = gaussian(), - control = glm.control(), intercept = TRUE, - prior.mean = 0, - prior.scale = NULL, - prior.df = 1, - prior.mean.for.intercept = 0, - prior.scale.for.intercept = NULL, - prior.df.for.intercept = 1, - min.prior.scale=1e-12, - scaled = TRUE, print.unnormalized.log.posterior=FALSE, Warning=TRUE) -{ -######### INITIALIZE ####### - nobs <- NROW(y) - nvars <- NCOL(x) - conv <- FALSE - EMPTY <- nvars == 0 - - output <- .bayesglm.fit.initialize.priors (family, prior.scale, prior.scale.for.intercept, nvars, - prior.mean, prior.mean.for.intercept, intercept, prior.df, prior.df.for.intercept) - prior.scale <- output$prior.scale - prior.mean <- output$prior.mean - prior.df <- output$prior.df - - prior.scale <- .bayesglm.fit.initialize.priorScale (scaled, family, prior.scale, prior.scale.for.intercept, y, nvars, x, min.prior.scale) - output <- .bayesglm.fit.initialize.x (x, nvars, nobs, intercept, scaled) - x <- output$x - xnames <- output$xnames - x.nobs <- output$x.nobs - - output <- .bayesglm.fit.initialize.other (nobs, y, weights, offset) - ynames <- output$ynames - weights <- output$weights - offset <- output$offset - - family <- .bayesglm.fit.initialize.family (family) - ## TODO: DL -- I would put this inside initialize.family, but this does some magic setting of variables. - ## What I can do is push an environment into the function and wrap it in. 
- - if (is.null(mustart)){ - eval(family$initialize) - } - else { - mustart.keep <- mustart - eval(family$initialize) - mustart <- mustart.keep - } -######################## - if (EMPTY) { - eta <- rep.int(0, nobs) + offset - if (!family$valideta(eta)){ - stop("invalid linear predictor values in empty model") - } - mu <- family$linkinv(eta) - if (!family$validmu(mu)){ - stop("invalid fitted means in empty model") - } - devold <- sum(family$dev.resids(y, mu, weights)) - w <- ((weights * family$mu.eta(eta)^2)/family$variance(mu))^0.5 - residuals <- (y - mu)/family$mu.eta(eta) - good <- rep(TRUE, length(residuals)) - boundary <- conv <- TRUE - coef <- numeric(0) - iter <- 0 - } - else { - ## 3.1 ## - output <- .bayesglm.fit.loop.setup (etastart, start = start, nvars, xnames, offset, x, nobs, family, - mustart, eta, weights, conv, prior.scale, y, x.nobs) - coefold <- output$Coefold - start <- output$Start - eta <- output$eta - mu <- output$mu - devold <- output$devold - boundary <- output$boundary - prior.sd <- output$prior.sd - dispersion <- output$dispersion - dispersionold <- output$dispersionold -########## - - ## 3.2 ## check the link between 3.1 and 3.2 - output.new <- .bayesglm.fit.loop.main.ideal (control, x, y, nvars, nobs, weights, offset, - intercept, scaled, - start = start, etastart = eta, mustart = mu, dispersion = dispersion, - family, - prior.mean, prior.scale=prior.sd, prior.df, - print.unnormalized.log.posterior, - Warning) - fit <- output.new$fit - good <- output.new$good - z <- output.new$z - w <- output.new$w - ngoodobs <- output.new$ngoodobs - prior.scale <- output.new$prior.scale - prior.sd <- output.new$prior.sd - eta <- output.new$eta - mu <- output.new$mu - dev <- output.new$dev - dispersion <- output.new$dispersion - start <- output.new$start - coef <- output.new$coef - conv <- output.new$conv - iter <- output.new$iter - boundary <- output.new$boundary - Rmat <- output.new$Rmat - residuals <- output.new$residuals - } - - ## 4 ## - output <- .bayesglm.fit.cleanup ( - ynames = ynames, - residuals = residuals, - mu = mu, eta = eta, nobs = nobs, - weights= weights, w = w, good = good, - linkinv = family$linkinv, - dev.resids = family$dev.resids, - y = y, - intercept = intercept, - fit = fit, offset = offset, EMPTY = EMPTY, - dev =dev, aic = family$aic - ) - - - - residuals <- output$residuals - mu <- output$mu - eta <- output$eta - wt <- output$wt - weights <- output$weights - y <- output$y - wtdmu <- output$wtdmu - nulldev <- output$nulldev - n.ok <- output$n.ok - nulldf <- output$nulldf - rank <- output$rank - resdf <- output$resdf - aic.model <- output$aic.model -####### - list(coefficients = coef, - residuals = residuals, - fitted.values = mu, - effects = if (!EMPTY) fit$effects, - R = if (!EMPTY) Rmat, - rank = rank, - qr = if (!EMPTY) structure(getQr(fit)[c("qr", "rank", "qraux", "pivot", "tol")], class = "qr"), - family = family, - linear.predictors = eta, - deviance = dev, - aic = aic.model, - null.deviance = nulldev, - iter = iter, - weights = wt, - prior.weights = weights, - df.residual = resdf, - df.null = nulldf, - y = y, - converged = conv, - boundary = boundary, - prior.mean = prior.mean, - prior.scale = prior.scale, - prior.df = prior.df, - prior.sd = prior.sd, - dispersion = dispersion) -} - -.bayesglm.fit.initialize.priors <- function (family, prior.scale, prior.scale.for.intercept, nvars, - prior.mean, prior.mean.for.intercept, intercept, prior.df, prior.df.for.intercept) { -##### 12.13 #### - if (is.null(prior.scale)){ - prior.scale <- 2.5 - if 
(family$link == "probit"){ - prior.scale <- prior.scale*1.6 - } - } - - if (is.null(prior.scale.for.intercept)){ - prior.scale.for.intercept <- 10 - if (family$link == "probit"){ - prior.scale.for.intercept <- prior.scale.for.intercept*1.6 - } - } -################ - - if (intercept) { - nvars <- nvars - 1 - } - - if (length(prior.mean) == 1) { - prior.mean <- rep(prior.mean, nvars) - } - else if (length(prior.mean) != nvars) { - stop("Invalid length for prior.mean") - } - - if (length(prior.scale)==1){ - prior.scale <- rep(prior.scale, nvars) - } - else if (length(prior.scale) != nvars) { - stop("Invalid length for prior.scale") - } - - if (length(prior.df) == 1) { - prior.df <- rep(prior.df, nvars) - } - else if (length(prior.df) != nvars) { - stop("Invalid length for prior.df") - } - - if (intercept) { - prior.mean <- c(prior.mean.for.intercept, prior.mean) - prior.scale <- c(prior.scale.for.intercept, prior.scale) - prior.df <- c(prior.df.for.intercept, prior.df) - } - - list (prior.mean=prior.mean, - prior.scale=prior.scale, - prior.df=prior.df) -} - -.bayesglm.fit.initialize.priorScale <- function (scaled, family, prior.scale, prior.scale.for.intercept, y, nvars, x, min.prior.scale) { - if (scaled) { - if (family$family == "gaussian"){ - prior.scale <- prior.scale * 2 * sd(y) - prior.scale.for.intercept <- prior.scale.for.intercept * 2 * sd(y) - } - - for (j in 1:nvars) { - x.obs <- x[, j] - x.obs <- x.obs[!is.na(x.obs)] - num.categories <- length(unique(x.obs)) - x.scale <- 1 - if (num.categories == 2) { - x.scale <- max(x.obs) - min(x.obs) - } - else if (num.categories > 2) { - x.scale <- 2 * sd(x.obs) - } - prior.scale[j] <- prior.scale[j]/x.scale - if (prior.scale[j] < min.prior.scale){ - prior.scale[j] <- min.prior.scale - warning ("prior scale for variable ", j, - " set to min.prior.scale = ", min.prior.scale,"\n") - } - } - } - return (prior.scale) -} - -.bayesglm.fit.initialize.x <- function (x, nvars, nobs, intercept, scaled) { - x <- as.matrix (rbind(x, diag(nvars))) - xnames <- dimnames(x)[[2]] - x.nobs <- x[1:nobs, ,drop=FALSE] - # TODO: **** I moved it here because I think it's right, and changed it to x.nobs - if (intercept & scaled) { - x[nobs+1,] <- colMeans(x.nobs) - } - list (x=x, xnames=xnames, x.nobs=x.nobs) -} - -.bayesglm.fit.initialize.family <- function (family, mustart, enviroment) { - if (!is.function(family$variance) || !is.function(family$linkinv)){ - stop("'family' argument seems not to be a valid family object") - } - if (is.null(family$valideta)){ - family$valideta <- function(eta) TRUE - } - - if (is.null(family$validmu)){ - family$validmu <- function(mu) TRUE - } - - return (family) -} - -.bayesglm.fit.initialize.other <- function (nobs, y, weights, offset) { - if (is.matrix(y)){ - ynames <- rownames(y) - } - else{ - ynames <- names(y) - } - - if (is.null(weights)){ - weights <- rep.int(1, nobs) - } - - if (is.null(offset)){ - offset <- rep.int(0, nobs) - } - - list (ynames=ynames, - weights=weights, - offset=offset) -} - -.bayesglm.fit.loop.setup <- function (etastart, start, nvars, xnames, offset, x, nobs, family, - mustart, eta, weights, conv, prior.scale, y, x.nobs) { - coefold <- NULL - if (!is.null(etastart)) { - eta <- etastart - } else if (!is.null(start)) { - if (length(start) != nvars){ - if(start==0&length(start)==1){ - start <- rep(0, nvars) - eta <- offset + as.vector(ifelse((NCOL(x) == 1), x.nobs[,1]*start, x.nobs %*% start)) - }else{ - stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s", - nvars, 
paste(deparse(xnames), collapse = ", ")), domain = NA) - } - }else { - coefold <- start - eta <- offset + as.vector(ifelse((NCOL(x) == 1), x.nobs[,1]*start, x.nobs %*% start)) - } - } - else { - eta <- family$linkfun(mustart) - } - - mu <- family$linkinv(eta) - if (!isTRUE(family$validmu(mu) && family$valideta(eta))) - stop("cannot find valid starting values: please specify some") - devold <- sum(family$dev.resids(y, mu, weights)) - boundary <- conv <- FALSE - prior.sd <- prior.scale - #========Andy 2008.7.8============= - dispersion <- ifelse((family$family %in% c("poisson", "binomial")), 1, var(y)/10000) - #================================== - dispersionold <- dispersion - list ( Start = start, - Coefold = coefold, - eta=eta, - mu=mu, - devold=devold, - boundary=boundary, - prior.sd=prior.sd, - dispersion=dispersion, - dispersionold=dispersionold) -} - -.bayesglm.fit.loop.initializeState <- function (start, etastart, mustart, dispersion, offset, x.nobs, var.y, nvars, family, weights, prior.sd, y) { - coefold <- NULL - if (!is.null(etastart)) { - eta <- etastart - } - else if (!is.null(start)) { - coefold <- as.matrix (start) - eta <- drop (x.nobs %*% start) + offset - ## TODO: should be able to drop this. coefold is the original start. - #coefold <- start - } - else { - eta <- family$linkfun(mustart) - } - # if (family$family %in% c("poisson", "binomial")) { - # dispersion <- 1 - # } else{ - # dispersion <- var.y / 10000 - # } - mu <- family$linkinv(eta) - mu.eta.val <- family$mu.eta(eta) - dev <- sum (family$dev.resids (y, mu, weights)) - list ( Start = start, Coefold = coefold, eta=eta, - mu=mu, - mu.eta.val=mu.eta.val, - varmu=family$variance(mu), - good=(weights > 0) & (mu.eta.val != 0), - dispersion=dispersion, - dev=dev, - conv=FALSE, - boundary=FALSE, - prior.sd=prior.sd) -} - - - -.bayesglm.fit.loop.validateInputs <- function(etastart, start, nvars, xnames) { - if (is.null(etastart) & !is.null(start) & length(start) != nvars) { - stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s", - nvars, paste(xnames, collapse = ", ")), domain = NA) - } -} - - -.bayesglm.fit.loop.validateState <- function (state, family, control, iter, dispersionold, devold) { - if (all(!state$good)) { - warning("no observations informative at iteration ", state$iter) - return (FALSE) - } - if (is.null(state$fit) == FALSE && any (!is.finite(state$fit$coefficients))) { - warning("non-finite coefficients at iteration ", iter) - return (FALSE) - } - if (any(is.na(state$varmu[state$good]))){ - stop("NAs in V(mu)") - } - if (any(state$varmu[state$good] == 0)){ - stop("0s in V(mu)") - } - - if (any(is.na(state$mu.eta.val[state$good]))){ - stop("NAs in d(mu)/d(eta)") - } - - if (is.infinite(state$dispersion)){ - stop("dispersion blows up due to divergence!") - } - - if (iter > 1 & abs(state$dev - devold)/(0.1 + abs(state$dev)) < control$epsilon & - abs(state$dispersion - dispersionold)/(0.1 + abs(state$dispersion)) < control$epsilon) { - return (FALSE) - } - return (TRUE) -} - -.bayesglm.fit.loop.updateState <- function (state, priors, family, #fortran.call.parameters, - offset, weights, - y, x, x.nobs, nvars, nobs, - intercept, scaled, control) { - z <- (state$eta[state$good] - offset[state$good]) + (y[state$good] - state$mu[state$good]) / state$mu.eta.val[state$good] - z.star <- c(z, priors$mean) - w <- sqrt((weights[state$good] * state$mu.eta.val[state$good]^2)/state$varmu[state$good]) - w.star <- c(w, sqrt(state$dispersion)/priors$scale) - good.star <- c(state$good, rep(TRUE, 
nvars)) - fit <- lm.fit(x = as.matrix(x[good.star, ])*w.star, y = z.star*w.star) - start <- state$Start - coefold <- state$Start - - #fit <- .Fortran("dqrls", - # qr = x[good.star, ] * w.star, - # n = sum (good.star), - # p = nvars, - # y = w.star * z.star, - # ny = fortran.call.parameters$ny, - # tol = fortran.call.parameters$tol, - # coefficients = fortran.call.parameters$coefficients, - # residuals = fortran.call.parameters$residuals, - # effects = fortran.call.parameters$effects, - # rank = fortran.call.parameters$rank, - # pivot = fortran.call.parameters$pivot, - # qraux = fortran.call.parameters$qraux, - # work = fortran.call.parameters$work, - # PACKAGE = "base") - - - #state$prior.sd <- priors$scale - if (all (priors$df==Inf) == FALSE) { - colMeans.x <- colMeans (x.nobs) - centered.coefs <- fit$coefficients - if(NCOL(x.nobs)==1){ - V.coefs <- chol2inv(fit$qr$qr[1:nvars]) - }else{ - V.coefs <- chol2inv(fit$qr$qr[1:nvars, 1:nvars, drop = FALSE]) - } - diag.V.coefs <- diag(V.coefs) - sampling.var <- diag.V.coefs - # DL: sampling.var[1] <- crossprod(colMeans.x, V.coefs) %*% colMeans.x - if(intercept & scaled){ - centered.coefs[1] <- sum(fit$coefficients*colMeans.x) - sampling.var[1] <- crossprod (crossprod(V.coefs, colMeans.x), colMeans.x) - } - sd.tmp <- ((centered.coefs - priors$mean)^2 + sampling.var * state$dispersion + priors$df * state$prior.sd^2)/(1 + priors$df) - sd.coef <- sqrt(sd.tmp) - state$prior.sd[priors$df != Inf] <- sd.coef[priors$df != Inf] - } - - if(NCOL(x.nobs)==1){ - predictions <- x.nobs * fit$coefficients - }else{ - predictions <- x.nobs %*% fit$coefficients - } - - start[fit$qr$pivot] <- fit$coefficients - - if (!(family$family %in% c("poisson", "binomial"))) { - if (exists ("V.coefs") == FALSE) { - if(NCOL(x.nobs)==1){ - V.coefs <- chol2inv(fit$qr$qr[1:nvars]) - }else{ - V.coefs <- chol2inv(fit$qr$qr[1:nvars, 1:nvars, drop = FALSE]) - } - } - #mse.resid <- mean((w * (z - x.nobs %*% fit$coefficients))^2) ## LOCAL VARIABLE - #mse.resid <- mean ( (fit$y[1:nobs] - w * predictions)^2) - ## mse.uncertainty <- mean(diag(x.nobs %*% V.coefs %*% t(x.nobs))) * state$dispersion - mse.resid <- mean ( ((z.star*w.star)[1:sum(state$good)] - w * predictions[state$good,])^2) - mse.uncertainty <- max (0, mean(rowSums(( x.nobs %*% V.coefs ) * x.nobs)) * state$dispersion) #faster ## LOCAL VARIABLE - state$dispersion <- mse.resid + mse.uncertainty - } - - state$eta <- drop(predictions) + offset - state$mu <- family$linkinv(state$eta) - state$mu.eta.val <- family$mu.eta(state$eta) - dev <- sum(family$dev.resids(y, state$mu, weights)) - - if (!is.finite (dev)||(!is.finite(state$dispersion)) || !isTRUE(family$valideta(state$eta) && family$validmu(state$mu))) { - if ((!is.finite(dev))|(!is.finite(state$dispersion))) { - if (is.null(coefold)) { - stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE) - } - warning("step size truncated due to divergence", call. = FALSE) - } else if (!isTRUE(family$valideta(state$eta) && family$validmu(state$mu))) { - warning("step size truncated: out of bounds", call. 
= FALSE) - } - ii <- 1 - while (ii <= control$maxit & !is.finite (dev)) { - ii <- ii + 1 - start <- (start + coefold) / 2 - state$eta <- if(NCOL(x.nobs)==1){ - drop(x.nobs * start) - }else{ - drop(x.nobs %*% start) - } - state$mu <- family$linkinv(state$eta + offset) - dev <- sum (family$dev.resids (y, state$mu, weights)) - } - if (ii > control$maxit) { - stop("inner loop 1; cannot correct step size") - } - - ii <- 1 - while (ii <= control$maxit & !isTRUE(family$valideta(state$eta) && family$validmu(state$mu))) { - ii <- ii + 1 - start <- (start + coefold) / 2 - state$eta <- if(NCOL(x.nobs)==1){ - drop(x.nobs * start) - }else{ - drop(x.nobs %*% start) - } - state$mu <- family$linkinv(state$eta + offset) - } - if (ii > control$maxit) { - stop("inner loop 2; cannot correct step size") - } - - - state$boundary <- TRUE - if (control$trace){ - cat("Step halved: new deviance =", dev, "\n") - } - } - - list ( Start = start, - Coefold = coefold, - eta=state$eta, - mu=state$mu, - mu.eta.val=state$mu.eta.val, - varmu=family$variance(state$mu), - good=(weights > 0) & (state$mu.eta.val != 0), - dispersion=state$dispersion, - dev=dev, - fit=fit, - conv=FALSE, - boundary=state$boundary, - prior.sd=state$prior.sd, - z=z, - w=w) -} - - -.bayesglm.fit.loop.initializePriors <- function (prior.mean, prior.scale, prior.df) { - list(mean=prior.mean, - scale=prior.scale, - df=prior.df) - -} - -.bayesglm.fit.loop.print <- function (state, priors, family, print.unnormalized.log.posterior, intercept, y) { - if (print.unnormalized.log.posterior && family$family == "binomial") { - logprior <- sum( dt( state$fit$coefficients, priors$df , priors$mean, log = TRUE ) ) - #xb <- invlogit( x.nobs %*% coefs.hat ) - xb <- invlogit (state$eta - offset) - loglikelihood <- sum( log( c( xb[ y == 1 ], 1 - xb[ y == 0 ] ) ) ) - cat( "log prior: ", logprior, ", log likelihood: ", loglikelihood, ", - unnormalized log posterior: ", loglikelihood +logprior, "\n" ,sep="") - } -} - - -.bayesglm.fit.loop.createAuxillaryItems <- function (state, nvars, nobs, xnames, offset) { - if (state$fit$rank < nvars) { - state$fit$coefficients[seq(state$fit$rank + 1, nvars)] <- NA - } - residuals <- rep.int(NA, nobs) - residuals[state$good] <- state$z[state$good] - (state$eta[state$good] - offset[state$good]) - - state$fit$qr$qr <- as.matrix(state$fit$qr$qr) - - nr <- min(sum(state$good), nvars) - if (nr < nvars) { - Rmat <- diag(x = 0, nvars) - Rmat[1:nr, 1:nvars] <- state$fit$qr$qr[1:nr, 1:nvars, drop=FALSE] - } - else{ - if(NCOL(state$fit$qr$qr)==1){ - Rmat <- state$fit$qr$qr[1:nvars] - }else{ - Rmat <- state$fit$qr$qr[1:nvars, 1:nvars, drop=FALSE] - } - } - Rmat <- as.matrix(Rmat) - Rmat[row(Rmat) > col(Rmat)] <- 0 - names (state$fit$coefficients) <- xnames - colnames(state$fit$qr$qr) <- xnames - dimnames(Rmat) <- list(xnames, xnames) - list (residuals=residuals, - Rmat=Rmat, - state=state) -} - -.bayesglm.fit.loop.printWarnings <- function (Warning, state, family) { - if(Warning){ - if (!state$conv){ - warning("algorithm did not converge") - } - if (state$boundary){ - warning("algorithm stopped at boundary value") - } - eps <- 10 * .Machine$double.eps - if (family$family == "binomial") { - if (any(state$mu > 1 - eps) || any(state$mu < eps)) { - warning("fitted probabilities numerically 0 or 1 occurred") - } - } - if (family$family == "poisson") { - if (any(state$mu < eps)){ - warning("fitted rates numerically 0 occurred") - } - } - } -} - - -.bayesglm.fit.loop.main.ideal <- function (control, x, y, nvars, nobs, weights, offset, - intercept, 
scaled, - start, etastart, mustart, dispersion, - family, - prior.mean, prior.scale, prior.df, - print.unnormalized.log.posterior, - Warning) { - - xnames <- dimnames(x)[[2]] - x.nobs <- x[1:nobs, ,drop=FALSE] - - .bayesglm.fit.loop.validateInputs (etastart, start, nvars, dimnames(x)[[2]]) - - # invalid starting point (should be moved out of here): - # is.null(start)==false & length(start) != nvars - - priors <- .bayesglm.fit.loop.initializePriors (prior.mean = prior.mean, prior.scale = prior.scale, prior.df = prior.df) - - state <- .bayesglm.fit.loop.initializeState (start, etastart, mustart, dispersion, offset, x.nobs, var(y), nvars, family, weights, prior.sd = priors$scale, y) - #core elements of state: - # good: derived from eta - # eta - # x, y, nvars, nobs: invariants - - #fortran.call.parameters <- .bayesglm.fit.loop.initialMemoryAllocation (control$epsilon, nvars, sum (state$good)) - - for (iter in 1:control$maxit) { - dispersionold <- state$dispersion - devold <- state$dev - state <- .bayesglm.fit.loop.updateState (state, priors, family, #fortran.call.parameters, - offset, weights, - y, x, x.nobs, nvars, nobs, - intercept, scaled, control) - if (control$trace){ - cat("Deviance =", state$dev, "Iterations -", iter, "\n") - } - - if (.bayesglm.fit.loop.validateState(state, family, control, iter, dispersionold, devold) == FALSE) { - state$conv <- TRUE - break - } - .bayesglm.fit.loop.print(state, priors, family, print.unnormalized.log.posterior, intercept, y) - } - - .bayesglm.fit.loop.printWarnings(Warning, state, family) - output <- .bayesglm.fit.loop.createAuxillaryItems(state, nvars, nobs, xnames, offset) - ## residuals=residuals - ## Rmat=Rmat - ## state - - list (fit=output$state$fit, - good=output$state$good, - z=output$state$z, - ## z.star=output$state$z.star, - w=output$state$w, - ## w.star=output$state$w.star, - ngoodobs=sum (output$state$good), - prior.scale=priors$scale, - prior.sd=output$state$prior.sd, - eta=output$state$eta, - mu=output$state$mu, - dev=output$state$dev, - dispersion=output$state$dispersion, - dispersionold=dispersionold, - start=output$state$fit$coefficients, - coefold=output$state$fit$coefficients, - devold=devold, - conv=output$state$conv, - iter=iter, - boundary=output$state$boundary, - Rmat=output$Rmat, - residuals=output$residuals) -} - - -.bayesglm.fit.cleanup <- function (ynames, residuals, mu, eta, nobs, weights, w, good, linkinv, dev.resids, y, intercept, fit, offset, EMPTY, dev, aic) { - names(residuals) <- ynames - names(mu) <- ynames - names(eta) <- ynames - wt <- rep.int(0, nobs) - wt[good] <- w^2 - names(wt) <- ynames - names(weights) <- ynames - names(y) <- ynames - wtdmu <- if (intercept){ - sum(weights * y)/sum(weights) - } - else{ - linkinv(offset) - } - nulldev <- sum(dev.resids(y, wtdmu, weights)) - n.ok <- nobs - sum(w == 0) - nulldf <- n.ok - as.integer(intercept) - - rank <- if (EMPTY){ - 0 - } - else{ - fit$rank - } - resdf <- n.ok - rank - aic.model <- aic(y, n.ok, mu, weights, dev) + 2 * rank - - list (residuals=residuals, - mu=mu, - eta=eta, - wt = wt, - weights = weights, - y=y, - wtdmu=wtdmu, - nulldev=nulldev, - n.ok=n.ok, - nulldf=nulldf, - rank=rank, - resdf=resdf, - aic.model=aic.model) -} - - -predict.bayesglm <- function (object, newdata = NULL, type = c("link", "response", - "terms"), se.fit = FALSE, dispersion = NULL, terms = NULL, - na.action = na.pass, ...) 
-{ - type <- match.arg(type) - na.act <- object$na.action - object$na.action <- NULL - if (!se.fit) { - if (missing(newdata)) { - pred <- switch(type, link = object$linear.predictors, - response = object$fitted.values, terms = predictLM(object, - se.fit = se.fit, scale = 1, type = "terms", - terms = terms)) - if (!is.null(na.act)) - pred <- napredict(na.act, pred) - } - else { - pred <- predictLM(object, newdata, se.fit, scale = 1, - type = ifelse(type == "link", "response", type), - terms = terms, na.action = na.action) - switch(type, response = { - pred <- family(object)$linkinv(pred) - }, link = , terms = ) - } - } - else { - if (inherits(object, "survreg")) - dispersion <- 1 - if (is.null(dispersion) || dispersion == 0) - dispersion <- summary(object, dispersion = dispersion)$dispersion - residual.scale <- as.vector(sqrt(dispersion)) - pred <- predictLM(object, newdata, se.fit, scale = residual.scale, - type = ifelse(type == "link", "response", type), - terms = terms, na.action = na.action) - fit <- pred$fit - se.fit <- pred$se.fit - switch(type, response = { - se.fit <- se.fit * abs(family(object)$mu.eta(fit)) - fit <- family(object)$linkinv(fit) - }, link = , terms = ) - if (missing(newdata) && !is.null(na.act)) { - fit <- napredict(na.act, fit) - se.fit <- napredict(na.act, se.fit) - } - pred <- list(fit = fit, se.fit = se.fit, residual.scale = residual.scale) - } - pred -} - -predictLM <- function (object, newdata, se.fit = FALSE, scale = NULL, df = Inf, - interval = c("none", "confidence", "prediction"), level = 0.95, - type = c("response", "terms"), terms = NULL, na.action = na.pass, - pred.var = res.var/weights, weights = 1, ...) -{ - tt <- terms(object) - keep.order <- object$keep.order - drop.baseline <- object$drop.baseline - if (!inherits(object, "lm")) - warning("calling predict.lm() ...") - if (missing(newdata) || is.null(newdata)) { - mm <- X <- model.matrix(object) - mmDone <- TRUE - offset <- object$offset - } - else { - Terms <- delete.response(tt) - m <- model.frame(Terms, newdata, na.action = na.action, - xlev = object$xlevels) - if (!is.null(cl <- attr(Terms, "dataClasses"))) - .checkMFClasses(cl, m) - X <- model.matrixBayes(Terms, m, contrasts.arg = object$contrasts, keep.order = keep.order, drop.baseline = drop.baseline) - offset <- rep(0, nrow(X)) - if (!is.null(off.num <- attr(tt, "offset"))) - for (i in off.num) offset <- offset + eval(attr(tt, - "variables")[[i + 1]], newdata) - if (!is.null(object$call$offset)) - offset <- offset + eval(object$call$offset, newdata) - mmDone <- FALSE - } - n <- length(object$residuals) - p <- object$rank - p1 <- seq_len(p) - piv <- if (p) - getQr(object)$pivot[p1] - if (p < ncol(X) && !(missing(newdata) || is.null(newdata))) - warning("prediction from a rank-deficient fit may be misleading") - beta <- object$coefficients - predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv]) - if (!is.null(offset)) - predictor <- predictor + offset - interval <- match.arg(interval) - if (interval == "prediction") { - if (missing(newdata)) - warning("Predictions on current data refer to _future_ responses\n") - if (missing(newdata) && missing(weights)) { - w <- .weights.default(object) - if (!is.null(w)) { - weights <- w - warning("Assuming prediction variance inversely proportional to weights used for fitting\n") - } - } - if (!missing(newdata) && missing(weights) && !is.null(object$weights) && - missing(pred.var)) - warning("Assuming constant prediction variance even though model fit is weighted\n") - if (inherits(weights, "formula")) { - 
if (length(weights) != 2L) - stop("'weights' as formula should be one-sided") - d <- if (missing(newdata) || is.null(newdata)) - model.frame(object) - else newdata - weights <- eval(weights[[2L]], d, environment(weights)) - } - } - type <- match.arg(type) - if (se.fit || interval != "none") { - res.var <- if (is.null(scale)) { - r <- object$residuals - w <- object$weights - rss <- sum(if (is.null(w)) r^2 else r^2 * w) - df <- object$df.residual - rss/df - } - else scale^2 - if (type != "terms") { - if (p > 0) { - XRinv <- if (missing(newdata) && is.null(w)) - qr.Q(getQr(object))[, p1, drop = FALSE] - else X[, piv] %*% qr.solve(qr.R(getQr(object))[p1, p1]) - ip <- drop(XRinv^2 %*% rep(res.var, p)) - } - else ip <- rep(0, n) - } - } - if (type == "terms") { - if (!mmDone) { - mm <- model.matrixBayes(object, keep.order = keep.order, drop.baseline = drop.baseline) - mmDone <- TRUE - } - aa <- attr(mm, "assign") - ll <- attr(tt, "term.labels") - hasintercept <- attr(tt, "intercept") > 0L - if (hasintercept) - ll <- c("(Intercept)", ll) - aaa <- factor(aa, labels = ll) - asgn <- split(order(aa), aaa) - if (hasintercept) { - asgn$"(Intercept)" <- NULL - if (!mmDone) { - mm <- model.matrixBayes(object, keep.order = keep.order, drop.baseline = drop.baseline) - mmDone <- TRUE - } - avx <- colMeans(mm) - termsconst <- sum(avx[piv] * beta[piv]) - } - nterms <- length(asgn) - if (nterms > 0) { - predictor <- matrix(ncol = nterms, nrow = NROW(X)) - dimnames(predictor) <- list(rownames(X), names(asgn)) - if (se.fit || interval != "none") { - ip <- matrix(ncol = nterms, nrow = NROW(X)) - dimnames(ip) <- list(rownames(X), names(asgn)) - Rinv <- qr.solve(qr.R(getQr(object))[p1, p1]) - } - if (hasintercept) - X <- sweep(X, 2L, avx, check.margin = FALSE) - unpiv <- rep.int(0L, NCOL(X)) - unpiv[piv] <- p1 - for (i in seq.int(1L, nterms, length.out = nterms)) { - iipiv <- asgn[[i]] - ii <- unpiv[iipiv] - iipiv[ii == 0L] <- 0L - predictor[, i] <- if (any(iipiv > 0L)) - X[, iipiv, drop = FALSE] %*% beta[iipiv] - else 0 - if (se.fit || interval != "none") - ip[, i] <- if (any(iipiv > 0L)) - as.matrix(X[, iipiv, drop = FALSE] %*% Rinv[ii, - , drop = FALSE])^2 %*% rep.int(res.var, - p) - else 0 - } - if (!is.null(terms)) { - predictor <- predictor[, terms, drop = FALSE] - if (se.fit) - ip <- ip[, terms, drop = FALSE] - } - } - else { - predictor <- ip <- matrix(0, n, 0L) - } - attr(predictor, "constant") <- if (hasintercept) - termsconst - else 0 - } - if (interval != "none") { - tfrac <- qt((1 - level)/2, df) - hwid <- tfrac * switch(interval, confidence = sqrt(ip), - prediction = sqrt(ip + pred.var)) - if (type != "terms") { - predictor <- cbind(predictor, predictor + hwid %o% - c(1, -1)) - colnames(predictor) <- c("fit", "lwr", "upr") - } - else { - if (!is.null(terms)) - hwid <- hwid[, terms, drop = FALSE] - lwr <- predictor + hwid - upr <- predictor - hwid - } - } - if (se.fit || interval != "none") { - se <- sqrt(ip) - if (type == "terms" && !is.null(terms) && !se.fit) - se <- se[, terms, drop = FALSE] - } - if (missing(newdata) && !is.null(na.act <- object$na.action)) { - predictor <- napredict(na.act, predictor) - if (se.fit) - se <- napredict(na.act, se) - } - if (type == "terms" && interval != "none") { - if (missing(newdata) && !is.null(na.act)) { - lwr <- napredict(na.act, lwr) - upr <- napredict(na.act, upr) - } - list(fit = predictor, se.fit = se, lwr = lwr, upr = upr, - df = df, residual.scale = sqrt(res.var)) - } - else if (se.fit) - list(fit = predictor, se.fit = se, df = df, residual.scale = 
sqrt(res.var)) - else predictor -} - - - -getQr <- function(x, ...){ - if (is.null(r <- x$qr)) - stop("lm object does not have a proper 'qr' component.\n Rank zero or should not have used lm(.., qr=FALSE).") - r -} - -logit <- function (x) { - log(x/(1-x)) -} - -##' Inverse of logistic transformation -##' -##' @export -##' @param x numeric -##' @return numeric -##' @examples -##' x <- 1:5 -##' invlogit(log(x/(1-x))) -invlogit <- function (x) { - 1/(1+exp(-x)) -} - -model.matrixBayes <- function(object, data = environment(object), - contrasts.arg = NULL, xlev = NULL, keep.order=FALSE, drop.baseline=FALSE,...) -{ - #class(object) <- c("terms", "formula") - t <- if( missing( data ) ) { - terms( object ) - }else{ - terms.formula(object, data = data, keep.order=keep.order) - } - attr(t, "intercept") <- attr(object, "intercept") - if (is.null(attr(data, "terms"))){ - data <- model.frame(object, data, xlev=xlev) - }else { - reorder <- match(sapply(attr(t,"variables"), deparse, width.cutoff=500)[-1], names(data)) - if (any(is.na(reorder))) { - stop( "model frame and formula mismatch in model.matrix()" ) - } - if(!identical(reorder, seq_len(ncol(data)))) { - data <- data[,reorder, drop = FALSE] - } - } - int <- attr(t, "response") - if(length(data)) { # otherwise no rhs terms, so skip all this - - if (drop.baseline){ - contr.funs <- as.character(getOption("contrasts")) - }else{ - contr.funs <- as.character(list("contr.bayes.unordered", "contr.bayes.ordered")) - } - - namD <- names(data) - ## turn any character columns into factors - for(i in namD) - if(is.character( data[[i]] ) ) { - data[[i]] <- factor(data[[i]]) - warning( gettextf( "variable '%s' converted to a factor", i ), domain = NA) - } - isF <- vapply(data, function(x) is.factor(x) || is.logical(x), NA) - isF[int] <- FALSE - isOF <- vapply(data, is.ordered, NA) - for( nn in namD[isF] ) # drop response - if( is.null( attr( data[[nn]], "contrasts" ) ) ) { - contrasts( data[[nn]] ) <- contr.funs[1 + isOF[nn]] - } - ## it might be safer to have numerical contrasts: - ## get(contr.funs[1 + isOF[nn]])(nlevels(data[[nn]])) - if ( !is.null( contrasts.arg ) && is.list( contrasts.arg ) ) { - if ( is.null( namC <- names( contrasts.arg ) ) ) { - stop( "invalid 'contrasts.arg' argument" ) - } - for (nn in namC) { - if ( is.na( ni <- match( nn, namD ) ) ) { - warning( gettextf( "variable '%s' is absent, its contrast will be ignored", nn ), domain = NA ) - } - else { - ca <- contrasts.arg[[nn]] - if( is.matrix( ca ) ) { - contrasts( data[[ni]], ncol( ca ) ) <- ca - } - else { - contrasts( data[[ni]] ) <- contrasts.arg[[nn]] - } - } - } - } - } else { # internal model.matrix needs some variable - isF <- FALSE - data <- data.frame(x=rep(0, nrow(data))) - } - #ans <- .Internal( model.matrix( t, data ) ) - ans <- model.matrix.default(object=t, data=data) - cons <- if(any(isF)){ - lapply( data[isF], function(x) attr( x, "contrasts") ) - }else { NULL } - attr(ans, "contrasts" ) <- cons - ans -} - -.weights.default <- function (object, ...) 
-{ - wts <- object$weights - if (is.null(wts)) - wts - else napredict(object$na.action, wts) -} +## from ARM version 1.7-7 (2015-04-07) +## code originally by Gelman and Su +## http://cran.r-project.org/web/packages/arm/index.html +##' @importFrom stats .checkMFClasses as.formula binomial contrasts<- cov2cor delete.response deviance +##' @importFrom stats df.residual dt ecdf +##' @importFrom stats median na.pass napredict naresid nobs offset optim +##' @importFrom stats optimize p.adjust pchisq predict pt qnorm qt quantile relevel setNames sigma t.test +##' @importFrom stats terms terms.formula weighted.residuals +##' @importFrom stats family fitted formula gaussian glm.control glm.fit lm.fit update.formula model.frame model.matrix.default + + +.bayesglm.fit <- function (x, y, weights = rep(1, nobs), start = NULL, + etastart = NULL, mustart = NULL, offset = rep(0, nobs), + family = gaussian(), + control = glm.control(), intercept = TRUE, + prior.mean = 0, + prior.scale = NULL, + prior.df = 1, + prior.mean.for.intercept = 0, + prior.scale.for.intercept = NULL, + prior.df.for.intercept = 1, + min.prior.scale=1e-12, + scaled = TRUE, print.unnormalized.log.posterior=FALSE, Warning=TRUE) +{ +######### INITIALIZE ####### + nobs <- NROW(y) + nvars <- NCOL(x) + conv <- FALSE + EMPTY <- nvars == 0 + + output <- .bayesglm.fit.initialize.priors (family, prior.scale, prior.scale.for.intercept, nvars, + prior.mean, prior.mean.for.intercept, intercept, prior.df, prior.df.for.intercept) + prior.scale <- output$prior.scale + prior.mean <- output$prior.mean + prior.df <- output$prior.df + + prior.scale <- .bayesglm.fit.initialize.priorScale (scaled, family, prior.scale, prior.scale.for.intercept, y, nvars, x, min.prior.scale) + output <- .bayesglm.fit.initialize.x (x, nvars, nobs, intercept, scaled) + x <- output$x + xnames <- output$xnames + x.nobs <- output$x.nobs + + output <- .bayesglm.fit.initialize.other (nobs, y, weights, offset) + ynames <- output$ynames + weights <- output$weights + offset <- output$offset + + family <- .bayesglm.fit.initialize.family (family) + ## TODO: DL -- I would put this inside initialize.family, but this does some magic setting of variables. + ## What I can do is push an environment into the function and wrap it in. 
+ + if (is.null(mustart)){ + eval(family$initialize) + } + else { + mustart.keep <- mustart + eval(family$initialize) + mustart <- mustart.keep + } +######################## + if (EMPTY) { + eta <- rep.int(0, nobs) + offset + if (!family$valideta(eta)){ + stop("invalid linear predictor values in empty model") + } + mu <- family$linkinv(eta) + if (!family$validmu(mu)){ + stop("invalid fitted means in empty model") + } + devold <- sum(family$dev.resids(y, mu, weights)) + w <- ((weights * family$mu.eta(eta)^2)/family$variance(mu))^0.5 + residuals <- (y - mu)/family$mu.eta(eta) + good <- rep(TRUE, length(residuals)) + boundary <- conv <- TRUE + coef <- numeric(0) + iter <- 0 + } + else { + ## 3.1 ## + output <- .bayesglm.fit.loop.setup (etastart, start = start, nvars, xnames, offset, x, nobs, family, + mustart, eta, weights, conv, prior.scale, y, x.nobs) + coefold <- output$Coefold + start <- output$Start + eta <- output$eta + mu <- output$mu + devold <- output$devold + boundary <- output$boundary + prior.sd <- output$prior.sd + dispersion <- output$dispersion + dispersionold <- output$dispersionold +########## + + ## 3.2 ## check the link between 3.1 and 3.2 + output.new <- .bayesglm.fit.loop.main.ideal (control, x, y, nvars, nobs, weights, offset, + intercept, scaled, + start = start, etastart = eta, mustart = mu, dispersion = dispersion, + family, + prior.mean, prior.scale=prior.sd, prior.df, + print.unnormalized.log.posterior, + Warning) + fit <- output.new$fit + good <- output.new$good + z <- output.new$z + w <- output.new$w + ngoodobs <- output.new$ngoodobs + prior.scale <- output.new$prior.scale + prior.sd <- output.new$prior.sd + eta <- output.new$eta + mu <- output.new$mu + dev <- output.new$dev + dispersion <- output.new$dispersion + start <- output.new$start + coef <- output.new$coef + conv <- output.new$conv + iter <- output.new$iter + boundary <- output.new$boundary + Rmat <- output.new$Rmat + residuals <- output.new$residuals + } + + ## 4 ## + output <- .bayesglm.fit.cleanup ( + ynames = ynames, + residuals = residuals, + mu = mu, eta = eta, nobs = nobs, + weights= weights, w = w, good = good, + linkinv = family$linkinv, + dev.resids = family$dev.resids, + y = y, + intercept = intercept, + fit = fit, offset = offset, EMPTY = EMPTY, + dev =dev, aic = family$aic + ) + + + + residuals <- output$residuals + mu <- output$mu + eta <- output$eta + wt <- output$wt + weights <- output$weights + y <- output$y + wtdmu <- output$wtdmu + nulldev <- output$nulldev + n.ok <- output$n.ok + nulldf <- output$nulldf + rank <- output$rank + resdf <- output$resdf + aic.model <- output$aic.model +####### + list(coefficients = coef, + residuals = residuals, + fitted.values = mu, + effects = if (!EMPTY) fit$effects, + R = if (!EMPTY) Rmat, + rank = rank, + qr = if (!EMPTY) structure(getQr(fit)[c("qr", "rank", "qraux", "pivot", "tol")], class = "qr"), + family = family, + linear.predictors = eta, + deviance = dev, + aic = aic.model, + null.deviance = nulldev, + iter = iter, + weights = wt, + prior.weights = weights, + df.residual = resdf, + df.null = nulldf, + y = y, + converged = conv, + boundary = boundary, + prior.mean = prior.mean, + prior.scale = prior.scale, + prior.df = prior.df, + prior.sd = prior.sd, + dispersion = dispersion) +} + +.bayesglm.fit.initialize.priors <- function (family, prior.scale, prior.scale.for.intercept, nvars, + prior.mean, prior.mean.for.intercept, intercept, prior.df, prior.df.for.intercept) { +##### 12.13 #### + if (is.null(prior.scale)){ + prior.scale <- 2.5 + if 
(family$link == "probit"){ + prior.scale <- prior.scale*1.6 + } + } + + if (is.null(prior.scale.for.intercept)){ + prior.scale.for.intercept <- 10 + if (family$link == "probit"){ + prior.scale.for.intercept <- prior.scale.for.intercept*1.6 + } + } +################ + + if (intercept) { + nvars <- nvars - 1 + } + + if (length(prior.mean) == 1) { + prior.mean <- rep(prior.mean, nvars) + } + else if (length(prior.mean) != nvars) { + stop("Invalid length for prior.mean") + } + + if (length(prior.scale)==1){ + prior.scale <- rep(prior.scale, nvars) + } + else if (length(prior.scale) != nvars) { + stop("Invalid length for prior.scale") + } + + if (length(prior.df) == 1) { + prior.df <- rep(prior.df, nvars) + } + else if (length(prior.df) != nvars) { + stop("Invalid length for prior.df") + } + + if (intercept) { + prior.mean <- c(prior.mean.for.intercept, prior.mean) + prior.scale <- c(prior.scale.for.intercept, prior.scale) + prior.df <- c(prior.df.for.intercept, prior.df) + } + + list (prior.mean=prior.mean, + prior.scale=prior.scale, + prior.df=prior.df) +} + +.bayesglm.fit.initialize.priorScale <- function (scaled, family, prior.scale, prior.scale.for.intercept, y, nvars, x, min.prior.scale) { + if (scaled) { + if (family$family == "gaussian"){ + prior.scale <- prior.scale * 2 * sd(y) + prior.scale.for.intercept <- prior.scale.for.intercept * 2 * sd(y) + } + + for (j in 1:nvars) { + x.obs <- x[, j] + x.obs <- x.obs[!is.na(x.obs)] + num.categories <- length(unique(x.obs)) + x.scale <- 1 + if (num.categories == 2) { + x.scale <- max(x.obs) - min(x.obs) + } + else if (num.categories > 2) { + x.scale <- 2 * sd(x.obs) + } + prior.scale[j] <- prior.scale[j]/x.scale + if (prior.scale[j] < min.prior.scale){ + prior.scale[j] <- min.prior.scale + warning ("prior scale for variable ", j, + " set to min.prior.scale = ", min.prior.scale,"\n") + } + } + } + return (prior.scale) +} + +.bayesglm.fit.initialize.x <- function (x, nvars, nobs, intercept, scaled) { + x <- as.matrix (rbind(x, diag(nvars))) + xnames <- dimnames(x)[[2]] + x.nobs <- x[1:nobs, ,drop=FALSE] + # TODO: **** I moved it here because I think it's right, and changed it to x.nobs + if (intercept & scaled) { + x[nobs+1,] <- colMeans(x.nobs) + } + list (x=x, xnames=xnames, x.nobs=x.nobs) +} + +.bayesglm.fit.initialize.family <- function (family, mustart, enviroment) { + if (!is.function(family$variance) || !is.function(family$linkinv)){ + stop("'family' argument seems not to be a valid family object") + } + if (is.null(family$valideta)){ + family$valideta <- function(eta) TRUE + } + + if (is.null(family$validmu)){ + family$validmu <- function(mu) TRUE + } + + return (family) +} + +.bayesglm.fit.initialize.other <- function (nobs, y, weights, offset) { + if (is.matrix(y)){ + ynames <- rownames(y) + } + else{ + ynames <- names(y) + } + + if (is.null(weights)){ + weights <- rep.int(1, nobs) + } + + if (is.null(offset)){ + offset <- rep.int(0, nobs) + } + + list (ynames=ynames, + weights=weights, + offset=offset) +} + +.bayesglm.fit.loop.setup <- function (etastart, start, nvars, xnames, offset, x, nobs, family, + mustart, eta, weights, conv, prior.scale, y, x.nobs) { + coefold <- NULL + if (!is.null(etastart)) { + eta <- etastart + } else if (!is.null(start)) { + if (length(start) != nvars){ + if(start==0&length(start)==1){ + start <- rep(0, nvars) + eta <- offset + as.vector(ifelse((NCOL(x) == 1), x.nobs[,1]*start, x.nobs %*% start)) + }else{ + stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s", + nvars, 
paste(deparse(xnames), collapse = ", ")), domain = NA) + } + }else { + coefold <- start + eta <- offset + as.vector(ifelse((NCOL(x) == 1), x.nobs[,1]*start, x.nobs %*% start)) + } + } + else { + eta <- family$linkfun(mustart) + } + + mu <- family$linkinv(eta) + if (!isTRUE(family$validmu(mu) && family$valideta(eta))) + stop("cannot find valid starting values: please specify some") + devold <- sum(family$dev.resids(y, mu, weights)) + boundary <- conv <- FALSE + prior.sd <- prior.scale + #========Andy 2008.7.8============= + dispersion <- ifelse((family$family %in% c("poisson", "binomial")), 1, var(y)/10000) + #================================== + dispersionold <- dispersion + list ( Start = start, + Coefold = coefold, + eta=eta, + mu=mu, + devold=devold, + boundary=boundary, + prior.sd=prior.sd, + dispersion=dispersion, + dispersionold=dispersionold) +} + +.bayesglm.fit.loop.initializeState <- function (start, etastart, mustart, dispersion, offset, x.nobs, var.y, nvars, family, weights, prior.sd, y) { + coefold <- NULL + if (!is.null(etastart)) { + eta <- etastart + } + else if (!is.null(start)) { + coefold <- as.matrix (start) + eta <- drop (x.nobs %*% start) + offset + ## TODO: should be able to drop this. coefold is the original start. + #coefold <- start + } + else { + eta <- family$linkfun(mustart) + } + # if (family$family %in% c("poisson", "binomial")) { + # dispersion <- 1 + # } else{ + # dispersion <- var.y / 10000 + # } + mu <- family$linkinv(eta) + mu.eta.val <- family$mu.eta(eta) + dev <- sum (family$dev.resids (y, mu, weights)) + list ( Start = start, Coefold = coefold, eta=eta, + mu=mu, + mu.eta.val=mu.eta.val, + varmu=family$variance(mu), + good=(weights > 0) & (mu.eta.val != 0), + dispersion=dispersion, + dev=dev, + conv=FALSE, + boundary=FALSE, + prior.sd=prior.sd) +} + + + +.bayesglm.fit.loop.validateInputs <- function(etastart, start, nvars, xnames) { + if (is.null(etastart) & !is.null(start) & length(start) != nvars) { + stop(gettextf("length of 'start' should equal %d and correspond to initial coefs for %s", + nvars, paste(xnames, collapse = ", ")), domain = NA) + } +} + + +.bayesglm.fit.loop.validateState <- function (state, family, control, iter, dispersionold, devold) { + if (all(!state$good)) { + warning("no observations informative at iteration ", state$iter) + return (FALSE) + } + if (is.null(state$fit) == FALSE && any (!is.finite(state$fit$coefficients))) { + warning("non-finite coefficients at iteration ", iter) + return (FALSE) + } + if (any(is.na(state$varmu[state$good]))){ + stop("NAs in V(mu)") + } + if (any(state$varmu[state$good] == 0)){ + stop("0s in V(mu)") + } + + if (any(is.na(state$mu.eta.val[state$good]))){ + stop("NAs in d(mu)/d(eta)") + } + + if (is.infinite(state$dispersion)){ + stop("dispersion blows up due to divergence!") + } + + if (iter > 1 & abs(state$dev - devold)/(0.1 + abs(state$dev)) < control$epsilon & + abs(state$dispersion - dispersionold)/(0.1 + abs(state$dispersion)) < control$epsilon) { + return (FALSE) + } + return (TRUE) +} + +.bayesglm.fit.loop.updateState <- function (state, priors, family, #fortran.call.parameters, + offset, weights, + y, x, x.nobs, nvars, nobs, + intercept, scaled, control) { + z <- (state$eta[state$good] - offset[state$good]) + (y[state$good] - state$mu[state$good]) / state$mu.eta.val[state$good] + z.star <- c(z, priors$mean) + w <- sqrt((weights[state$good] * state$mu.eta.val[state$good]^2)/state$varmu[state$good]) + w.star <- c(w, sqrt(state$dispersion)/priors$scale) + good.star <- c(state$good, rep(TRUE, 
nvars)) + fit <- lm.fit(x = as.matrix(x[good.star, ])*w.star, y = z.star*w.star) + start <- state$Start + coefold <- state$Start + + #fit <- .Fortran("dqrls", + # qr = x[good.star, ] * w.star, + # n = sum (good.star), + # p = nvars, + # y = w.star * z.star, + # ny = fortran.call.parameters$ny, + # tol = fortran.call.parameters$tol, + # coefficients = fortran.call.parameters$coefficients, + # residuals = fortran.call.parameters$residuals, + # effects = fortran.call.parameters$effects, + # rank = fortran.call.parameters$rank, + # pivot = fortran.call.parameters$pivot, + # qraux = fortran.call.parameters$qraux, + # work = fortran.call.parameters$work, + # PACKAGE = "base") + + + #state$prior.sd <- priors$scale + if (all (priors$df==Inf) == FALSE) { + colMeans.x <- colMeans (x.nobs) + centered.coefs <- fit$coefficients + if(NCOL(x.nobs)==1){ + V.coefs <- chol2inv(fit$qr$qr[1:nvars]) + }else{ + V.coefs <- chol2inv(fit$qr$qr[1:nvars, 1:nvars, drop = FALSE]) + } + diag.V.coefs <- diag(V.coefs) + sampling.var <- diag.V.coefs + # DL: sampling.var[1] <- crossprod(colMeans.x, V.coefs) %*% colMeans.x + if(intercept & scaled){ + centered.coefs[1] <- sum(fit$coefficients*colMeans.x) + sampling.var[1] <- crossprod (crossprod(V.coefs, colMeans.x), colMeans.x) + } + sd.tmp <- ((centered.coefs - priors$mean)^2 + sampling.var * state$dispersion + priors$df * state$prior.sd^2)/(1 + priors$df) + sd.coef <- sqrt(sd.tmp) + state$prior.sd[priors$df != Inf] <- sd.coef[priors$df != Inf] + } + + if(NCOL(x.nobs)==1){ + predictions <- x.nobs * fit$coefficients + }else{ + predictions <- x.nobs %*% fit$coefficients + } + + start[fit$qr$pivot] <- fit$coefficients + + if (!(family$family %in% c("poisson", "binomial"))) { + if (exists ("V.coefs") == FALSE) { + if(NCOL(x.nobs)==1){ + V.coefs <- chol2inv(fit$qr$qr[1:nvars]) + }else{ + V.coefs <- chol2inv(fit$qr$qr[1:nvars, 1:nvars, drop = FALSE]) + } + } + #mse.resid <- mean((w * (z - x.nobs %*% fit$coefficients))^2) ## LOCAL VARIABLE + #mse.resid <- mean ( (fit$y[1:nobs] - w * predictions)^2) + ## mse.uncertainty <- mean(diag(x.nobs %*% V.coefs %*% t(x.nobs))) * state$dispersion + mse.resid <- mean ( ((z.star*w.star)[1:sum(state$good)] - w * predictions[state$good,])^2) + mse.uncertainty <- max (0, mean(rowSums(( x.nobs %*% V.coefs ) * x.nobs)) * state$dispersion) #faster ## LOCAL VARIABLE + state$dispersion <- mse.resid + mse.uncertainty + } + + state$eta <- drop(predictions) + offset + state$mu <- family$linkinv(state$eta) + state$mu.eta.val <- family$mu.eta(state$eta) + dev <- sum(family$dev.resids(y, state$mu, weights)) + + if (!is.finite (dev)||(!is.finite(state$dispersion)) || !isTRUE(family$valideta(state$eta) && family$validmu(state$mu))) { + if ((!is.finite(dev))|(!is.finite(state$dispersion))) { + if (is.null(coefold)) { + stop("no valid set of coefficients has been found: please supply starting values", call. = FALSE) + } + warning("step size truncated due to divergence", call. = FALSE) + } else if (!isTRUE(family$valideta(state$eta) && family$validmu(state$mu))) { + warning("step size truncated: out of bounds", call. 
= FALSE) + } + ii <- 1 + while (ii <= control$maxit & !is.finite (dev)) { + ii <- ii + 1 + start <- (start + coefold) / 2 + state$eta <- if(NCOL(x.nobs)==1){ + drop(x.nobs * start) + }else{ + drop(x.nobs %*% start) + } + state$mu <- family$linkinv(state$eta + offset) + dev <- sum (family$dev.resids (y, state$mu, weights)) + } + if (ii > control$maxit) { + stop("inner loop 1; cannot correct step size") + } + + ii <- 1 + while (ii <= control$maxit & !isTRUE(family$valideta(state$eta) && family$validmu(state$mu))) { + ii <- ii + 1 + start <- (start + coefold) / 2 + state$eta <- if(NCOL(x.nobs)==1){ + drop(x.nobs * start) + }else{ + drop(x.nobs %*% start) + } + state$mu <- family$linkinv(state$eta + offset) + } + if (ii > control$maxit) { + stop("inner loop 2; cannot correct step size") + } + + + state$boundary <- TRUE + if (control$trace){ + cat("Step halved: new deviance =", dev, "\n") + } + } + + list ( Start = start, + Coefold = coefold, + eta=state$eta, + mu=state$mu, + mu.eta.val=state$mu.eta.val, + varmu=family$variance(state$mu), + good=(weights > 0) & (state$mu.eta.val != 0), + dispersion=state$dispersion, + dev=dev, + fit=fit, + conv=FALSE, + boundary=state$boundary, + prior.sd=state$prior.sd, + z=z, + w=w) +} + + +.bayesglm.fit.loop.initializePriors <- function (prior.mean, prior.scale, prior.df) { + list(mean=prior.mean, + scale=prior.scale, + df=prior.df) + +} + +.bayesglm.fit.loop.print <- function (state, priors, family, print.unnormalized.log.posterior, intercept, y) { + if (print.unnormalized.log.posterior && family$family == "binomial") { + logprior <- sum( dt( state$fit$coefficients, priors$df , priors$mean, log = TRUE ) ) + #xb <- invlogit( x.nobs %*% coefs.hat ) + xb <- invlogit (state$eta - offset) + loglikelihood <- sum( log( c( xb[ y == 1 ], 1 - xb[ y == 0 ] ) ) ) + cat( "log prior: ", logprior, ", log likelihood: ", loglikelihood, ", + unnormalized log posterior: ", loglikelihood +logprior, "\n" ,sep="") + } +} + + +.bayesglm.fit.loop.createAuxillaryItems <- function (state, nvars, nobs, xnames, offset) { + if (state$fit$rank < nvars) { + state$fit$coefficients[seq(state$fit$rank + 1, nvars)] <- NA + } + residuals <- rep.int(NA, nobs) + residuals[state$good] <- state$z[state$good] - (state$eta[state$good] - offset[state$good]) + + state$fit$qr$qr <- as.matrix(state$fit$qr$qr) + + nr <- min(sum(state$good), nvars) + if (nr < nvars) { + Rmat <- diag(x = 0, nvars) + Rmat[1:nr, 1:nvars] <- state$fit$qr$qr[1:nr, 1:nvars, drop=FALSE] + } + else{ + if(NCOL(state$fit$qr$qr)==1){ + Rmat <- state$fit$qr$qr[1:nvars] + }else{ + Rmat <- state$fit$qr$qr[1:nvars, 1:nvars, drop=FALSE] + } + } + Rmat <- as.matrix(Rmat) + Rmat[row(Rmat) > col(Rmat)] <- 0 + names (state$fit$coefficients) <- xnames + colnames(state$fit$qr$qr) <- xnames + dimnames(Rmat) <- list(xnames, xnames) + list (residuals=residuals, + Rmat=Rmat, + state=state) +} + +.bayesglm.fit.loop.printWarnings <- function (Warning, state, family) { + if(Warning){ + if (!state$conv){ + warning("algorithm did not converge") + } + if (state$boundary){ + warning("algorithm stopped at boundary value") + } + eps <- 10 * .Machine$double.eps + if (family$family == "binomial") { + if (any(state$mu > 1 - eps) || any(state$mu < eps)) { + warning("fitted probabilities numerically 0 or 1 occurred") + } + } + if (family$family == "poisson") { + if (any(state$mu < eps)){ + warning("fitted rates numerically 0 occurred") + } + } + } +} + + +.bayesglm.fit.loop.main.ideal <- function (control, x, y, nvars, nobs, weights, offset, + intercept, 
scaled, + start, etastart, mustart, dispersion, + family, + prior.mean, prior.scale, prior.df, + print.unnormalized.log.posterior, + Warning) { + + xnames <- dimnames(x)[[2]] + x.nobs <- x[1:nobs, ,drop=FALSE] + + .bayesglm.fit.loop.validateInputs (etastart, start, nvars, dimnames(x)[[2]]) + + # invalid starting point (should be moved out of here): + # is.null(start)==false & length(start) != nvars + + priors <- .bayesglm.fit.loop.initializePriors (prior.mean = prior.mean, prior.scale = prior.scale, prior.df = prior.df) + + state <- .bayesglm.fit.loop.initializeState (start, etastart, mustart, dispersion, offset, x.nobs, var(y), nvars, family, weights, prior.sd = priors$scale, y) + #core elements of state: + # good: derived from eta + # eta + # x, y, nvars, nobs: invariants + + #fortran.call.parameters <- .bayesglm.fit.loop.initialMemoryAllocation (control$epsilon, nvars, sum (state$good)) + + for (iter in 1:control$maxit) { + dispersionold <- state$dispersion + devold <- state$dev + state <- .bayesglm.fit.loop.updateState (state, priors, family, #fortran.call.parameters, + offset, weights, + y, x, x.nobs, nvars, nobs, + intercept, scaled, control) + if (control$trace){ + cat("Deviance =", state$dev, "Iterations -", iter, "\n") + } + + if (.bayesglm.fit.loop.validateState(state, family, control, iter, dispersionold, devold) == FALSE) { + state$conv <- TRUE + break + } + .bayesglm.fit.loop.print(state, priors, family, print.unnormalized.log.posterior, intercept, y) + } + + .bayesglm.fit.loop.printWarnings(Warning, state, family) + output <- .bayesglm.fit.loop.createAuxillaryItems(state, nvars, nobs, xnames, offset) + ## residuals=residuals + ## Rmat=Rmat + ## state + + list (fit=output$state$fit, + good=output$state$good, + z=output$state$z, + ## z.star=output$state$z.star, + w=output$state$w, + ## w.star=output$state$w.star, + ngoodobs=sum (output$state$good), + prior.scale=priors$scale, + prior.sd=output$state$prior.sd, + eta=output$state$eta, + mu=output$state$mu, + dev=output$state$dev, + dispersion=output$state$dispersion, + dispersionold=dispersionold, + start=output$state$fit$coefficients, + coefold=output$state$fit$coefficients, + devold=devold, + conv=output$state$conv, + iter=iter, + boundary=output$state$boundary, + Rmat=output$Rmat, + residuals=output$residuals) +} + + +.bayesglm.fit.cleanup <- function (ynames, residuals, mu, eta, nobs, weights, w, good, linkinv, dev.resids, y, intercept, fit, offset, EMPTY, dev, aic) { + names(residuals) <- ynames + names(mu) <- ynames + names(eta) <- ynames + wt <- rep.int(0, nobs) + wt[good] <- w^2 + names(wt) <- ynames + names(weights) <- ynames + names(y) <- ynames + wtdmu <- if (intercept){ + sum(weights * y)/sum(weights) + } + else{ + linkinv(offset) + } + nulldev <- sum(dev.resids(y, wtdmu, weights)) + n.ok <- nobs - sum(w == 0) + nulldf <- n.ok - as.integer(intercept) + + rank <- if (EMPTY){ + 0 + } + else{ + fit$rank + } + resdf <- n.ok - rank + aic.model <- aic(y, n.ok, mu, weights, dev) + 2 * rank + + list (residuals=residuals, + mu=mu, + eta=eta, + wt = wt, + weights = weights, + y=y, + wtdmu=wtdmu, + nulldev=nulldev, + n.ok=n.ok, + nulldf=nulldf, + rank=rank, + resdf=resdf, + aic.model=aic.model) +} + + +predict.bayesglm <- function (object, newdata = NULL, type = c("link", "response", + "terms"), se.fit = FALSE, dispersion = NULL, terms = NULL, + na.action = na.pass, ...) 
+{ + type <- match.arg(type) + na.act <- object$na.action + object$na.action <- NULL + if (!se.fit) { + if (missing(newdata)) { + pred <- switch(type, link = object$linear.predictors, + response = object$fitted.values, terms = predictLM(object, + se.fit = se.fit, scale = 1, type = "terms", + terms = terms)) + if (!is.null(na.act)) + pred <- napredict(na.act, pred) + } + else { + pred <- predictLM(object, newdata, se.fit, scale = 1, + type = ifelse(type == "link", "response", type), + terms = terms, na.action = na.action) + switch(type, response = { + pred <- family(object)$linkinv(pred) + }, link = , terms = ) + } + } + else { + if (inherits(object, "survreg")) + dispersion <- 1 + if (is.null(dispersion) || dispersion == 0) + dispersion <- summary(object, dispersion = dispersion)$dispersion + residual.scale <- as.vector(sqrt(dispersion)) + pred <- predictLM(object, newdata, se.fit, scale = residual.scale, + type = ifelse(type == "link", "response", type), + terms = terms, na.action = na.action) + fit <- pred$fit + se.fit <- pred$se.fit + switch(type, response = { + se.fit <- se.fit * abs(family(object)$mu.eta(fit)) + fit <- family(object)$linkinv(fit) + }, link = , terms = ) + if (missing(newdata) && !is.null(na.act)) { + fit <- napredict(na.act, fit) + se.fit <- napredict(na.act, se.fit) + } + pred <- list(fit = fit, se.fit = se.fit, residual.scale = residual.scale) + } + pred +} + +predictLM <- function (object, newdata, se.fit = FALSE, scale = NULL, df = Inf, + interval = c("none", "confidence", "prediction"), level = 0.95, + type = c("response", "terms"), terms = NULL, na.action = na.pass, + pred.var = res.var/weights, weights = 1, ...) +{ + tt <- terms(object) + keep.order <- object$keep.order + drop.baseline <- object$drop.baseline + if (!inherits(object, "lm")) + warning("calling predict.lm() ...") + if (missing(newdata) || is.null(newdata)) { + mm <- X <- model.matrix(object) + mmDone <- TRUE + offset <- object$offset + } + else { + Terms <- delete.response(tt) + m <- model.frame(Terms, newdata, na.action = na.action, + xlev = object$xlevels) + if (!is.null(cl <- attr(Terms, "dataClasses"))) + .checkMFClasses(cl, m) + X <- model.matrixBayes(Terms, m, contrasts.arg = object$contrasts, keep.order = keep.order, drop.baseline = drop.baseline) + offset <- rep(0, nrow(X)) + if (!is.null(off.num <- attr(tt, "offset"))) + for (i in off.num) offset <- offset + eval(attr(tt, + "variables")[[i + 1]], newdata) + if (!is.null(object$call$offset)) + offset <- offset + eval(object$call$offset, newdata) + mmDone <- FALSE + } + n <- length(object$residuals) + p <- object$rank + p1 <- seq_len(p) + piv <- if (p) + getQr(object)$pivot[p1] + if (p < ncol(X) && !(missing(newdata) || is.null(newdata))) + warning("prediction from a rank-deficient fit may be misleading") + beta <- object$coefficients + predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv]) + if (!is.null(offset)) + predictor <- predictor + offset + interval <- match.arg(interval) + if (interval == "prediction") { + if (missing(newdata)) + warning("Predictions on current data refer to _future_ responses\n") + if (missing(newdata) && missing(weights)) { + w <- .weights.default(object) + if (!is.null(w)) { + weights <- w + warning("Assuming prediction variance inversely proportional to weights used for fitting\n") + } + } + if (!missing(newdata) && missing(weights) && !is.null(object$weights) && + missing(pred.var)) + warning("Assuming constant prediction variance even though model fit is weighted\n") + if (inherits(weights, "formula")) { + 
if (length(weights) != 2L) + stop("'weights' as formula should be one-sided") + d <- if (missing(newdata) || is.null(newdata)) + model.frame(object) + else newdata + weights <- eval(weights[[2L]], d, environment(weights)) + } + } + type <- match.arg(type) + if (se.fit || interval != "none") { + res.var <- if (is.null(scale)) { + r <- object$residuals + w <- object$weights + rss <- sum(if (is.null(w)) r^2 else r^2 * w) + df <- object$df.residual + rss/df + } + else scale^2 + if (type != "terms") { + if (p > 0) { + XRinv <- if (missing(newdata) && is.null(w)) + qr.Q(getQr(object))[, p1, drop = FALSE] + else X[, piv] %*% qr.solve(qr.R(getQr(object))[p1, p1]) + ip <- drop(XRinv^2 %*% rep(res.var, p)) + } + else ip <- rep(0, n) + } + } + if (type == "terms") { + if (!mmDone) { + mm <- model.matrixBayes(object, keep.order = keep.order, drop.baseline = drop.baseline) + mmDone <- TRUE + } + aa <- attr(mm, "assign") + ll <- attr(tt, "term.labels") + hasintercept <- attr(tt, "intercept") > 0L + if (hasintercept) + ll <- c("(Intercept)", ll) + aaa <- factor(aa, labels = ll) + asgn <- split(order(aa), aaa) + if (hasintercept) { + asgn$"(Intercept)" <- NULL + if (!mmDone) { + mm <- model.matrixBayes(object, keep.order = keep.order, drop.baseline = drop.baseline) + mmDone <- TRUE + } + avx <- colMeans(mm) + termsconst <- sum(avx[piv] * beta[piv]) + } + nterms <- length(asgn) + if (nterms > 0) { + predictor <- matrix(ncol = nterms, nrow = NROW(X)) + dimnames(predictor) <- list(rownames(X), names(asgn)) + if (se.fit || interval != "none") { + ip <- matrix(ncol = nterms, nrow = NROW(X)) + dimnames(ip) <- list(rownames(X), names(asgn)) + Rinv <- qr.solve(qr.R(getQr(object))[p1, p1]) + } + if (hasintercept) + X <- sweep(X, 2L, avx, check.margin = FALSE) + unpiv <- rep.int(0L, NCOL(X)) + unpiv[piv] <- p1 + for (i in seq.int(1L, nterms, length.out = nterms)) { + iipiv <- asgn[[i]] + ii <- unpiv[iipiv] + iipiv[ii == 0L] <- 0L + predictor[, i] <- if (any(iipiv > 0L)) + X[, iipiv, drop = FALSE] %*% beta[iipiv] + else 0 + if (se.fit || interval != "none") + ip[, i] <- if (any(iipiv > 0L)) + as.matrix(X[, iipiv, drop = FALSE] %*% Rinv[ii, + , drop = FALSE])^2 %*% rep.int(res.var, + p) + else 0 + } + if (!is.null(terms)) { + predictor <- predictor[, terms, drop = FALSE] + if (se.fit) + ip <- ip[, terms, drop = FALSE] + } + } + else { + predictor <- ip <- matrix(0, n, 0L) + } + attr(predictor, "constant") <- if (hasintercept) + termsconst + else 0 + } + if (interval != "none") { + tfrac <- qt((1 - level)/2, df) + hwid <- tfrac * switch(interval, confidence = sqrt(ip), + prediction = sqrt(ip + pred.var)) + if (type != "terms") { + predictor <- cbind(predictor, predictor + hwid %o% + c(1, -1)) + colnames(predictor) <- c("fit", "lwr", "upr") + } + else { + if (!is.null(terms)) + hwid <- hwid[, terms, drop = FALSE] + lwr <- predictor + hwid + upr <- predictor - hwid + } + } + if (se.fit || interval != "none") { + se <- sqrt(ip) + if (type == "terms" && !is.null(terms) && !se.fit) + se <- se[, terms, drop = FALSE] + } + if (missing(newdata) && !is.null(na.act <- object$na.action)) { + predictor <- napredict(na.act, predictor) + if (se.fit) + se <- napredict(na.act, se) + } + if (type == "terms" && interval != "none") { + if (missing(newdata) && !is.null(na.act)) { + lwr <- napredict(na.act, lwr) + upr <- napredict(na.act, upr) + } + list(fit = predictor, se.fit = se, lwr = lwr, upr = upr, + df = df, residual.scale = sqrt(res.var)) + } + else if (se.fit) + list(fit = predictor, se.fit = se, df = df, residual.scale = 
sqrt(res.var)) + else predictor +} + + + +getQr <- function(x, ...){ + if (is.null(r <- x$qr)) + stop("lm object does not have a proper 'qr' component.\n Rank zero or should not have used lm(.., qr=FALSE).") + r +} + +logit <- function (x) { + log(x/(1-x)) +} + +##' Inverse of logistic transformation +##' +##' @export +##' @param x numeric +##' @return numeric +##' @examples +##' x <- 1:5 +##' invlogit(log(x/(1-x))) +invlogit <- function (x) { + 1/(1+exp(-x)) +} + +model.matrixBayes <- function(object, data = environment(object), + contrasts.arg = NULL, xlev = NULL, keep.order=FALSE, drop.baseline=FALSE,...) +{ + #class(object) <- c("terms", "formula") + t <- if( missing( data ) ) { + terms( object ) + }else{ + terms.formula(object, data = data, keep.order=keep.order) + } + attr(t, "intercept") <- attr(object, "intercept") + if (is.null(attr(data, "terms"))){ + data <- model.frame(object, data, xlev=xlev) + }else { + reorder <- match(sapply(attr(t,"variables"), deparse, width.cutoff=500)[-1], names(data)) + if (any(is.na(reorder))) { + stop( "model frame and formula mismatch in model.matrix()" ) + } + if(!identical(reorder, seq_len(ncol(data)))) { + data <- data[,reorder, drop = FALSE] + } + } + int <- attr(t, "response") + if(length(data)) { # otherwise no rhs terms, so skip all this + + if (drop.baseline){ + contr.funs <- as.character(getOption("contrasts")) + }else{ + contr.funs <- as.character(list("contr.bayes.unordered", "contr.bayes.ordered")) + } + + namD <- names(data) + ## turn any character columns into factors + for(i in namD) + if(is.character( data[[i]] ) ) { + data[[i]] <- factor(data[[i]]) + warning( gettextf( "variable '%s' converted to a factor", i ), domain = NA) + } + isF <- vapply(data, function(x) is.factor(x) || is.logical(x), NA) + isF[int] <- FALSE + isOF <- vapply(data, is.ordered, NA) + for( nn in namD[isF] ) # drop response + if( is.null( attr( data[[nn]], "contrasts" ) ) ) { + contrasts( data[[nn]] ) <- contr.funs[1 + isOF[nn]] + } + ## it might be safer to have numerical contrasts: + ## get(contr.funs[1 + isOF[nn]])(nlevels(data[[nn]])) + if ( !is.null( contrasts.arg ) && is.list( contrasts.arg ) ) { + if ( is.null( namC <- names( contrasts.arg ) ) ) { + stop( "invalid 'contrasts.arg' argument" ) + } + for (nn in namC) { + if ( is.na( ni <- match( nn, namD ) ) ) { + warning( gettextf( "variable '%s' is absent, its contrast will be ignored", nn ), domain = NA ) + } + else { + ca <- contrasts.arg[[nn]] + if( is.matrix( ca ) ) { + contrasts( data[[ni]], ncol( ca ) ) <- ca + } + else { + contrasts( data[[ni]] ) <- contrasts.arg[[nn]] + } + } + } + } + } else { # internal model.matrix needs some variable + isF <- FALSE + data <- data.frame(x=rep(0, nrow(data))) + } + #ans <- .Internal( model.matrix( t, data ) ) + ans <- model.matrix.default(object=t, data=data) + cons <- if(any(isF)){ + lapply( data[isF], function(x) attr( x, "contrasts") ) + }else { NULL } + attr(ans, "contrasts" ) <- cons + ans +} + +.weights.default <- function (object, ...) 
+{
+    wts <- object$weights
+    if (is.null(wts))
+        wts
+    else napredict(object$na.action, wts)
+}
diff --git a/R/lmWrapper-bayesglm.R b/R/lmWrapper-bayesglm.R
index a9e3601..847e3a0 100644
--- a/R/lmWrapper-bayesglm.R
+++ b/R/lmWrapper-bayesglm.R
@@ -1,3 +1,10 @@
+##' @importFrom arm bayesglm.fit invlogit
+##' @name invlogit
+##' @title Inverse logit transformation, re-exported from arm
+##' @rdname invlogit
+##' @export
+NULL
+
 ##' @include AllClasses.R
 ##' @include AllGenerics.R
 setMethod('fit', signature=c(object='BayesGLMlike', response='missing'), function(object, response, silent=TRUE, ...){
@@ -20,10 +27,10 @@ setMethod('fit', signature=c(object='BayesGLMlike', response='missing'), functio
         }
     }
 
-    contFit <- if(object@useContinuousBayes) .bayesglm.fit else glm.fit
+    contFit <- if(object@useContinuousBayes) arm::bayesglm.fit else glm.fit
     object@fitC <- do.call(contFit, c(list(x=object@modelMatrix[pos,,drop=FALSE], y=object@response[pos], weights=object@weightFun(object@response[pos])), fitArgsC))
-    object@fitD <- do.call(.bayesglm.fit, c(list(x=object@modelMatrix, y=object@weightFun(object@response), family=binomial()), fitArgsD))
+    object@fitD <- do.call(arm::bayesglm.fit, c(list(x=object@modelMatrix, y=object@weightFun(object@response), family=binomial()), fitArgsD))
 
     object <- .glmDOF(object, pos)
     object <- .dispersion(object)
diff --git a/R/lmWrapper-ridge.R b/R/lmWrapper-ridge.R
index ad11ff6..0f4afff 100644
--- a/R/lmWrapper-ridge.R
+++ b/R/lmWrapper-ridge.R
@@ -20,10 +20,10 @@ setMethod('fit', signature=c(object='RidgeBGLMlike', response='missing'), functi
         }
     }
 
-    contFit <- if(object@useContinuousBayes) .bayesglm.fit else ridge.fit
+    contFit <- if(object@useContinuousBayes) arm::bayesglm.fit else ridge.fit
     fitArgsC$lambda<-object@lambda
     object@fitC <- do.call(contFit, c(list(x=object@modelMatrix[pos,,drop=FALSE], y=object@response[pos], weights=object@weightFun(object@response[pos])), fitArgsC))
-    object@fitD <- do.call(.bayesglm.fit, c(list(x=object@modelMatrix, y=object@weightFun(object@response), family=binomial()), fitArgsD))
+    object@fitD <- do.call(arm::bayesglm.fit, c(list(x=object@modelMatrix, y=object@weightFun(object@response), family=binomial()), fitArgsD))
 
     object <- .glmDOF(object, pos)
     object <- .dispersion(object)
diff --git a/R/stat_ell.R b/R/stat_ell.R
index ec34b13..0181d67 100644
--- a/R/stat_ell.R
+++ b/R/stat_ell.R
@@ -14,9 +14,9 @@ StatEll <- ggproto("StatEll", Stat, compute_group = function(data, scales,level=0.95,invert=FALSE,alpha=1) {
                      e=ell(x=data$x, xse=data$xse,y=data$y,yse=data$yse,radius=sqrt(qchisq(level,df=2)))
                      if(invert==c("x")){
-                       e[,1] =invlogit(e[,1])
+                       e[,1] = invlogit(e[,1])
                      }else if(invert==c("y")){
-                       e[,2] =invlogit(e[,2])
+                       e[,2] = invlogit(e[,2])
                      }
                      return(e)
                    },
diff --git a/man/invlogit.Rd b/man/invlogit.Rd
index 0b42bcf..13114f6 100644
--- a/man/invlogit.Rd
+++ b/man/invlogit.Rd
@@ -1,21 +1,8 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/bayesglm.R
+% Please edit documentation in R/lmWrapper-bayesglm.R
 \name{invlogit}
 \alias{invlogit}
-\title{Inverse of logistic transformation}
-\usage{
-invlogit(x)
-}
-\arguments{
-\item{x}{numeric}
-}
-\value{
-numeric
-}
+\title{Inverse logit transformation, re-exported from arm}
 \description{
-Inverse of logistic transformation
-}
-\examples{
-x <- 1:5
-invlogit(log(x/(1-x)))
+Inverse logit transformation, re-exported from arm
 }
diff --git a/man/predict.ZlmFit.Rd b/man/predict.ZlmFit.Rd
index 230f9ac..f144c58 100644
--- a/man/predict.ZlmFit.Rd
+++ b/man/predict.ZlmFit.Rd
@@ -4,7 +4,7 @@
 \alias{predict.ZlmFit}
 \title{Return predictions from a ZlmFit object.}
 \usage{
-\method{predict}{ZlmFit}(object, newdata = NULL, modelmatrix = NULL, ...)
+predict.ZlmFit(object, newdata = NULL, modelmatrix = NULL, ...)
 }
 \arguments{
 \item{object}{A \code{ZlmFit}}
diff --git a/vignettes/MAITAnalysis.Rmd b/vignettes/MAITAnalysis.Rmd
index eb45a8e..1a89fdf 100644
--- a/vignettes/MAITAnalysis.Rmd
+++ b/vignettes/MAITAnalysis.Rmd
@@ -111,7 +111,7 @@ We'll set a filtering threshold using the [Potter Stewart](https://en.wikipedia.
 ## Filtering
 ```{r filter_outlying_cell,results='hide'}
 sca <- subset(scaRaw,filterCrit)
-eid <- select(TxDb.Hsapiens.UCSC.hg19.knownGene,keys = mcols(sca)$entrez,keytype ="GENEID",columns = c("GENEID","TXNAME"))
+eid <- AnnotationDbi::select(TxDb.Hsapiens.UCSC.hg19.knownGene,keys = mcols(sca)$entrez,keytype ="GENEID",columns = c("GENEID","TXNAME"))
 ueid <- unique(na.omit(eid)$GENEID)
 sca <- sca[mcols(sca)$entrez %in% ueid,]
 ## Remove invariant genes
@@ -264,7 +264,7 @@ predicted_sig <- merge(mcols(sca), predicted[primerid%in%entrez_to_plot], by='pr
 predicted_sig <- as.data.table(predicted_sig)
 
 ## plot with inverse logit transformed x-axis
-ggplot(predicted_sig)+aes(x=invlogit(etaD),y=muC,xse=seD,yse=seC,col=sample)+
+ggplot(predicted_sig)+aes(x=arm::invlogit(etaD),y=muC,xse=seD,yse=seC,col=sample)+
     facet_wrap(~symbolid,scales="free_y")+theme_linedraw()+
     geom_point(size=0.5)+scale_x_continuous("Proportion expression")+
     scale_y_continuous("Estimated Mean")+