From 0a6ed1f52c1f1cb965bd2ab9e82e21ab46423f28 Mon Sep 17 00:00:00 2001 From: Zhang Date: Fri, 13 Feb 2026 20:40:06 +0100 Subject: [PATCH] Prepare package for Bioconductor submission - DESCRIPTION: Rewrite Title/Description, add biocViews, URL, BugReports, VignetteBuilder, bump R >= 4.4.0, add BiocStyle/knitr/rmarkdown to Suggests - Add BiocStyle vignette (vignettes/ribiosExpression.Rmd) - Convert NEWS to NEWS.md - Add @return tags to all exported generics and functions - Fix typos (annotaiton, suing, coereced, DesignContras, feta.name) - Replace class(x) %in% with is.numeric() - Replace 1:length()/1:ncol()/1:nlevels() with seq_len()/seq_along() - Replace subset()/with() with direct $ indexing in truncateDgeTable - Remove internal Roche wiki URL from documentation - Update http to https for Broad Institute URLs - Fix positional argument in writeGct eSet method - Remove duplicate @exportMethod on setter generics - Add @format and @examples to data documentation - Update .Rbuildignore --- .Rbuildignore | 2 + DESCRIPTION | 25 +++-- NEWS.md | 175 +++++++++++++++++++++++++++++++++ R/AllGenerics.R | 21 ++-- R/AllMethods.R | 9 +- R/DesignContrast.R | 8 +- R/assertFullRank.R | 2 +- R/filter.R | 4 +- R/io_gct_cls.R | 4 +- R/io_gmt.R | 2 +- R/io_tab.R | 6 +- R/removeAllZeroVar.R | 2 +- R/ribiosExpressionSet.R | 3 + R/splitPCA.R | 3 +- R/summarizeSamples.R | 2 +- R/transformations.R | 6 +- R/truncateDgeTable.R | 25 +++-- R/writeVarMetadata.R | 4 +- vignettes/ribiosExpression.Rmd | 168 +++++++++++++++++++++++++++++++ 19 files changed, 422 insertions(+), 49 deletions(-) create mode 100644 NEWS.md create mode 100644 vignettes/ribiosExpression.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index 83dc6a1e..bc09c5ca 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,3 +7,5 @@ ^Makefile$ ^coverage\.txt$ ^coverage\.r$ +^\.github$ +^NEWS$ diff --git a/DESCRIPTION b/DESCRIPTION index e35cd986..8291f247 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,16 +1,21 @@ Package: ribiosExpression Type: Package -Title: Expression Analysis with RibiosExpression +Title: Data Structures and Utilities for Gene Expression Analysis Version: 1.3.5 -Date: 2025-01-30 Authors@R: person(given = "Jitao David", family = "Zhang", - role = c("aut", "cre", "ctb"), + role = c("aut", "cre"), email = "jitao_david.zhang@roche.com", comment = c(ORCID="0000-0002-3085-0909")) -Description: Data structures and functions for expression analysis. -Depends: R (>= 3.5.0) +Description: Provides data structures and utility functions for gene expression + analysis. It includes the DesignContrast class for representing study + designs and contrasts used in differential expression analysis, functions + for importing and exporting expression data in GCT/CLS formats, tools for + probeset summarization and filtering, and interfaces to limma-based + differential gene expression workflows. The package works with Biobase + ExpressionSet objects and integrates with the limma framework. +Depends: R (>= 4.4.0) Imports: methods, stats, @@ -28,9 +33,17 @@ Imports: openxlsx Suggests: testthat, - grid + grid, + BiocStyle, + knitr, + rmarkdown License: GPL-3 Encoding: UTF-8 +URL: https://github.com/bedapub/ribiosExpression +BugReports: https://github.com/bedapub/ribiosExpression/issues +biocViews: GeneExpression, DifferentialExpression, Microarray, + DataImport, Visualization +VignetteBuilder: knitr RoxygenNote: 7.3.2 Collate: 'AllClasses.R' diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..68f8b333 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,175 @@ +# Changes in ribiosExpression + +## Changes in version 1.3.5 + +### Bioconductor preparation +- Added `biocViews` to DESCRIPTION. +- Added BiocStyle vignette. +- Updated R dependency to >= 4.4.0. +- Added `@return` tags to all exported functions. +- Fixed typos in documentation. +- Replaced `class(x) %in%` with `is.numeric()`. +- Converted NEWS to NEWS.md format. + +## Changes in version 1.0-39 + +- `parseDesignContrast` adds new parameter `expSampleNames` to allow sample + name consistency check with input matrix. + +## Changes in version 1.0-34 + +- `DesignContrast` prints error message if validity is violated. +- `parseDesignContrastFile` fixes a mistake of assigning group levels. +- `parseDesignContrastFile` now accepts the `dispLevelStr` option. + +## Changes in version 1.0-33 + +- `parseDesignContrastStr` fixes two errors. + +## Changes in version 1.0-32 + +- Add `DesignContrast` object. + +## Changes in version 1.0-29 + +- Add new functionality in keepMaxMeanProbes.Rscript: able to read from .CHIP + file. +- keepMaxMeanProbes.Rscript now does not depend on the S4 class ExpressionSet + anymore, therefore much faster. + +## Changes in version 1.0-28 + +- Debug keepMaxMeanProbes.Rscript: `extname` returns NA if not found, therefore + an additional check is added. + +## Changes in version 1.0-27 + +- Function extension in `ChipFetcher2ExpressionSet`: now non-Affymetrix + probesets are supported as well. + +## Changes in version 1.0-26 + +- The main body of `readCls` has been moved to `ribiosIO::read_cls`. `readCls` + becomes a synonym for compatibility. + +## Changes in version 1.0-25 + +- Improvement in `readCls`: error messages provide more informative details for + mal-formatted files. + +## Changes in version 1.0-24 + +- Bug fix in `writeEset`: expression matrix is written without quotes. +- New script: keepMaxMeanProbes.Rscript filters duplicated genes by taking the + one with maximum average expression. + +## Changes in version 1.0-23 + +- Bug fix in `readGct`: if input feature names have duplicates, the exprs + matrix row names are changed. + +## Changes in version 1.0-22 + +- Move exprsMat2gct.Rscript to the ribiosIO package. +- `writeGct` has been refactored for matrix in ribiosIO, and re-implemented as + an S4-method for matrix and ExpressionSet. + +## Changes in version 1.0-21 + +- Add exprsMat2gct.Rscript to convert between expression matrix files and GCT + files. + +## Changes in version 1.0-20 + +- `ChipFetcher2ExpressionSet`: add option orthologue. +- `keepMaxStatProbe` uses `function(x) mean(x, na.rm=TRUE)` as default + function. + +## Changes in version 1.0-19 + +- limma2gsea.Rscript is updated. + +## Changes in version 1.0-18 + +- Refactor GSEA-related functions that are not dependent on the Biobase data + structure to the ribiosGSEA package. + +## Changes in version 1.0-17 + +- limma2gsea.Rscript guarantees no replicated gene symbols after orthologue + mapping. + +## Changes in version 1.0-16 + +- Add limma2gsea.Rscript to inst/Rscript. + +## Changes in version 1.0-15 + +- The ribiosAnnotation package has been removed from the dependency list. +- Add comp_diff.Rscript to inst/Rscript. + +## Changes in version 1.0-14 + +- The deprecation message of `remainHighestVarProbe` and `keepHighestVarProbe` + have been updated. + +## Changes in version 1.0-13 + +- The `rowscale` method warns if the input ExpressionSet does not have + 'lockedEnvironment' as storageMode. + +## Changes in version 1.0-12 + +- Low-level functions for computational drug repositioning refactored into + ribiosReposition. + +## Changes in version 1.0-11 + +- Add feature in `summarizeProbesets`: option `keep.featureNames`. +- Rename and rewrite `sprintGmt` to `formatGmt` as S4 method. + +## Changes in version 1.0-10 + +- Bugfix in `readEset` for numeric column names. +- Add `annotate` and `reannotate` methods. +- Add documentation for the `ribios.ExpressionSet` object. + +## Changes in version 1.0-9 + +- Add `kendallW` function and `kendallWmat` S4-method. + +## Changes in version 1.0-8 + +- PCA-related data structures and functions are refactored into the ribiosPCA + package. + +## Changes in version 1.0-7 + +- Add `summarizeProbesets` to summarize probesets. +- NAMESPACE are now explicitly stated. + +## Changes in version 1.0-6 + +- Add dependency on the ribiosAnnotation package. +- Add `writeEset` and `readEset` functions. +- `readFKdata` was renamed as `readFKtable`. + +## Changes in version 1.0-5 + +- Add git version control. + +## Changes in version 1.0-4 + +- Move data.frame and matrix functions to the ribiosUtils package. + +## Changes in version 1.0-3 + +- Refactor `read_gct` into ribiosIO for efficiency. + +## Changes in version 1.0-2 + +- Change `keepHighestVarProbe` to more generalized `keepMaxStatProbe`. + +## Changes in version 1.0-1 + +- Remove the path option from the `readGctCls` function. diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 1cb1b4bb..1d99b3bc 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -16,12 +16,14 @@ setGeneric("exprsToLong", function(x,...) standardGeneric("exprsToLong")) #' Extract contrastNames from an object #' @param object An object, see supported methods below +#' @return A character vector of contrast names #' @aliases contrastNames,DesignContrast-method #' @exportMethod contrastNames setGeneric("contrastNames", function(object) standardGeneric("contrastNames")) #' Extract design variable names from an object #' @param object An object, see supported methods below +#' @return A character vector of variable names #' @aliases designVariables,DesignContrast-method #' @exportMethod designVariables setGeneric("designVariables", function(object) @@ -30,6 +32,7 @@ setGeneric("designVariables", function(object) #' Extract sample groups from an object #' #' @param object An object, see supported methods below +#' @return A factor of sample groups #' @exportMethod groups #' @aliases groups,DesignContrast-method setGeneric("groups", function(object) standardGeneric("groups")) @@ -37,6 +40,7 @@ setGeneric("groups", function(object) standardGeneric("groups")) #' Extract displayed group labels from an object #' #' @param object An object, see supported methods below +#' @return A factor of display group labels #' @exportMethod dispGroups #' @aliases dispGroups,DesignContrast-method setGeneric("dispGroups", function(object) standardGeneric("dispGroups")) @@ -44,6 +48,7 @@ setGeneric("dispGroups", function(object) standardGeneric("dispGroups")) #' Extract the design matrix from an object #' #' @param object An object, see supported methods below +#' @return A numeric design matrix #' @exportMethod designMatrix #' @aliases designMatrix,DesignContrast-method setGeneric("designMatrix", function(object) standardGeneric("designMatrix")) @@ -52,13 +57,14 @@ setGeneric("designMatrix", function(object) standardGeneric("designMatrix")) #' #' @param object An object, see supported methods below #' @param value Design matrix -#' @exportMethod `designMatrix<-` +#' @return The modified object setGeneric("designMatrix<-", function(object, value) standardGeneric("designMatrix<-")) #' Extract the contrast matrix from an object #' #' @param object An object, see supported methods below +#' @return A numeric contrast matrix #' @exportMethod contrastMatrix #' @aliases contrastMatrix,DesignContrast-method setGeneric("contrastMatrix", function(object) standardGeneric("contrastMatrix")) @@ -67,22 +73,23 @@ setGeneric("contrastMatrix", function(object) standardGeneric("contrastMatrix")) #' #' @param object An object, see supported methods below #' @param value Contrast matrix -#' @exportMethod `contrastMatrix<-` +#' @return The modified object setGeneric("contrastMatrix<-", function(object, value) standardGeneric("contrastMatrix<-")) #' Extract the contrast annotation data.frame from an object #' #' @param object An object, see supported methods below +#' @return A data.frame annotating contrasts #' @exportMethod contrastAnnotation #' @aliases contrastAnnotation,DesignContrast-method setGeneric("contrastAnnotation", function(object) standardGeneric("contrastAnnotation")) -#' Assign contrast annotaiton to an object +#' Assign contrast annotation to an object #' #' @param object An object, see supported methods below -#' @param value Contrast anaotation data.frame -#' @exportMethod `contrastAnnotation<-` +#' @param value Contrast annotation data.frame +#' @return The modified object setGeneric("contrastAnnotation<-", function(object, value) standardGeneric("contrastAnnotation<-")) @@ -90,6 +97,7 @@ setGeneric("contrastAnnotation<-", function(object, value) #' Extract the number of contrasts from an object #' #' @param object An object, see supported methods below +#' @return An integer, number of contrasts #' @aliases nContrast,DesignContrast-method #' @exportMethod nContrast setGeneric("nContrast", function(object) standardGeneric("nContrast")) @@ -240,12 +248,13 @@ setGeneric("annotate", setGeneric("reannotate", function(object, check.target,...) standardGeneric("reannotate")) -#' Export matrix or eEset that can be coereced as one into gct/cls files +#' Export matrix or eSet that can be coerced as one into gct/cls files #' @keywords methods #' @param obj The input object, see methods below for supported data types #' @param file The output file #' @param feat.name Specifying feature names #' @param feat.desc Specifying feature descriptions +#' @return Used for its side effect of writing files; returns invisibly. #' @aliases writeGct,matrix,ANY,ANY,ANY-method writeGct,eSet,ANY,ANY,ANY-method #' @exportMethod writeGct setGeneric("writeGct", function(obj, file, feat.name, feat.desc) diff --git a/R/AllMethods.R b/R/AllMethods.R index 50b97e67..35e4a792 100644 --- a/R/AllMethods.R +++ b/R/AllMethods.R @@ -12,7 +12,7 @@ return(object@groups) }) #' @describeIn dispGroups Return the sample groups from a DesignContrast object -#' , suing display labels +#' , using display labels #' @export setMethod("dispGroups", "DesignContrast", function(object) { groups <- object@groups @@ -68,7 +68,7 @@ setReplaceMethod("contrastAnnotation", "DesignContrast", function(object, value) return(object) }) -#' @describeIn nContrast Return the number of contrast in a DesignContras +#' @describeIn nContrast Return the number of contrasts in a DesignContrast #' object #' @export setMethod("nContrast", "DesignContrast", function(object) { @@ -96,7 +96,7 @@ setMethod("exprsToLong", "matrix", function(x, idvar="illID",timevar="hybridID", ids=rownames(x), valueType="raw") { x <- as.data.frame(x) colnames(x) <- paste(valuevar, colnames(x), sep=".") - va <- 1:ncol(x) + va <- seq_len(ncol(x)) x[,idvar] <- ids xLong <- reshape(x, idvar=idvar, varying=va, timevar=timevar, direction="long") rownames(xLong) <- NULL @@ -115,6 +115,7 @@ setMethod("exprsToLong", "eSet", function(x) { #' @param x An ExpressionSet object. #' @param center Logical, whether the mean values of rows should be set to zero. #' @param scale Logical, whether the standard deviations of rows should be normalised to one. +#' @return An ExpressionSet object with row-scaled expression values. #' #' @importFrom ribiosUtils rowscale #' @export @@ -190,7 +191,7 @@ setMethod("formatGmt", if(length(comment)==1) comment <- rep(comment, length(title)) stopifnot(identical(length(title), length(comment))) - sapply(1:length(genes), + sapply(seq_along(genes), function(x) formatGmt(title[x], comment[x], genes[[x]]) ) }) diff --git a/R/DesignContrast.R b/R/DesignContrast.R index e1173f16..e4fa48a3 100644 --- a/R/DesignContrast.R +++ b/R/DesignContrast.R @@ -18,7 +18,7 @@ design2group <- function(designMatrix) { groups <- apply(designMatrix[,useCol, drop=FALSE], 1, paste, collapse="") res <- factor(groups) - levels(res) <- sprintf("AutoGroup_%02d", 1:nlevels(res)) + levels(res) <- sprintf("AutoGroup_%02d", seq_len(nlevels(res))) return(res) } @@ -55,7 +55,7 @@ DesignContrast <- function(designMatrix, contrastMatrix <- matrix(nrow=ncol(designMatrix), dimnames=list(colnames(designMatrix), NULL)) if(is.null(contrastAnnotation)) { contrastNames <- colnames(contrastMatrix) - if(is.null(contrastNames)) contrastNames <- 1:ncol(contrastMatrix) + if(is.null(contrastNames)) contrastNames <- seq_len(ncol(contrastMatrix)) contrastAnnotation <- data.frame(row.names=contrastNames) } res <- new("DesignContrast", @@ -261,7 +261,7 @@ parseDesignContrast <- function(designFile=NULL, contrastFile=NULL, .contrastSampleIndices<- function(descon, contrast) { contrastMat <- contrastMatrix(descon) designMat <- designMatrix(descon) - haltifnot(contrast %in% colnames(contrastMat) || contrast %in% 1:ncol(contrastMat), + haltifnot(contrast %in% colnames(contrastMat) || contrast %in% seq_len(ncol(contrastMat)), msg=sprintf("contrast '%s' not found in the contrast matrix", contrast)) currContrast <- contrastMat[, contrast] @@ -292,7 +292,7 @@ setMethod("contrastSampleIndices", c("DesignContrast", "numeric"), function(obje .contrastSampleIndices(object, contrast) }) -assert_is_range <- function(x) stopifnot(length(x)==2 && class(x) %in% c("integer", "numeric")) +assert_is_range <- function(x) stopifnot(length(x)==2 && is.numeric(x)) #' Plot a DesignContrast object with two heatmaps #' diff --git a/R/assertFullRank.R b/R/assertFullRank.R index 0fc85800..33443372 100644 --- a/R/assertFullRank.R +++ b/R/assertFullRank.R @@ -26,7 +26,7 @@ assertFullRank <- function(matrix) { #' removeColRank(myMat) #' @export removeColRank <- function(matrix) { - colind <- 1:ncol(matrix) + colind <- seq_len(ncol(matrix)) nc <- ncol(matrix) full <- c(ncol=nc, rank=Matrix::rankMatrix(matrix)) res <- t(sapply(colind, function(i){ diff --git a/R/filter.R b/R/filter.R index 8016ec11..bdf7c4bb 100644 --- a/R/filter.R +++ b/R/filter.R @@ -105,9 +105,9 @@ keepMaxStatProbe <- function(eset, probe.index.name, keepNAprobes=TRUE, } probe.indexed.fac <- factor(fData(eset.indexed)[,probe.index.name]) - probe.by.index <- split(1:dim(eset.indexed)[1], probe.indexed.fac) + probe.by.index <- split(seq_len(dim(eset.indexed)[1]), probe.indexed.fac) stat.by.index <- split(eset.indexed.featureStat, probe.indexed.fac) - max.probes <- sapply(1:nlevels(probe.indexed.fac), + max.probes <- sapply(seq_len(nlevels(probe.indexed.fac)), function(x) probe.by.index[[x]][ which.max(stat.by.index[[x]]) ]) eset.remain <- rep(FALSE, dim(eset)[1]) diff --git a/R/io_gct_cls.R b/R/io_gct_cls.R index 316dcb50..30d469f9 100644 --- a/R/io_gct_cls.R +++ b/R/io_gct_cls.R @@ -31,7 +31,7 @@ setMethod("writeGct", feat.name <- getDfCol(fd, feat.name) if(!missing(feat.desc)) feat.desc <- getDfCol(fd, feat.desc) - write_gct(exprs(obj), file=file, feat.name=feat.name, feat.desc) + write_gct(exprs(obj), file=file, feat.name=feat.name, feat.desc=feat.desc) }) @@ -101,7 +101,7 @@ writeCls <- function(eset, file=stdout(), sample.group.col="group") { #' #' See \code{\link{readGctCls}} for importing functions. #' @references -#' \url{http://www.broadinstitute.org/gsea/doc/GSEAUserGuideTEXT.htm} +#' \url{https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm} #' @examples #' #' data(sample.ExpressionSet, package="Biobase") diff --git a/R/io_gmt.R b/R/io_gmt.R index bb569b01..bddd6f2c 100644 --- a/R/io_gmt.R +++ b/R/io_gmt.R @@ -29,7 +29,7 @@ #' @note It is user's responsibility to check that all GRP files do exist and #' are readable. #' @author Jitao David Zhang -#' @references See \url{http://www.broadinstitute.org/cmap/index.jsp} for the +#' @references See \url{https://www.broadinstitute.org/connectivity-map-cmap} for the #' use of GRP files in the Connectivity Map web tool. #' @examples #' diff --git a/R/io_tab.R b/R/io_tab.R index 52d97d66..914f3da8 100644 --- a/R/io_tab.R +++ b/R/io_tab.R @@ -94,7 +94,8 @@ writeEset <- function(eset,exprs.file,fData.file,pData.file, #' @param sampleGroups.file Character, file name where the information of sample groups is written to. #' @param sampleGroupLevels.file Character, file name where the information of sample group levels is written to. #' -#' The function is used to export sample group and group level information for differential gene expression analysis. +#' @details The function is used to export sample group and group level information for differential gene expression analysis. +#' @return Used for its side effect of writing files. Returns invisibly. #' #' @examples #' writeSampleGroups(gl(3,4), stdout(), stdout()) @@ -115,13 +116,14 @@ writeSampleGroups <- function(sampleGroups, sampleGroups.file, sampleGroupLevels #' @param header Logical, whether a head line is present #' @param ... Passed to \code{\link{readFKtable}} #' -#' The function can read in eSet object saved by \code{\link{writeEset}} by parsing +#' @details The function can read in eSet object saved by \code{\link{writeEset}} by parsing #' three plain text files: \code{exprs.file}, \code{fData.file}, and \code{pData.file}. #' #' Currently both \code{tsv} and \code{gct} formats are supported for expression #' file. #' #' See \code{writeEset} for limitations of these functions. +#' @return An \code{ExpressionSet} object. #' #' @seealso \code{\link{writeEset}}, \code{\link{readFKtable}}. #' diff --git a/R/removeAllZeroVar.R b/R/removeAllZeroVar.R index 0f73225f..b0cd00ab 100644 --- a/R/removeAllZeroVar.R +++ b/R/removeAllZeroVar.R @@ -42,7 +42,7 @@ removeAllZeroVar.matrix <- function(obj, contrasts) { if(length(indInvalidContrast)==0) { keepContrast <- rep(TRUE, ncol(contrasts)) } else { - keepContrast <- setdiff(1:ncol(contrasts), indInvalidContrast) + keepContrast <- setdiff(seq_len(ncol(contrasts)), indInvalidContrast) } notEstContrasts <- colnames(contrasts)[indInvalidContrast] contrasts <- contrasts[!isNotEst, keepContrast] diff --git a/R/ribiosExpressionSet.R b/R/ribiosExpressionSet.R index 35710738..37937c9c 100644 --- a/R/ribiosExpressionSet.R +++ b/R/ribiosExpressionSet.R @@ -2,6 +2,9 @@ #' #' @name ribiosExpressionSet #' @docType data +#' @format An \code{ExpressionSet} object. #' @author Jitao David Zhang \email{jitao_david.zhang@roche.com} #' @keywords data +#' @examples +#' data(ribiosExpressionSet) NULL diff --git a/R/splitPCA.R b/R/splitPCA.R index d54fff94..c25cabc2 100644 --- a/R/splitPCA.R +++ b/R/splitPCA.R @@ -4,6 +4,7 @@ #' @param factor One or more factor vectors, used to split the eSet object #' @param func Function to retrieve values from split sub-eset objects #' @param ... Passed to \code{pcaScores} +#' @return A \code{data.frame} of PCA scores combined from all splits. #' #' @examples #' data(ribios.ExpressionSet, package="ribiosExpression") @@ -11,7 +12,7 @@ #' pcaScore1 <- splitPCA(ribios.ExpressionSet, fac1) #' @export splitPCA <- function(eset, factor, func=function(e) exprs(e), ...) { - resList <- tapply(1:ncol(eset), factor, function(i) { + resList <- tapply(seq_len(ncol(eset)), factor, function(i) { if(length(i)==0) return(NULL) if(length(i)==1) { diff --git a/R/summarizeSamples.R b/R/summarizeSamples.R index 6bd420af..b40c40b7 100644 --- a/R/summarizeSamples.R +++ b/R/summarizeSamples.R @@ -53,7 +53,7 @@ summarizeSamples <- function(eset, indSamples=eset$SAMPLEID, removeInvarCols=TRU indSamples, fun=fun, ...) assign(item, item.pool, envir=assayDataEnv) } - eset.pd <- do.call(rbind, tapply(1:ncol(eset), indSamples, function(x) { + eset.pd <- do.call(rbind, tapply(seq_len(ncol(eset)), indSamples, function(x) { pData(eset)[x[1], , drop=FALSE] })) if(ncol(eset.pd)==1) { diff --git a/R/transformations.R b/R/transformations.R index 13004fbc..d22d0f7b 100644 --- a/R/transformations.R +++ b/R/transformations.R @@ -25,7 +25,8 @@ matrixToLongTable <- function(x, valueLabel="value", rowLabel="row", colLabel="c #' @param df A \code{data.frame} #' @param prefix A character string, the prefix to be used if an column's name is empty. #' -#' If any column has an empty string as name, its replaced by the prefix appended by an index starting from 1 +#' @details If any column has an empty string as name, its replaced by the prefix appended by an index starting from 1 +#' @return A \code{data.frame} with fixed column names. #' @examples #' testDf <- data.frame("Col1"=LETTERS[1:3], "Col2"=letters[2:4]) #' colnames(testDf) <- c("", "") @@ -59,7 +60,8 @@ vectorizeExprs <- function(exp) { #' @param exprsFun A function to extract expression values, by default \code{exprs} #' @param includeOtherAssayData Logical, whether other elements in the \code{assayData} environment (if present) should be returned. #' -#' The function extracts exprs (and other values in the \code{assayData} environment), and return it in a long data.frame format with phenotypic data +#' @details The function extracts exprs (and other values in the \code{assayData} environment), and return it in a long data.frame format with phenotypic data +#' @return A \code{data.frame} in long format. #' #' @examples #' data(ribios.ExpressionSet, package="ribiosExpression") diff --git a/R/truncateDgeTable.R b/R/truncateDgeTable.R index 34b650a9..e2a69810 100644 --- a/R/truncateDgeTable.R +++ b/R/truncateDgeTable.R @@ -26,35 +26,32 @@ limmaTopTable2dgeTable <- function(limmaTopTable) { return(limmaTopTable) } -## truncateDgeTable uses logFC -utils::globalVariables(c("logFC")) - #' Truncate dgeTable into tables of positively and negatively differentially expressed genes according to the pre-defined criteria #' #' @param dgeTable dgeTable A DGEtable defined in ribiosExpression. Notice that the column names returned by limma::topTable are remapped (see limmaTopTable2dgeTable). #' @return A list of two elements: 'pos' and 'neg'. Each contains a dgeTable of positively/negatively regulated genes -#' @references The logic is described at http://rochewiki.roche.com/confluence/display/BIOINFO/Substream+Algorithm #' #' @export truncateDgeTable <- function(dgeTable) { dgeTable <- sortByCol(dgeTable, "PValue", decreasing=FALSE) - cond1 <- with(dgeTable, abs(logFC)>=1 & FDR<0.10) - cond2 <- with(dgeTable, abs(logFC)>=1 & PValue<0.05) + cond1 <- abs(dgeTable$logFC)>=1 & dgeTable$FDR<0.10 + cond2 <- abs(dgeTable$logFC)>=1 & dgeTable$PValue<0.05 if(sum(cond1)>=200) { - posTbl <- subset(dgeTable[cond1,], logFC>0) - negTbl <- subset(dgeTable[cond1,], logFC<0) + posTbl <- dgeTable[cond1 & dgeTable$logFC>0, , drop=FALSE] + negTbl <- dgeTable[cond1 & dgeTable$logFC<0, , drop=FALSE] } else if(sum(cond2)>=200) { - posTbl <- subset(dgeTable[cond2,], logFC>0) - negTbl <- subset(dgeTable[cond2,], logFC<0) + posTbl <- dgeTable[cond2 & dgeTable$logFC>0, , drop=FALSE] + negTbl <- dgeTable[cond2 & dgeTable$logFC<0, , drop=FALSE] } else { ntop <- pmin(400, pmin(nrow(dgeTable), pmax(100, as.integer(nrow(dgeTable)*0.05)))) - posTbl <- subset(dgeTable[1:ntop,], logFC>0) - negTbl <- subset(dgeTable[1:ntop,], logFC<0) + topTbl <- dgeTable[seq_len(ntop), , drop=FALSE] + posTbl <- topTbl[topTbl$logFC>0, , drop=FALSE] + negTbl <- topTbl[topTbl$logFC<0, , drop=FALSE] } maxRow <- 150 - if(nrow(posTbl)>maxRow) posTbl <- posTbl[1:maxRow,] - if(nrow(negTbl)>maxRow) negTbl <- negTbl[1:maxRow,] + if(nrow(posTbl)>maxRow) posTbl <- posTbl[seq_len(maxRow), , drop=FALSE] + if(nrow(negTbl)>maxRow) negTbl <- negTbl[seq_len(maxRow), , drop=FALSE] return(list(pos=posTbl, neg=negTbl)) } diff --git a/R/writeVarMetadata.R b/R/writeVarMetadata.R index 688e2875..decbe689 100644 --- a/R/writeVarMetadata.R +++ b/R/writeVarMetadata.R @@ -28,10 +28,10 @@ writeVarMetadata.default <- function(x, openxlsx::addWorksheet(wb, sheet) prependStyle <- openxlsx::createStyle(fgFill="#CCFFCC", fontColour="#003300") openxlsx::writeData(wb, sheet, prepend) - openxlsx::addStyle(wb, sheet=sheet, prependStyle, rows=1:length(prepend), cols=1) + openxlsx::addStyle(wb, sheet=sheet, prependStyle, rows=seq_along(prepend), cols=1) colheadStyle <- openxlsx::createStyle(textDecoration = "bold") openxlsx::writeData(wb, sheet, df, startRow=length(prepend)+1) - openxlsx::addStyle(wb, sheet=sheet, colheadStyle, rows=length(prepend)+1, cols=1:ncol(df)) + openxlsx::addStyle(wb, sheet=sheet, colheadStyle, rows=length(prepend)+1, cols=seq_len(ncol(df))) res <- openxlsx::saveWorkbook(wb, path, overwrite=overwrite, returnValue=TRUE) return(invisible(res)) } diff --git a/vignettes/ribiosExpression.Rmd b/vignettes/ribiosExpression.Rmd new file mode 100644 index 00000000..a92ab65e --- /dev/null +++ b/vignettes/ribiosExpression.Rmd @@ -0,0 +1,168 @@ +--- +title: "Introduction to ribiosExpression" +author: + - name: Jitao David Zhang + affiliation: Roche Pharma Research and Early Development + email: jitao_david.zhang@roche.com +package: ribiosExpression +output: + BiocStyle::html_document: + toc: true + toc_depth: 2 +vignette: > + %\VignetteIndexEntry{Introduction to ribiosExpression} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Introduction + +The `ribiosExpression` package provides data structures and utility functions +for gene expression analysis. It extends the Biobase `ExpressionSet` class with +tools for: + +- **Study design and contrasts**: The `DesignContrast` class encapsulates design + matrices, contrast matrices, and grouping information commonly used in + differential expression analysis with `limma`. +- **I/O operations**: Import and export expression data in GCT/CLS formats, + tab-delimited files, and GMT gene set formats. +- **Probeset summarization and filtering**: Collapse multiple probesets per gene + and filter by summary statistics. +- **Expression data transformation**: Convert expression matrices and + `ExpressionSet` objects to long-format data frames for downstream analysis and + visualization. + +# Installation + +```{r install, eval=FALSE} +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +BiocManager::install("ribiosExpression") +``` + +# Quick start + +## Loading the package + +```{r load} +library(ribiosExpression) +library(Biobase) +``` + +## Working with DesignContrast objects + +The `DesignContrast` class is a central data structure for representing study +designs and contrasts. You can create one directly or parse it from strings: + +```{r design-contrast} +## One-way ANOVA design from strings +dc <- parseDesignContrast( + sampleGroups = "Control,Treatment,Control,Treatment,Control,Treatment", + groupLevels = "Control,Treatment", + dispLevels = "Ctrl,Trt", + contrasts = "Treatment-Control" +) +dc +``` + +Access the components: + +```{r access-dc} +designMatrix(dc) +contrastMatrix(dc) +groups(dc) +``` + +## Building from design and contrast matrices + +```{r build-dc} +myFac <- gl(3, 3, labels = c("baseline", "treat1", "treat2")) +myDesign <- model.matrix(~myFac) +colnames(myDesign) <- c("baseline", "treat1", "treat2") +myContrast <- limma::makeContrasts( + contrasts = c("treat1", "treat2"), + levels = myDesign +) +dc2 <- DesignContrast(myDesign, myContrast, groups = myFac) +dc2 +``` + +## Visualizing the design + +```{r plot-dc, fig.width=6, fig.height=6} +plot(dc2, title = "Example Design") +``` + +## Reading and writing expression data + +### Read an expression matrix into an ExpressionSet + +```{r read-exprs} +idir <- system.file("extdata", package = "ribiosExpression") +eset <- readExprsMatrix(file.path(idir, "sample_eset_exprs.txt")) +eset +``` + +### Read GCT/CLS files + +```{r read-gct} +gct_eset <- readGctCls(file.base = file.path(idir, "test")) +gct_eset +``` + +### Write ExpressionSet to files + +```{r write-eset} +exprs_file <- tempfile(fileext = ".tsv") +writeEset(eset, exprs_file, exprs.file.format = "tsv") +``` + +## Transforming expression data to long format + +```{r long-table} +data(ribios.ExpressionSet) +longTbl <- eSetToLongTable(ribios.ExpressionSet[1:5, 1:3]) +head(longTbl) +``` + +## Probeset summarization + +When multiple probesets map to the same gene, you can summarize them: + +```{r summarize} +data(ribios.ExpressionSet) +summarized <- summarizeProbesets( + ribios.ExpressionSet, + index.name = "GeneID", + fun = mean +) +summarized +``` + +## Filtering probesets + +Keep only the probeset with the maximum variance per gene: + +```{r filter} +filtered <- keepMaxStatProbe( + ribios.ExpressionSet, + probe.index.name = "GeneID", + stat = sd, + na.rm = TRUE +) +filtered +``` + +# Session info + +```{r session-info} +sessionInfo() +```