diff --git a/R/data.R b/R/data.R index 26003278..dfffb857 100644 --- a/R/data.R +++ b/R/data.R @@ -153,3 +153,51 @@ NULL #' @keywords data #' @usage data(openSeas.mm9, package = "compartmap") NULL + +#' hg38 genes as a GRanges object +#' +#' This object was generated using the TxDb.Hsapiens.UCSC.hg19.knownGene package. +#' The script used for this object is found in the inst/scripts directory +#' +#' @name hg38.tx.gr +#' @docType data +#' @author James Eapen \email{james.eapen@vai.org} +#' @keywords data +#' @usage data(hg38.tx.gr, package = "compartmap") +NULL + +#' hg19 genes as a GRanges object +#' +#' This object was generated using the TxDb.Hsapiens.UCSC.hg19.knownGene package. +#' The script used for this object is found in the inst/scripts directory +#' +#' @name hg19.tx.gr +#' @docType data +#' @author James Eapen \email{james.eapen@vai.org} +#' @keywords data +#' @usage data(hg19.tx.gr, package = "compartmap") +NULL + +#' mm9 genes as a GRanges object +#' +#' This object was generated using the TxDb.Mmusculus.UCSC.mm9.knownGene package. +#' The script used for this object is found in the inst/scripts directory +#' +#' @name mm9.tx.gr +#' @docType data +#' @author James Eapen \email{james.eapen@vai.org} +#' @keywords data +#' @usage data(mm9.tx.gr, package = "compartmap") +NULL + +#' mm10 genes as a GRanges object +#' +#' This object was generated using the TxDb.Mmusculus.UCSC.mm10.knownGene package. +#' The script used for this object is found in the inst/scripts directory +#' +#' @name mm10.tx.gr +#' @docType data +#' @author James Eapen \email{james.eapen@vai.org} +#' @keywords data +#' @usage data(mm10.tx.gr, package = "compartmap") +NULL diff --git a/R/getABSignal.R b/R/getABSignal.R index e36a6901..7dc2c7c8 100644 --- a/R/getABSignal.R +++ b/R/getABSignal.R @@ -5,6 +5,7 @@ #' @param x A list object from getCorMatrix #' @param squeeze Whether squeezing was used (implies Fisher's Z transformation) #' @param assay What kind of assay are we working on ("array", "atac", "array") +#' @param genome The genome to use for gene-density-based sign correction #' #' @return A list x to pass to getABSignal #' @@ -48,7 +49,12 @@ #' #Get A/B signal #' absignal <- getABSignal(bin.cor.counts) -getABSignal <- function(x, squeeze = FALSE, assay = c("rna", "atac", "array")) { +getABSignal <- function( + x, + squeeze = FALSE, + assay = c("rna", "atac", "array"), + genome = c("hg19", "hg38", "mm9", "mm10") +) { assay <- match.arg(assay) gr <- x$gr @@ -60,10 +66,18 @@ getABSignal <- function(x, squeeze = FALSE, assay = c("rna", "atac", "array")) { gr$pc <- meanSmoother(pc) message("Done smoothing.") + if (flipSign(gr, genome)) gr$pc <- -gr$pc gr$compartments <- extractOpenClosed(gr, assay = assay) return(gr) } +flipSign <- function(gr, genome) { + tx.gr <- getGenome(genome, "tx") + gene_count <- countOverlaps(gr, tx.gr) + open <- gr$pc > 0 + sum(gene_count[open]) < sum(gene_count[!open]) +} + #' Get the open and closed compartment calls based on sign of singular values #' #' @param gr Input GRanges with associated mcols that represent singular values diff --git a/R/getSVD.R b/R/getSVD.R index 3b13e1eb..48470349 100644 --- a/R/getSVD.R +++ b/R/getSVD.R @@ -20,16 +20,8 @@ getSVD <- function(matrix, k = 1, sing.vec = c("left", "right")) { sing.vec <- match.arg(sing.vec) matrix <- .centerMatrix(matrix) - p.mat <- .getUV(matrix, k, sing.vec) - csums <- colSums(matrix, na.rm = TRUE) - - # check for negative correlation - # flip sign as necessary to ensure signal is associated with pos. values - if (cor(csums, p.mat) < 0) { - p.mat <- -p.mat - } - - p.mat * sqrt(length(p.mat)) # Chromosome length normalization + V <- .getUV(matrix, k, sing.vec) + V * sqrt(length(V)) # Chromosome length normalization } # Centre the matrix subtracting the row means from each row diff --git a/R/utils.R b/R/utils.R index 7fba3fe1..0f1d840a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -101,7 +101,8 @@ removeEmptyBoots <- function(obj) { #' Get a GRanges object from bundled compartmap genomes #' #' @param genome The desired genome to use ("hg19", "hg38", "mm9", "mm10") -#' @param type The type of data - full genome or open sea regions +#' @param type The type of data - full genome (`"genome"`), transcripts +#' (`"tx"`), or open sea regions (`"openseas"`) #' #' @return Granges of the genome #' @@ -121,6 +122,7 @@ getGenome <- function( }) gr <- switch(type, genome = paste0(genome.name, ".gr"), + tx = paste0(genome.name, ".tx.gr"), openseas = paste0("openSeas.", genome.name) ) return(get(gr)) diff --git a/data/datalist b/data/datalist index 5b74ff58..337c5eeb 100644 --- a/data/datalist +++ b/data/datalist @@ -11,3 +11,7 @@ openSeas.hg38 openSeas.mm10 openSeas.mm9 ss3_umi_sce +hg19.tx.gr +hg38.tx.gr +mm10.tx.gr +mm9.tx.gr diff --git a/data/hg19.tx.gr.rda b/data/hg19.tx.gr.rda new file mode 100644 index 00000000..1824fa95 Binary files /dev/null and b/data/hg19.tx.gr.rda differ diff --git a/data/hg38.tx.gr.rda b/data/hg38.tx.gr.rda new file mode 100644 index 00000000..861f0597 Binary files /dev/null and b/data/hg38.tx.gr.rda differ diff --git a/data/mm10.tx.gr.rda b/data/mm10.tx.gr.rda new file mode 100644 index 00000000..52fd6764 Binary files /dev/null and b/data/mm10.tx.gr.rda differ diff --git a/data/mm9.tx.gr.rda b/data/mm9.tx.gr.rda new file mode 100644 index 00000000..8c739488 Binary files /dev/null and b/data/mm9.tx.gr.rda differ diff --git a/flake.nix b/flake.nix index 9134fbb0..9ae613c3 100644 --- a/flake.nix +++ b/flake.nix @@ -43,6 +43,10 @@ ]; rDevDeps = with pkgs.rPackages; [ + TxDb_Hsapiens_UCSC_hg19_knownGene + TxDb_Hsapiens_UCSC_hg38_knownGene + TxDb_Mmusculus_UCSC_mm9_knownGene + TxDb_Mmusculus_UCSC_mm10_knownGene biocthis covr devtools diff --git a/inst/scripts/create_species_seqlengths_granges.R b/inst/scripts/create_species_seqlengths_granges.R index 2923fdb1..38ce47f9 100644 --- a/inst/scripts/create_species_seqlengths_granges.R +++ b/inst/scripts/create_species_seqlengths_granges.R @@ -8,25 +8,38 @@ library(Homo.sapiens) hg19.gr <- as(seqinfo(Homo.sapiens), "GRanges") +library(TxDb.Hsapiens.UCSC.hg19.knownGene) +hg19.tx.gr <- sort(genes(TxDb.Hsapiens.UCSC.hg19.knownGene)) + #hg38 library(BSgenome.Hsapiens.UCSC.hg38) +hg38.gr <- as(seqinfo(BSgenome.Hsapiens.UCSC.hg38), "GRanges") -hg38.gr <- as(seqinfo(BSgenome.Hsapiens.UCSC.hg38), - "GRanges") +library(TxDb.Hsapiens.UCSC.hg38.knownGene) +hg38.tx.gr <- sort(genes(TxDb.Hsapiens.UCSC.hg38.knownGene)) #mm9 library(BSgenome.Mmusculus.UCSC.mm9) +mm9.gr <- as(seqinfo(BSgenome.Mmusculus.UCSC.mm9), "GRanges") -mm9.gr <- as(seqinfo(BSgenome.Mmusculus.UCSC.mm9), - "GRanges") +library(TxDb.Mmusculus.UCSC.mm9.knownGene) +mm9.tx.gr <- sort(genes(TxDb.Mmusculus.UCSC.mm9.knownGene)) #mm10 library(Mus.musculus) mm10.gr <- as(seqinfo(Mus.musculus), "GRanges") +library(TxDb.Mmusculus.UCSC.mm10.knownGene) +mm10.tx.gr <- sort(genes(TxDb.Mmusculus.UCSC.mm10.knownGene)) + #save -save(hg19.gr, file = "~/git_repos/compartmap/data/hg19_gr.rda") -save(hg38.gr, file = "~/git_repos/compartmap/data/hg38_gr.rda") -save(mm9.gr, file = "~/git_repos/compartmap/data/mm9_gr.rda") -save(mm10.gr, file = "~/git_repos/compartmap/data/mm10_gr.rda") +save(hg19.gr, file = "../../data/hg19_gr.rda") +save(hg38.gr, file = "../../data/hg38_gr.rda") +save(mm9.gr, file = "../../data/mm9_gr.rda") +save(mm10.gr, file = "../../data/mm10_gr.rda") + +save(hg19.tx.gr, file = "../../data/hg19.tx.gr.rda") +save(hg38.tx.gr, file = "../../data/hg38.tx.gr.rda") +save(mm9.tx.gr, file = "../../data/mm9.tx.gr.rda") +save(mm10.tx.gr, file = "../../data/mm10.tx.gr.rda") diff --git a/man/getABSignal.Rd b/man/getABSignal.Rd index fbafd482..781fa5b9 100644 --- a/man/getABSignal.Rd +++ b/man/getABSignal.Rd @@ -4,7 +4,12 @@ \alias{getABSignal} \title{Calculate Pearson correlations of smoothed eigenvectors} \usage{ -getABSignal(x, squeeze = FALSE, assay = c("rna", "atac", "array")) +getABSignal( + x, + squeeze = FALSE, + assay = c("rna", "atac", "array"), + genome = c("hg19", "hg38", "mm9", "mm10") +) } \arguments{ \item{x}{A list object from getCorMatrix} @@ -12,6 +17,8 @@ getABSignal(x, squeeze = FALSE, assay = c("rna", "atac", "array")) \item{squeeze}{Whether squeezing was used (implies Fisher's Z transformation)} \item{assay}{What kind of assay are we working on ("array", "atac", "array")} + +\item{genome}{The genome to use for gene-density-based sign correction} } \value{ A list x to pass to getABSignal diff --git a/man/getGenome.Rd b/man/getGenome.Rd index 5692ef66..04314b0a 100644 --- a/man/getGenome.Rd +++ b/man/getGenome.Rd @@ -9,7 +9,8 @@ getGenome(genome = c("hg19", "hg38", "mm9", "mm10"), type = "genome") \arguments{ \item{genome}{The desired genome to use ("hg19", "hg38", "mm9", "mm10")} -\item{type}{The type of data - full genome or open sea regions} +\item{type}{The type of data - full genome (\code{"genome"}), transcripts +(\code{"tx"}), or open sea regions (\code{"openseas"})} } \value{ Granges of the genome diff --git a/man/hg19.tx.gr.Rd b/man/hg19.tx.gr.Rd new file mode 100644 index 00000000..5c444092 --- /dev/null +++ b/man/hg19.tx.gr.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{hg19.tx.gr} +\alias{hg19.tx.gr} +\title{hg19 genes as a GRanges object} +\usage{ +data(hg19.tx.gr, package = "compartmap") +} +\description{ +This object was generated using the TxDb.Hsapiens.UCSC.hg19.knownGene package. +The script used for this object is found in the inst/scripts directory +} +\author{ +James Eapen \email{james.eapen@vai.org} +} +\keyword{data} diff --git a/man/hg38.tx.gr.Rd b/man/hg38.tx.gr.Rd new file mode 100644 index 00000000..f7d5736b --- /dev/null +++ b/man/hg38.tx.gr.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{hg38.tx.gr} +\alias{hg38.tx.gr} +\title{hg38 genes as a GRanges object} +\usage{ +data(hg38.tx.gr, package = "compartmap") +} +\description{ +This object was generated using the TxDb.Hsapiens.UCSC.hg19.knownGene package. +The script used for this object is found in the inst/scripts directory +} +\author{ +James Eapen \email{james.eapen@vai.org} +} +\keyword{data} diff --git a/man/mm10.tx.gr.Rd b/man/mm10.tx.gr.Rd new file mode 100644 index 00000000..00320fe2 --- /dev/null +++ b/man/mm10.tx.gr.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{mm10.tx.gr} +\alias{mm10.tx.gr} +\title{mm10 genes as a GRanges object} +\usage{ +data(mm10.tx.gr, package = "compartmap") +} +\description{ +This object was generated using the TxDb.Mmusculus.UCSC.mm10.knownGene package. +The script used for this object is found in the inst/scripts directory +} +\author{ +James Eapen \email{james.eapen@vai.org} +} +\keyword{data} diff --git a/man/mm9.tx.gr.Rd b/man/mm9.tx.gr.Rd new file mode 100644 index 00000000..14f714d2 --- /dev/null +++ b/man/mm9.tx.gr.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{mm9.tx.gr} +\alias{mm9.tx.gr} +\title{mm9 genes as a GRanges object} +\usage{ +data(mm9.tx.gr, package = "compartmap") +} +\description{ +This object was generated using the TxDb.Mmusculus.UCSC.mm9.knownGene package. +The script used for this object is found in the inst/scripts directory +} +\author{ +James Eapen \email{james.eapen@vai.org} +} +\keyword{data} diff --git a/tests/testthat/test-getSVD.R b/tests/testthat/test-getSVD.R index 781f7244..8067c242 100644 --- a/tests/testthat/test-getSVD.R +++ b/tests/testthat/test-getSVD.R @@ -1,19 +1,4 @@ -test_that("getSVD", { -}) - -library("BiocSingular") -test_that(".getUV", { - mat <- matrix(runif(1000, 1, 100), nrow = 100) - svd <- runSVD(mat, k = 1, BSPARAM = IrlbaParam()) - expect_equal( - abs(compartmap:::.getUV(mat, k = 1, "left")), - abs(svd$u) - ) - expect_equal( - abs(compartmap:::.getUV(mat, k = 1, "right")), - abs(svd$v) - ) -}) +test_that("getSVD", {}) test_that(".centerMatrix", { m <- matrix(rnorm(100), ncol = 10)