diff --git a/R/getSubnetworkFromIndra.R b/R/getSubnetworkFromIndra.R index 09f1f30..8eea0a9 100644 --- a/R/getSubnetworkFromIndra.R +++ b/R/getSubnetworkFromIndra.R @@ -35,6 +35,12 @@ #' @param filter_by_ptm_site logical, whether to filter edges based on whether the #' site information from INDRA matches with the PTM site in the input. Default is FALSE. #' Only applicable for differential PTM abundance results. +#' @param include_infinite_fc logical, whether to include proteins with +#' infinite log fold change (i.e. proteins that are only detected in one condition). +#' Default is FALSE. +#' @param direction Character string specifying the direction of regulation to +#' include. One of \code{"both"} (default), \code{"up"} (upregulated only), +#' or \code{"down"} (downregulated only). #' #' @return list of 2 data.frames, nodes and edges #' @@ -60,8 +66,11 @@ getSubnetworkFromIndra <- function(input, logfc_cutoff = NULL, force_include_other = NULL, filter_by_curation = FALSE, - filter_by_ptm_site = FALSE) { - input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other) + filter_by_ptm_site = FALSE, + include_infinite_fc = FALSE, + direction = c("both", "up", "down")) { + direction = match.arg(direction) + input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other, include_infinite_fc, direction) .validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other) res <- .callIndraCogexApi(input$HgncId, force_include_other) res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter) diff --git a/R/utils_cytoscapeNetwork.R b/R/utils_cytoscapeNetwork.R index 3cbdf57..a7e2726 100644 --- a/R/utils_cytoscapeNetwork.R +++ b/R/utils_cytoscapeNetwork.R @@ -10,9 +10,17 @@ return(rep("#D3D3D3", length(logFC_values))) } + is_pos_inf <- is.infinite(logFC_values) & logFC_values > 0 + is_neg_inf <- is.infinite(logFC_values) & logFC_values < 0 + + finite_values <- logFC_values[is.finite(logFC_values)] default_max <- 2 - max_logFC <- max(c(abs(logFC_values), default_max), na.rm = TRUE) + max_logFC <- max(c(abs(finite_values), default_max), na.rm = TRUE) min_logFC <- -max_logFC + + logFC_values[is_pos_inf] <- max_logFC + logFC_values[is_neg_inf] <- min_logFC + color_map <- grDevices::colorRamp(colors) normalized <- (logFC_values - min_logFC) / (max_logFC - min_logFC) normalized[is.na(normalized)] <- 0.5 diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index d540635..228ff02 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -132,10 +132,21 @@ #' @param pvalueCutoff p-value cutoff #' @param logfc_cutoff logFC cutoff #' @param force_include_other list of identifiers to exempt from filtering +#' @param include_infinite_fc logical, whether to include proteins with +#' infinite log fold change (i.e. proteins that are only detected in one condition). +#' Default is FALSE. +#' @param direction Character string specifying the direction of regulation to +#' include. One of \code{"both"} (default), \code{"up"} (upregulated only), +#' or \code{"down"} (downregulated only). #' @return filtered groupComparison result #' @keywords internal #' @noRd -.filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_other) { +.filterGetSubnetworkFromIndraInput <- function(input, + pvalueCutoff, + logfc_cutoff, + force_include_other, + include_infinite_fc, + direction) { input$Protein <- as.character(input$Protein) # Extract exempt proteins before any filtering @@ -152,7 +163,11 @@ } } - # Apply standard filtering + infinite_fc_proteins <- NULL + if (include_infinite_fc) { + infinite_fc_proteins <- input[is.infinite(input$log2FC), ] + } + input <- input[!is.na(input$adj.pvalue),] if (!is.null(pvalueCutoff)) { input <- input[input$adj.pvalue < pvalueCutoff, ] @@ -163,8 +178,16 @@ } input <- input[!is.na(input$log2FC) & abs(input$log2FC) > logfc_cutoff, ] } - if ("issue" %in% colnames(input)) { - input <- input[is.na(input$issue), ] + + if (!is.null(infinite_fc_proteins) && nrow(infinite_fc_proteins) > 0) { + combined_input <- rbind(infinite_fc_proteins, input) + input <- combined_input[!duplicated(combined_input$Protein), ] + } + + if (direction == "up") { + input <- input[!is.na(input$log2FC) & input$log2FC > 0, ] + } else if (direction == "down") { + input <- input[!is.na(input$log2FC) & input$log2FC < 0, ] } # Combine filtered data with exempt proteins and remove duplicates @@ -186,6 +209,7 @@ } return(input) } + #' Add additional metadata to an edge #' @param edge object representation of an INDRA statement #' @param input filtered groupComparison result diff --git a/man/getSubnetworkFromIndra.Rd b/man/getSubnetworkFromIndra.Rd index a670d6e..4f9ff50 100644 --- a/man/getSubnetworkFromIndra.Rd +++ b/man/getSubnetworkFromIndra.Rd @@ -16,7 +16,9 @@ getSubnetworkFromIndra( logfc_cutoff = NULL, force_include_other = NULL, filter_by_curation = FALSE, - filter_by_ptm_site = FALSE + filter_by_ptm_site = FALSE, + include_infinite_fc = FALSE, + direction = c("both", "up", "down") ) } \arguments{ @@ -63,6 +65,14 @@ have been curated as incorrect in INDRA. Default is FALSE.} \item{filter_by_ptm_site}{logical, whether to filter edges based on whether the site information from INDRA matches with the PTM site in the input. Default is FALSE. Only applicable for differential PTM abundance results.} + +\item{include_infinite_fc}{logical, whether to include proteins with +infinite log fold change (i.e. proteins that are only detected in one condition). +Default is FALSE.} + +\item{direction}{Character string specifying the direction of regulation to +include. One of \code{"both"} (default), \code{"up"} (upregulated only), +or \code{"down"} (downregulated only).} } \value{ list of 2 data.frames, nodes and edges diff --git a/tests/testthat/test-utils_cytoscapeNetwork.R b/tests/testthat/test-utils_cytoscapeNetwork.R index 407f657..b36eecb 100644 --- a/tests/testthat/test-utils_cytoscapeNetwork.R +++ b/tests/testthat/test-utils_cytoscapeNetwork.R @@ -69,6 +69,12 @@ test_that(".mapLogFCToColor handles empty input", { expect_length(colors, 0) }) +test_that(".mapLogFCToColor handles Inf and -Inf values", { + colors <- MSstatsBioNet:::.mapLogFCToColor(c(-Inf, 0, Inf)) + expect_length(colors, 3) + expect_true(all(grepl("^#[0-9A-Fa-f]{6}$", colors))) +}) + # ============================================================================= # .relProps # ============================================================================= diff --git a/tests/testthat/test-utils_getSubnetworkFromIndra.R b/tests/testthat/test-utils_getSubnetworkFromIndra.R index cc350c8..afd76db 100644 --- a/tests/testthat/test-utils_getSubnetworkFromIndra.R +++ b/tests/testthat/test-utils_getSubnetworkFromIndra.R @@ -1,153 +1,215 @@ -make_nodes <- function() { - data.frame( - id = c("P53_HUMAN", "MDM2_HUMAN", "ATM_HUMAN"), - logFC = c(1.5, -1.0, 0.5), - Site = c("S15_S20", "T68", NA), - stringsAsFactors = FALSE - ) -} - -make_edges <- function() { - data.frame( - source = c("ATM_HUMAN", "ATM_HUMAN", "P53_HUMAN", "P53_HUMAN"), - target = c("P53_HUMAN", "MDM2_HUMAN", "MDM2_HUMAN", "ATM_HUMAN"), - interaction = c("Phosphorylation", "Phosphorylation", "Activation", "Activation"), - site = c("S15", "T999", NA, "S15"), - stringsAsFactors = FALSE - ) -} - -test_that(".filterByPtmSite returns input unchanged when filter_by_ptm_site = FALSE", { - nodes <- make_nodes() - edges <- make_edges() - result <- MSstatsBioNet:::.filterByPtmSite(nodes, edges, filter_by_ptm_site = FALSE) - - expect_equal(nrow(result$nodes), nrow(nodes)) - expect_equal(nrow(result$edges), nrow(edges)) - expect_equal(result$nodes, nodes) - expect_equal(result$edges, edges) -}) - -test_that(".filterByPtmSite returns input unchanged when no nodes have Site data", { - nodes <- make_nodes() - nodes$Site <- NA # wipe all sites - edges <- make_edges() +describe(".filterByPtmSite", { + make_nodes <- function() { + data.frame( + id = c("P53_HUMAN", "MDM2_HUMAN", "ATM_HUMAN"), + logFC = c(1.5, -1.0, 0.5), + Site = c("S15_S20", "T68", NA), + stringsAsFactors = FALSE + ) + } - result <- MSstatsBioNet:::.filterByPtmSite(nodes, edges, filter_by_ptm_site = TRUE) + make_edges <- function() { + data.frame( + source = c("ATM_HUMAN", "ATM_HUMAN", "P53_HUMAN", "P53_HUMAN"), + target = c("P53_HUMAN", "MDM2_HUMAN", "MDM2_HUMAN", "ATM_HUMAN"), + interaction = c("Phosphorylation", "Phosphorylation", "Activation", "Activation"), + site = c("S15", "T999", NA, "S15"), + stringsAsFactors = FALSE + ) + } - expect_equal(nrow(result$nodes), nrow(nodes)) - expect_equal(nrow(result$edges), nrow(edges)) -}) - -test_that(".filterByPtmSite keeps only edges with PTM site overlap on target node", { - result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), - filter_by_ptm_site = TRUE) - # Only ATM→P53 with site=S15 should survive - expect_equal(nrow(result$edges), 1) - expect_equal(result$edges$source, "ATM_HUMAN") - expect_equal(result$edges$target, "P53_HUMAN") - expect_equal(result$edges$site, "S15") -}) - -test_that(".filterByPtmSite drops edges where edge site does not overlap node Site", { - result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), - filter_by_ptm_site = TRUE) - # ATM→MDM2 with site=T999 should be gone (T999 not in MDM2's T68) - dropped <- result$edges[result$edges$source == "ATM_HUMAN" & - result$edges$target == "MDM2_HUMAN", ] - expect_equal(nrow(dropped), 0) -}) - -test_that(".filterByPtmSite drops edges with NA edge site even if node has Site data", { - result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), - filter_by_ptm_site = TRUE) - # P53→MDM2 has site=NA so must be dropped - dropped <- result$edges[result$edges$source == "P53_HUMAN" & - result$edges$target == "MDM2_HUMAN", ] - expect_equal(nrow(dropped), 0) -}) - -test_that(".filterByPtmSite drops edges where target node has no Site data", { - result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), - filter_by_ptm_site = TRUE) - # P53→ATM: ATM node Site is NA so no overlap possible - dropped <- result$edges[result$edges$source == "P53_HUMAN" & - result$edges$target == "ATM_HUMAN", ] - expect_equal(nrow(dropped), 0) -}) - -test_that(".filterByPtmSite prunes nodes to only those in surviving edges", { - result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), - filter_by_ptm_site = TRUE) - # Only ATM_HUMAN (source) and P53_HUMAN (target) should remain - expect_setequal(result$nodes$id, c("ATM_HUMAN", "P53_HUMAN")) - expect_false("MDM2_HUMAN" %in% result$nodes$id) -}) - -test_that(".filterByPtmSite preserves all node columns after pruning", { - result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), - filter_by_ptm_site = TRUE) - expect_true(all(c("id", "logFC", "Site") %in% names(result$nodes))) -}) - -test_that(".filterByPtmSite keeps edge when site matches any of multiple node sites", { - nodes <- data.frame( - id = c("A", "B"), - Site = c("S15_S20_T68", NA), - stringsAsFactors = FALSE - ) - edges <- data.frame( - source = "A", - target = "B", - interaction = "Phosphorylation", - site = "S20", # matches second site in A's Site string - stringsAsFactors = FALSE - ) - # Note: filter checks target node — B has no Site, so this should drop. - # Swap so A is the target to test multi-site matching. - edges2 <- data.frame( - source = "B", - target = "A", - interaction = "Phosphorylation", - site = "S20", - stringsAsFactors = FALSE - ) - result <- MSstatsBioNet:::.filterByPtmSite(nodes, edges2, filter_by_ptm_site = TRUE) - expect_equal(nrow(result$edges), 1) - expect_equal(result$edges$site, "S20") -}) - -test_that(".filterByPtmSite handles empty edges gracefully", { - nodes <- make_nodes() - empty_edges <- data.frame( - source = character(0), target = character(0), - interaction = character(0), site = character(0), - stringsAsFactors = FALSE - ) - result <- MSstatsBioNet:::.filterByPtmSite(nodes, empty_edges, - filter_by_ptm_site = TRUE) - expect_equal(nrow(result$edges), 0) - # All nodes pruned since no edges reference them - expect_equal(nrow(result$nodes), 0) + test_that(".filterByPtmSite returns input unchanged when filter_by_ptm_site = FALSE", { + nodes <- make_nodes() + edges <- make_edges() + result <- MSstatsBioNet:::.filterByPtmSite(nodes, edges, filter_by_ptm_site = FALSE) + + expect_equal(nrow(result$nodes), nrow(nodes)) + expect_equal(nrow(result$edges), nrow(edges)) + expect_equal(result$nodes, nodes) + expect_equal(result$edges, edges) + }) + + test_that(".filterByPtmSite returns input unchanged when no nodes have Site data", { + nodes <- make_nodes() + nodes$Site <- NA # wipe all sites + edges <- make_edges() + + result <- MSstatsBioNet:::.filterByPtmSite(nodes, edges, filter_by_ptm_site = TRUE) + + expect_equal(nrow(result$nodes), nrow(nodes)) + expect_equal(nrow(result$edges), nrow(edges)) + }) + + test_that(".filterByPtmSite keeps only edges with PTM site overlap on target node", { + result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), + filter_by_ptm_site = TRUE) + # Only ATM→P53 with site=S15 should survive + expect_equal(nrow(result$edges), 1) + expect_equal(result$edges$source, "ATM_HUMAN") + expect_equal(result$edges$target, "P53_HUMAN") + expect_equal(result$edges$site, "S15") + }) + + test_that(".filterByPtmSite drops edges where edge site does not overlap node Site", { + result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), + filter_by_ptm_site = TRUE) + # ATM→MDM2 with site=T999 should be gone (T999 not in MDM2's T68) + dropped <- result$edges[result$edges$source == "ATM_HUMAN" & + result$edges$target == "MDM2_HUMAN", ] + expect_equal(nrow(dropped), 0) + }) + + test_that(".filterByPtmSite drops edges with NA edge site even if node has Site data", { + result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), + filter_by_ptm_site = TRUE) + # P53→MDM2 has site=NA so must be dropped + dropped <- result$edges[result$edges$source == "P53_HUMAN" & + result$edges$target == "MDM2_HUMAN", ] + expect_equal(nrow(dropped), 0) + }) + + test_that(".filterByPtmSite drops edges where target node has no Site data", { + result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), + filter_by_ptm_site = TRUE) + # P53→ATM: ATM node Site is NA so no overlap possible + dropped <- result$edges[result$edges$source == "P53_HUMAN" & + result$edges$target == "ATM_HUMAN", ] + expect_equal(nrow(dropped), 0) + }) + + test_that(".filterByPtmSite prunes nodes to only those in surviving edges", { + result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), + filter_by_ptm_site = TRUE) + # Only ATM_HUMAN (source) and P53_HUMAN (target) should remain + expect_setequal(result$nodes$id, c("ATM_HUMAN", "P53_HUMAN")) + expect_false("MDM2_HUMAN" %in% result$nodes$id) + }) + + test_that(".filterByPtmSite preserves all node columns after pruning", { + result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), + filter_by_ptm_site = TRUE) + expect_true(all(c("id", "logFC", "Site") %in% names(result$nodes))) + }) + + test_that(".filterByPtmSite keeps edge when site matches any of multiple node sites", { + nodes <- data.frame( + id = c("A", "B"), + Site = c("S15_S20_T68", NA), + stringsAsFactors = FALSE + ) + edges <- data.frame( + source = "A", + target = "B", + interaction = "Phosphorylation", + site = "S20", # matches second site in A's Site string + stringsAsFactors = FALSE + ) + # Note: filter checks target node — B has no Site, so this should drop. + # Swap so A is the target to test multi-site matching. + edges2 <- data.frame( + source = "B", + target = "A", + interaction = "Phosphorylation", + site = "S20", + stringsAsFactors = FALSE + ) + result <- MSstatsBioNet:::.filterByPtmSite(nodes, edges2, filter_by_ptm_site = TRUE) + expect_equal(nrow(result$edges), 1) + expect_equal(result$edges$site, "S20") + }) + + test_that(".filterByPtmSite handles empty edges gracefully", { + nodes <- make_nodes() + empty_edges <- data.frame( + source = character(0), target = character(0), + interaction = character(0), site = character(0), + stringsAsFactors = FALSE + ) + result <- MSstatsBioNet:::.filterByPtmSite(nodes, empty_edges, + filter_by_ptm_site = TRUE) + expect_equal(nrow(result$edges), 0) + # All nodes pruned since no edges reference them + expect_equal(nrow(result$nodes), 0) + }) + + test_that(".filterByPtmSite handles empty nodes gracefully", { + empty_nodes <- data.frame( + id = character(0), Site = character(0), + stringsAsFactors = FALSE + ) + edges <- make_edges() + # No nodes have Site data so passthrough expected + result <- MSstatsBioNet:::.filterByPtmSite(empty_nodes, edges, + filter_by_ptm_site = TRUE) + expect_equal(nrow(result$edges), nrow(edges)) + }) + + test_that(".filterByPtmSite always returns a list with nodes and edges", { + result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), + filter_by_ptm_site = TRUE) + expect_type(result, "list") + expect_true(all(c("nodes", "edges") %in% names(result))) + expect_s3_class(result$nodes, "data.frame") + expect_s3_class(result$edges, "data.frame") + }) }) -test_that(".filterByPtmSite handles empty nodes gracefully", { - empty_nodes <- data.frame( - id = character(0), Site = character(0), - stringsAsFactors = FALSE - ) - edges <- make_edges() - # No nodes have Site data so passthrough expected - result <- MSstatsBioNet:::.filterByPtmSite(empty_nodes, edges, - filter_by_ptm_site = TRUE) - expect_equal(nrow(result$edges), nrow(edges)) +describe(".filterGetSubnetworkFromIndraInput", { + .make_test_input <- function() { + data.frame( + Protein = c("A", "B", "C", "D"), + log2FC = c(3, -3, 0.5, Inf), + adj.pvalue = c(0.01, 0.01, 0.5, 0.01), + stringsAsFactors = FALSE + ) + } + + test_that(".filterGetSubnetworkFromIndraInput filters by pvalueCutoff", { + result <- MSstatsBioNet:::.filterGetSubnetworkFromIndraInput( + .make_test_input(), pvalueCutoff = 0.05, logfc_cutoff = NULL, + force_include_other = NULL, include_infinite_fc = FALSE, direction = "both" + ) + expect_true(all(result$adj.pvalue < 0.05)) + }) + + test_that(".filterGetSubnetworkFromIndraInput filters by logfc_cutoff", { + result <- MSstatsBioNet:::.filterGetSubnetworkFromIndraInput( + .make_test_input(), pvalueCutoff = NULL, logfc_cutoff = 1, + force_include_other = NULL, include_infinite_fc = FALSE, direction = "both" + ) + expect_true(all(abs(result$log2FC) > 1)) + }) + + test_that(".filterGetSubnetworkFromIndraInput respects force_include_other", { + input <- cbind(.make_test_input(), HgncId = c("1", "2", "3", "4")) + result <- MSstatsBioNet:::.filterGetSubnetworkFromIndraInput( + input, pvalueCutoff = 0.001, logfc_cutoff = 10, + force_include_other = c("HGNC:1"), include_infinite_fc = FALSE, direction = "both" + ) + expect_true("A" %in% result$Protein) + }) + + test_that(".filterGetSubnetworkFromIndraInput includes infinite FC when requested", { + result <- MSstatsBioNet:::.filterGetSubnetworkFromIndraInput( + .make_test_input(), pvalueCutoff = NULL, logfc_cutoff = 5, + force_include_other = NULL, include_infinite_fc = TRUE, direction = "both" + ) + expect_true("D" %in% result$Protein) + }) + + test_that(".filterGetSubnetworkFromIndraInput filters by direction up", { + result <- MSstatsBioNet:::.filterGetSubnetworkFromIndraInput( + .make_test_input(), pvalueCutoff = NULL, logfc_cutoff = NULL, + force_include_other = NULL, include_infinite_fc = FALSE, direction = "up" + ) + expect_true(all(result$log2FC > 0)) + }) + + test_that(".filterGetSubnetworkFromIndraInput filters by direction down", { + result <- MSstatsBioNet:::.filterGetSubnetworkFromIndraInput( + .make_test_input(), pvalueCutoff = NULL, logfc_cutoff = NULL, + force_include_other = NULL, include_infinite_fc = FALSE, direction = "down" + ) + expect_true(all(result$log2FC < 0)) + }) }) - -test_that(".filterByPtmSite always returns a list with nodes and edges", { - result <- MSstatsBioNet:::.filterByPtmSite(make_nodes(), make_edges(), - filter_by_ptm_site = TRUE) - expect_type(result, "list") - expect_true(all(c("nodes", "edges") %in% names(result))) - expect_s3_class(result$nodes, "data.frame") - expect_s3_class(result$edges, "data.frame") -}) \ No newline at end of file