diff --git a/NEWS.md b/NEWS.md index ba99f10..0b70e2a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # clover 0.0.0.9000 +* `plot_tRNA_structure()` gains `sprinzl_coords` and `trna_id` parameters. When `sprinzl_coords` is provided, position columns in `modifications`, `outlines`, `text_colors`, and `linkages` are interpreted as Sprinzl labels and converted to 1-based sequence positions automatically (#20). + * `compute_bcerror_delta()` computes per-position differences in base-calling error rates between two conditions from a summarized bcerror tibble. * `prep_mod_heatmap()` prepares bcerror delta data for `plot_mod_heatmap()` by joining Sprinzl coordinates, annotating known modifications, and shortening tRNA labels. diff --git a/R/plot-structure.R b/R/plot-structure.R index f58557a..56805fb 100644 --- a/R/plot-structure.R +++ b/R/plot-structure.R @@ -60,27 +60,30 @@ structure_trnas <- function(organism) { #' @param organism Character string specifying the organism name #' (e.g., `"Escherichia coli"`). #' @param modifications A tibble with columns `pos` (1-based -#' position in the tRNA sequence) and `mod1` (short modification -#' name, e.g., `"m1A"`). Output of [modomics_mods()] works -#' directly after filtering to the tRNA of interest. -#' @param outlines A tibble with columns `pos` (1-based position) -#' and `group` (category name for palette lookup). Draws circle -#' outlines (stroke only, no fill) around each nucleotide. -#' @param linkages A tibble with columns `pos1`, `pos2`, and -#' optionally `value` (e.g., log odds ratio) for coloring arcs. -#' If a `log_odds_ratio` column is present and `value` is not, it -#' is automatically used as `value`, so output of -#' [clean_odds_ratios()] or [filter_linkages()] works directly. +#' position or Sprinzl label when `sprinzl_coords` is provided) +#' and `mod1` (short modification name, e.g., `"m1A"`). Output +#' of [modomics_mods()] works directly after filtering to the +#' tRNA of interest. 
+#' @param outlines A tibble with columns `pos` (1-based position +#' or Sprinzl label) and `group` (category name for palette +#' lookup). Draws circle outlines (stroke only, no fill) around +#' each nucleotide. +#' @param linkages A tibble with columns `pos1`, `pos2` (1-based +#' positions or Sprinzl labels), and optionally `value` (e.g., +#' log odds ratio) for coloring arcs. If a `log_odds_ratio` +#' column is present and `value` is not, it is automatically +#' used as `value`, so output of [clean_odds_ratios()] or +#' [filter_linkages()] works directly. #' @param output Path for the output SVG file. If `NULL` (default), #' writes to a temporary file. #' @param mod_palette Named character vector of colors keyed by #' modification short name. If `NULL`, uses a default palette. #' @param outline_palette Named character vector of colors keyed by #' outline group name. If `NULL`, uses `"#333333"` for all. -#' @param text_colors A tibble with columns `pos` (1-based position) -#' and `color` (hex color string). Changes the nucleotide letter -#' color at specified positions. Unspecified positions keep the -#' default color. +#' @param text_colors A tibble with columns `pos` (1-based position +#' or Sprinzl label) and `color` (hex color string). Changes the +#' nucleotide letter color at specified positions. Unspecified +#' positions keep the default color. #' @param position_markers Logical; if `TRUE` (default), draw #' small grey position numbers every 10 nucleotides around the #' cloverleaf to help orient readers. @@ -89,6 +92,16 @@ structure_trnas <- function(organism) { #' linkage values. Default `c("#0072B2", "#D55E00")` (blue for #' exclusive, vermillion for co-occurring). Stroke width encodes #' the magnitude of the value. +#' @param sprinzl_coords A tibble of Sprinzl coordinates as +#' returned by [read_sprinzl_coords()], or `NULL` (default). 
When +#' provided, position columns in `modifications`, `outlines`, +#' `text_colors`, and `linkages` are interpreted as Sprinzl +#' labels and converted to 1-based sequence positions +#' automatically. +#' @param trna_id Character string identifying the tRNA in +#' `sprinzl_coords` (e.g., +#' `"nuc-tRNA-Glu-UUC-1-1"`). If `NULL` (default), the tRNA +#' name is resolved from `trna` automatically. #' #' @return The path to the annotated SVG file (invisibly). #' @@ -109,10 +122,56 @@ plot_tRNA_structure <- function( outline_palette = NULL, text_colors = NULL, position_markers = TRUE, - linkage_palette = c("#0072B2", "#D55E00") + linkage_palette = c("#0072B2", "#D55E00"), + sprinzl_coords = NULL, + trna_id = NULL ) { rlang::check_installed("jsonlite", reason = "to read structure metadata.") + if (!is.null(sprinzl_coords)) { + if (is.null(trna_id)) { + trna_id <- find_sprinzl_id(trna, sprinzl_coords) + if (is.null(trna_id)) { + # Fallback: try matching without "nuc-" prefix + trna_id <- find_sprinzl_id_bare(trna, sprinzl_coords) + } + if (is.null(trna_id)) { + cli::cli_abort( + "Could not find {.val {trna}} in {.arg sprinzl_coords}." 
+        )
+      }
+    }
+    trna_coords <- sprinzl_coords[sprinzl_coords$trna_id == trna_id, ]
+    if (!is.null(modifications)) {
+      modifications <- convert_sprinzl_positions(
+        modifications,
+        "pos",
+        trna_coords
+      )
+    }
+    if (!is.null(outlines)) {
+      outlines <- convert_sprinzl_positions(
+        outlines,
+        "pos",
+        trna_coords
+      )
+    }
+    if (!is.null(text_colors)) {
+      text_colors <- convert_sprinzl_positions(
+        text_colors,
+        "pos",
+        trna_coords
+      )
+    }
+    if (!is.null(linkages)) {
+      linkages <- convert_sprinzl_positions(
+        linkages,
+        c("pos1", "pos2"),
+        trna_coords
+      )
+    }
+  }
+
   org_dir <- structure_org_dir(organism)
   svg_path <- file.path(org_dir, paste0(trna, ".svg"))
 
@@ -296,6 +355,55 @@ structure_html <- function(svg_path) {
 
 # Internal helpers -------------------------------------------------------------
 
+# Fallback lookup of a Sprinzl `trna_id` by bare tRNA name prefix.
+#
+# `trna` is split on "-"; when it has at least three components, every "T" in
+# the third one is replaced with "U". The rebuilt name is then anchored as a
+# regular-expression prefix (`^<name>-`, not escaped) against the unique
+# `sprinzl_coords$trna_id` values. Returns the first match in sorted order, or
+# `NULL` when nothing matches.
+find_sprinzl_id_bare <- function(trna, sprinzl_coords) {
+  parts <- strsplit(trna, "-")[[1]]
+  if (length(parts) >= 3) {
+    parts[3] <- gsub("T", "U", parts[3])
+  }
+  rna_name <- paste(parts, collapse = "-")
+  pattern <- paste0("^", rna_name, "-")
+
+  ids <- unique(sprinzl_coords$trna_id)
+  matches <- grep(pattern, ids, value = TRUE)
+  if (length(matches) == 0) {
+    return(NULL)
+  }
+  sort(matches)[1]
+}
+
+# Translate Sprinzl labels in the `pos_cols` columns of `df` to positions.
+#
+# Each column named in `pos_cols` is coerced to character and looked up in
+# `trna_coords$sprinzl_label`; matches are replaced by the corresponding
+# `trna_coords$pos`. Non-missing labels without a match raise a warning, and
+# rows left with `NA` in any of `pos_cols` are dropped from the result.
+convert_sprinzl_positions <- function(df, pos_cols, trna_coords) {
+  lookup <- trna_coords[, c("sprinzl_label", "pos")]
+  for (col in pos_cols) {
+    original <- as.character(df[[col]])
+    matched <- lookup$pos[match(original, lookup$sprinzl_label)]
+    # Labels that were supplied (not NA) but are absent from the lookup.
+    unmatched <- original[is.na(matched) & !is.na(original)]
+    if (length(unmatched) > 0) {
+      n <- length(unmatched)
+      # Keep `{?s}` directly after `cli::qty()`: a space between them renders
+      # the plural as "position s".
+      cli::cli_warn(
+        "Sprinzl position{cli::qty(length(unique(unmatched)))}{?s} {.val {unique(unmatched)}} not found; dropping {n} row{cli::qty(n)}{?s}."
+      )
+    }
+    df[[col]] <- matched
+  }
+  df[stats::complete.cases(df[pos_cols]), , drop = FALSE]
+}
+
 # R2R SVGs use font-size 7.1 Helvetica. The text x/y attributes give the
 # left baseline of the character.
These offsets shift to the visual center # of the uppercase letter (approximately half character-width right, half diff --git a/man/plot_tRNA_structure.Rd b/man/plot_tRNA_structure.Rd index 36ed9b6..bafa891 100644 --- a/man/plot_tRNA_structure.Rd +++ b/man/plot_tRNA_structure.Rd @@ -15,7 +15,9 @@ plot_tRNA_structure( outline_palette = NULL, text_colors = NULL, position_markers = TRUE, - linkage_palette = c("#0072B2", "#D55E00") + linkage_palette = c("#0072B2", "#D55E00"), + sprinzl_coords = NULL, + trna_id = NULL ) } \arguments{ @@ -27,19 +29,22 @@ available tRNAs.} (e.g., \code{"Escherichia coli"}).} \item{modifications}{A tibble with columns \code{pos} (1-based -position in the tRNA sequence) and \code{mod1} (short modification -name, e.g., \code{"m1A"}). Output of \code{\link[=modomics_mods]{modomics_mods()}} works -directly after filtering to the tRNA of interest.} +position or Sprinzl label when \code{sprinzl_coords} is provided) +and \code{mod1} (short modification name, e.g., \code{"m1A"}). Output +of \code{\link[=modomics_mods]{modomics_mods()}} works directly after filtering to the +tRNA of interest.} -\item{outlines}{A tibble with columns \code{pos} (1-based position) -and \code{group} (category name for palette lookup). Draws circle -outlines (stroke only, no fill) around each nucleotide.} +\item{outlines}{A tibble with columns \code{pos} (1-based position +or Sprinzl label) and \code{group} (category name for palette +lookup). Draws circle outlines (stroke only, no fill) around +each nucleotide.} -\item{linkages}{A tibble with columns \code{pos1}, \code{pos2}, and -optionally \code{value} (e.g., log odds ratio) for coloring arcs. 
-If a \code{log_odds_ratio} column is present and \code{value} is not, it -is automatically used as \code{value}, so output of -\code{\link[=clean_odds_ratios]{clean_odds_ratios()}} or \code{\link[=filter_linkages]{filter_linkages()}} works directly.} +\item{linkages}{A tibble with columns \code{pos1}, \code{pos2} (1-based +positions or Sprinzl labels), and optionally \code{value} (e.g., +log odds ratio) for coloring arcs. If a \code{log_odds_ratio} +column is present and \code{value} is not, it is automatically +used as \code{value}, so output of \code{\link[=clean_odds_ratios]{clean_odds_ratios()}} or +\code{\link[=filter_linkages]{filter_linkages()}} works directly.} \item{output}{Path for the output SVG file. If \code{NULL} (default), writes to a temporary file.} @@ -50,10 +55,10 @@ modification short name. If \code{NULL}, uses a default palette.} \item{outline_palette}{Named character vector of colors keyed by outline group name. If \code{NULL}, uses \code{"#333333"} for all.} -\item{text_colors}{A tibble with columns \code{pos} (1-based position) -and \code{color} (hex color string). Changes the nucleotide letter -color at specified positions. Unspecified positions keep the -default color.} +\item{text_colors}{A tibble with columns \code{pos} (1-based position +or Sprinzl label) and \code{color} (hex color string). Changes the +nucleotide letter color at specified positions. Unspecified +positions keep the default color.} \item{position_markers}{Logical; if \code{TRUE} (default), draw small grey position numbers every 10 nucleotides around the @@ -64,6 +69,18 @@ colors for negative (exclusive) and positive (co-occurring) linkage values. Default \code{c("#0072B2", "#D55E00")} (blue for exclusive, vermillion for co-occurring). Stroke width encodes the magnitude of the value.} + +\item{sprinzl_coords}{A tibble of Sprinzl coordinates as +returned by \code{\link[=read_sprinzl_coords]{read_sprinzl_coords()}}, or \code{NULL} (default). 
When +provided, position columns in \code{modifications}, \code{outlines}, +\code{text_colors}, and \code{linkages} are interpreted as Sprinzl +labels and converted to 1-based sequence positions +automatically.} + +\item{trna_id}{Character string identifying the tRNA in +\code{sprinzl_coords} (e.g., +\code{"nuc-tRNA-Glu-UUC-1-1"}). If \code{NULL} (default), the tRNA +name is resolved from \code{trna} automatically.} } \value{ The path to the annotated SVG file (invisibly). diff --git a/tests/testthat/_snaps/plot-structure.md b/tests/testthat/_snaps/plot-structure.md index 11a38c1..118d437 100644 --- a/tests/testthat/_snaps/plot-structure.md +++ b/tests/testthat/_snaps/plot-structure.md @@ -41,3 +41,19 @@ Error in `structure_html()`: ! SVG file not found: 'nonexistent.svg'. +# convert_sprinzl_positions warns on unmatched and drops rows + + Code + result <- convert_sprinzl_positions(df, "pos", trna_coords) + Condition + Warning: + Sprinzl position "99" not found; dropping 1 row. + +# plot_tRNA_structure errors when tRNA not in sprinzl_coords + + Code + plot_tRNA_structure(trna, org, sprinzl_coords = fake_coords) + Condition + Error in `plot_tRNA_structure()`: + ! Could not find "tRNA-Ala-GGC" in `sprinzl_coords`. 
+ diff --git a/tests/testthat/test-plot-structure.R b/tests/testthat/test-plot-structure.R index 82e3a1c..0d565f2 100644 --- a/tests/testthat/test-plot-structure.R +++ b/tests/testthat/test-plot-structure.R @@ -284,6 +284,103 @@ test_that("structure_html errors on missing file", { ) }) +# Sprinzl coordinate conversion ------------------------------------------------ + +test_that("convert_sprinzl_positions maps labels to positions", { + trna_coords <- dplyr::tibble( + sprinzl_label = c("1", "2", "34", "35", "36"), + pos = c(1L, 2L, 30L, 31L, 32L) + ) + df <- dplyr::tibble(pos = c("34", "35", "36"), mod1 = c("m1A", "m5C", "D")) + result <- convert_sprinzl_positions(df, "pos", trna_coords) + expect_equal(result$pos, c(30L, 31L, 32L)) + expect_equal(result$mod1, c("m1A", "m5C", "D")) +}) + +test_that("convert_sprinzl_positions coerces numeric input", { + trna_coords <- dplyr::tibble( + sprinzl_label = c("34", "35"), + pos = c(30L, 31L) + ) + df <- dplyr::tibble(pos = c(34, 35), mod1 = c("m1A", "m5C")) + result <- convert_sprinzl_positions(df, "pos", trna_coords) + expect_equal(result$pos, c(30L, 31L)) +}) + +test_that("convert_sprinzl_positions warns on unmatched and drops rows", { + trna_coords <- dplyr::tibble( + sprinzl_label = c("1", "2"), + pos = c(1L, 2L) + ) + df <- dplyr::tibble(pos = c("1", "99"), mod1 = c("m1A", "m5C")) + expect_snapshot( + result <- convert_sprinzl_positions(df, "pos", trna_coords) + ) + expect_equal(nrow(result), 1) + expect_equal(result$pos, 1L) +}) + +test_that("convert_sprinzl_positions converts two columns for linkages", { + trna_coords <- dplyr::tibble( + sprinzl_label = c("34", "35", "36"), + pos = c(30L, 31L, 32L) + ) + df <- dplyr::tibble( + pos1 = c("34", "35"), + pos2 = c("36", "34"), + value = c(1.5, -0.5) + ) + result <- convert_sprinzl_positions(df, c("pos1", "pos2"), trna_coords) + expect_equal(result$pos1, c(30L, 31L)) + expect_equal(result$pos2, c(32L, 30L)) +}) + +test_that("plot_tRNA_structure errors when tRNA not in 
sprinzl_coords", { + skip_if( + length(structure_organisms()) == 0, + "No bundled structure SVGs" + ) + + org <- structure_organisms()[1] + trna <- structure_trnas(org)[1] + fake_coords <- dplyr::tibble( + trna_id = "nuc-tRNA-Fake-AAA-1-1", + pos = 1L, + sprinzl_label = "1" + ) + expect_snapshot( + plot_tRNA_structure(trna, org, sprinzl_coords = fake_coords), + error = TRUE + ) +}) + +test_that("plot_tRNA_structure converts sprinzl coords with real data", { + skip_if( + length(structure_organisms()) == 0, + "No bundled structure SVGs" + ) + coords_path <- system.file( + "extdata", + "sprinzl", + "ecoliK12_global_coords.tsv.gz", + package = "clover" + ) + skip_if(coords_path == "", "No bundled sprinzl coords") + + coords <- read_sprinzl_coords(coords_path) + org <- "Escherichia coli" + trna <- "tRNA-Glu-TTC" + + mods <- dplyr::tibble(pos = c("34", "35"), mod1 = c("m1A", "m5C")) + svg <- plot_tRNA_structure( + trna, + org, + modifications = mods, + sprinzl_coords = coords + ) + expect_true(file.exists(svg)) +}) + test_that("plot_tRNA_structure respects position_markers = FALSE", { skip_if( length(structure_organisms()) == 0, diff --git a/vignettes/articles/rewiring.Rmd b/vignettes/articles/rewiring.Rmd index 606b480..e0a1494 100644 --- a/vignettes/articles/rewiring.Rmd +++ b/vignettes/articles/rewiring.Rmd @@ -121,21 +121,24 @@ pcoa <- perform_pcoa(mat) plot_pcoa_rewiring(pcoa, scores) ``` -## Network visualization +## Structure visualization -Build a chord diagram from the ROR data to visualize modification rewiring -between positions. +Plot significant rewiring linkages on the tRNA cloverleaf structure for a +single isodecoder. ```{r} -#| label: fig-chord-ror -#| fig.cap: "Chord diagram of modification rewiring between control and infected conditions." 
-#| fig.width: 6 -#| fig.height: 6 -ror_agg <- ror |> - dplyr::group_by(pos1, pos2) |> - dplyr::summarise(log_ror = mean(ror), .groups = "drop") - -plot_chord_ror(ror_agg, title = "Modification rewiring: inf vs ctl") +#| label: fig-structure-ror +#| fig.cap: "tRNA structure showing significant modification rewiring linkages for tRNA-Glu-TTC." +ror_glu <- ror |> + filter(isodecoder == "tRNA-Glu-TTC", significant) |> + rename(value = ror) + +svg <- plot_tRNA_structure( + "tRNA-Glu-TTC", + "Escherichia coli", + linkages = ror_glu +) +structure_html(svg) ``` ## Session info