Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions DIMS/EvaluateTics.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,35 @@ remove_neg <- remove_tech_reps$neg
repl_pattern_filtered <- remove_from_repl_pattern(remove_neg, repl_pattern, nr_replicates)
save(repl_pattern_filtered, file = "negative_repl_pattern.RData")

# write output for QC info on missed infusions
if (is.null(remove_neg)) {
remove_neg <- "none"
}
write.table(
remove_neg,
file = "miss_infusions_negative.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)

# positive scan mode
remove_pos <- remove_tech_reps$pos
repl_pattern_filtered <- remove_from_repl_pattern(remove_pos, repl_pattern, nr_replicates)
save(repl_pattern_filtered, file = "positive_repl_pattern.RData")

# write output for QC info on missed infusions
if (is.null(remove_pos)) {
remove_pos <- "none"
}
write.table(
remove_pos,
file = "miss_infusions_positive.txt",
row.names = FALSE,
col.names = FALSE,
sep = "\t"
)

# get an overview of suitable technical replicates for both scan modes
allsamples_techreps_neg <- get_overview_tech_reps(repl_pattern_filtered, "negative")
allsamples_techreps_pos <- get_overview_tech_reps(repl_pattern_filtered, "positive")
Expand All @@ -57,7 +81,6 @@ write.table(allsamples_techreps_both_scanmodes,
sep = ","
)


## generate TIC plots
# get all txt files
tic_files <- list.files("./", full.names = TRUE, pattern = "*TIC.txt")
Expand Down Expand Up @@ -135,4 +158,3 @@ tic_plot_pdf <- marrangeGrob(
ggsave(filename = paste0(run_name, "_TICplots.pdf"),
tic_plot_pdf, width = 21, height = 29.7, units = "cm")


77 changes: 62 additions & 15 deletions DIMS/GenerateQCOutput.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,16 @@ if (z_score == 1) {
is_list <- outlist[grep("Internal standard", outlist[, "relevance"], fixed = TRUE), ]
is_codes <- rownames(is_list)

# check if there is data present for all the samples that the pipeline started with,
# if not write sample name to a log file.
# check if there is data present for all the samples that the pipeline started with
sample_names_nodata <- setdiff(names(repl_pattern), names(is_list))
if (length(sample_names_nodata) == 0) {
sample_names_nodata <- "none"
}
write.table(sample_names_nodata,
file = paste(outdir, "sample_names_nodata.txt", sep = "/"),
row.names = FALSE, col.names = FALSE, quote = FALSE
)
if (!is.null(sample_names_nodata)) {
write.table(sample_names_nodata,
file = paste(outdir, "sample_names_nodata.txt", sep = "/"),
row.names = FALSE, col.names = FALSE, quote = FALSE
)
for (sample_name in sample_names_nodata) {
repl_pattern[[sample_name]] <- NULL
}
Expand Down Expand Up @@ -137,39 +139,47 @@ is_sum_selection <- c(
"13C6-Tyrosine (IS)"
)

# define threshold for acceptance of selected internal standards
threshold_is_dbs_neg <- c(15000, 200000, 130000, 18000, 50000)
threshold_is_dbs_pos <- c(150000, 3300000, 1750000, 150000, 270000)
threshold_is_dbs_sum <- c(1300000, 2500000, 500000, 1800000, 1400000)
threshold_is_pl_neg <- c(70000, 700000, 700000, 65000, 350000)
threshold_is_pl_pos <- c(1500000, 9000000, 3000000, 400000, 700000)
threshold_is_pl_sum <- c(8000000, 12500000, 2500000, 3000000, 4000000)

# add minimal intensity lines based on matrix (DBS or Plasma) and machine mode (neg, pos, sum)
if (dims_matrix == "DBS") {
add_min_intens_lines <- TRUE
hline_data_neg <-
data.frame(
int_line = c(15000, 200000, 130000, 18000, 50000),
int_line = threshold_is_dbs_neg,
HMDB_name = is_neg_selection
)
hline_data_pos <-
data.frame(
int_line = c(150000, 3300000, 1750000, 150000, 270000),
int_line = threshold_is_dbs_pos,
HMDB_name = is_pos_selection
)
hline_data_sum <-
data.frame(
int_line = c(1300000, 2500000, 500000, 1800000, 1400000),
int_line = threshold_is_dbs_sum,
HMDB_name = is_sum_selection
)
} else if (dims_matrix == "Plasma") {
add_min_intens_lines <- TRUE
hline_data_neg <-
data.frame(
int_line = c(70000, 700000, 700000, 65000, 350000),
int_line = threshold_is_pl_neg,
HMDB_name = is_neg_selection
)
hline_data_pos <-
data.frame(
int_line = c(1500000, 9000000, 3000000, 400000, 700000),
int_line = threshold_is_pl_pos,
HMDB_name = is_pos_selection
)
hline_data_sum <-
data.frame(
int_line = c(8000000, 12500000, 2500000, 3000000, 4000000),
int_line = threshold_is_pl_sum,
HMDB_name = is_sum_selection
)
} else {
Expand All @@ -180,9 +190,36 @@ if (dims_matrix == "DBS") {
plot_width <- 8 + 0.2 * sample_count
plot_height <- plot_width / 2.5

is_neg_selection <- subset(is_neg, HMDB_name %in% is_neg_selection)
is_pos_selection <- subset(is_pos, HMDB_name %in% is_pos_selection)
is_sum_selection <- subset(is_summed, HMDB_name %in% is_sum_selection)
is_neg_selection_subset <- subset(is_neg, HMDB_name %in% is_neg_selection)
is_pos_selection_subset <- subset(is_pos, HMDB_name %in% is_pos_selection)
is_sum_selection_subset <- subset(is_summed, HMDB_name %in% is_sum_selection)

# export txt file with samples with internal standard level below threshold
if (dims_matrix == "Plasma") {
is_below_threshold_neg <- find_is_below_threshold(is_neg_selection_subset, threshold_is_pl_neg, is_neg_selection, "neg")
is_below_threshold_pos <- find_is_below_threshold(is_pos_selection_subset, threshold_is_pl_pos, is_pos_selection, "pos")
is_below_threshold_sum <- find_is_below_threshold(is_sum_selection_subset, threshold_is_pl_sum, is_sum_selection, "sum")
is_below_threshold <- rbind(is_below_threshold_pos, is_below_threshold_neg, is_below_threshold_sum)
} else if (dims_matrix == "DBS") {
is_below_threshold_neg <- find_is_below_threshold(is_neg_selection_subset, threshold_is_dbs_neg, is_neg_selection, "neg")
is_below_threshold_pos <- find_is_below_threshold(is_pos_selection_subset, threshold_is_dbs_pos, is_pos_selection, "pos")
is_below_threshold_sum <- find_is_below_threshold(is_sum_selection_subset, threshold_is_dbs_sum, is_neg_selection, "sum")
is_below_threshold <- rbind(is_below_threshold_pos, is_below_threshold_neg, is_below_threshold_sum)
} else {
# generate empty table
is_below_threshold <- is_neg_selection_subset[0, ]
}

if (nrow(is_below_threshold) > 0) {
write.table(cbind(is_below_threshold, scanmode = scanmode_is),
file = "internal_standards_below_threshold.txt",
row.names = FALSE, sep = "\t")
} else {
write.table("no internal standards are below threshold",
file = "internal_standards_below_threshold.txt"
row.names = FALSE, col.names = FALSE
)
}

# bar plot either with or without minimal intensity lines
if (add_min_intens_lines) {
Expand Down Expand Up @@ -325,6 +362,7 @@ if (length(sst_colnrs) > 0) {
control_list_intensities <- sst_list[, control_col_ids]
control_list_cv <- calc_coefficient_of_variation(control_list_intensities)
sst_list_intensities <- cbind(sst_list_intensities, CV_controls = control_list_cv[, "CV_perc"])
sst_list_intensities <- as.data.frame(sst_list_intensities)
} else {
sst_list_intensities <- sst_list[, intensity_col_ids]
}
Expand Down Expand Up @@ -357,6 +395,15 @@ xlsx_name <- paste0(outdir, "/", project, "_IS_SST.xlsx")
openxlsx::saveWorkbook(wb, xlsx_name, overwrite = TRUE)
rm(wb)

# generate text file for workflow completed mail for components with Z-score < 2
if (sum(grepl("P1001", colnames(sst_list_intensities))) > 0) {
zscore_column <- grep("_Zscore", colnames(sst_list_intensities))[1]
sst_list_intensities_qc <- sst_list_intensities[sst_list_intensities[, zscore_column] < 2, ]
sst_list_intensities_qc <- select(sst_list_intensities_qc, -c("CV_controls"))
write.table(sst_list_intensities_qc, file = paste(outdir, "sst_qc.txt", sep = "/"), row.names = FALSE, sep = "\t")
} else {
write.table("no SST sample present", file = paste(outdir, "sst_qc.txt", sep = "/"), row.names = FALSE, col.names = FALSE)
}

### MISSING M/Z CHECK
# check the outlist_identified_(negative/positive).RData files for missing m/z values and save to file
Expand Down
4 changes: 3 additions & 1 deletion DIMS/GenerateQCOutput.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ process GenerateQCOutput {
tuple path('positive_controls_warning.txt'), path('missing_mz_warning.txt'), path('sample_names_nodata.txt'), optional: true
tuple path('*_IS_SST.xlsx'), path('*_positive_control.xlsx'), optional: true
path('plots/IS_*.png'), emit: plot_files
path('Check_number_of_controls.txt'), optional: true
path('check_number_of_controls.txt'), optional: true
path('sst_qc.txt'), optional: true
path('internal_standards_below_threshold.txt'), optional: true

script:
"""
Expand Down
26 changes: 26 additions & 0 deletions DIMS/export/generate_qc_output_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,29 @@ check_missing_mz <- function(mzmed_pgrp_ident, scanmode) {
}
return(results_mz_missing)
}

find_is_below_threshold <- function(is_selection_subset, thresholds, is_names, scanmode) {
#' Create a list of all internal standards with intensity below a threshold value
#'
#' @param is_selection_subset: Matrix with intensities for each internal standard in each sample
#' @param thresholds: Threshold values for a given scan mode and matrix
#' @param is_names: Array of names of internal standards for a given scan mode
#' @param scanmode: string indicating scan mode to include in output
#'
#' @return is_below_threshold: Matrix listing all samples for which internal standard intensity is below threshold

# initialize; get the headers of the matrix
is_below_threshold <- is_selection_subset[0, ]
# for every line, check if intensity is below the appropriate threshold
for (line_index in seq_len(nrow(is_selection_subset))) {
is_selected <- is_selection_subset$HMDB_name[line_index]
thresh_selected <- thresholds[which(is_names == is_selected)]
if (is_selection_subset$Intensity[line_index] < thresh_selected) {
is_below_threshold <- rbind(is_below_threshold, is_selection_subset[line_index, ])
}
}
# add information on scan mode
is_below_threshold <- cbind(is_below_threshold, scanmode = rep(scanmode, nrow(is_below_threshold)))
return(is_below_threshold)
}

20 changes: 20 additions & 0 deletions DIMS/tests/testthat/test_generate_qc_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# get_is_intensities, calc_coefficient_of_variation,
# check_missing_mz
library(ggplot2)
library(reshape2)
suppressMessages(library("dplyr"))
source("../../export/generate_qc_output_functions.R")

Expand Down Expand Up @@ -201,3 +202,22 @@ testthat::test_that("Check missing mz values", {
expect_identical(check_missing_mz(test_mz_pgrp, "Test")$`1`,
c(550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560))
})

testthat::test_that("list of internal standards below threshold is correctly created", {
test_is <- read.delim(test_path("fixtures", "test_internal_standards.txt"))
# select columns
test_is_wide <- test_is[ , c("HMDB_code", "HMDB_name", "C101.1", "C102.1", "P2.1", "P3.1")]
# melt into long format
test_is_long <- reshape2::melt(test_is_wide, id.vars = c("HMDB_name","HMDB_code"))
colnames(test_is_long)[4] <- "Intensity"

test_is_names <- c("metab_1 (IS)", "metab_2 (IS)", "metab_3 (IS)", "metab_4 (IS)")
test_thresholds <- rep(300, 4)

# 8 rows have intensity below threshold
expect_equal(nrow(find_is_below_threshold(test_is_long, test_thresholds, test_is_names, "test")), 8)
# the maximum intensity in the output should be less than threshold
expect_lt(max(find_is_below_threshold(test_is_long, test_thresholds, test_is_names, "test")$Intensity), 300)
# if all values are above threshold, the result should be an empty data table
expect_equal(nrow(find_is_below_threshold(test_is_long, test_thresholds / 3, test_is_names, "test")), 0)
})
Loading