From 51782b696f75bb097cfaa735251b3c5eceb5a455 Mon Sep 17 00:00:00 2001 From: devonjkohler Date: Mon, 2 Mar 2026 11:14:43 -0500 Subject: [PATCH] added anomaly model to MSstatsClean call --- NAMESPACE | 1 + R/clean_DIANN.R | 22 +++++++++++++++++++--- R/converters.R | 4 +++- man/bigDIANNtoMSstatsFormat.Rd | 3 +++ man/cleanDIANNChunk.Rd | 11 ++++++++++- man/reduceBigDIANN.Rd | 11 ++++++++++- 6 files changed, 46 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d60085e..9ec4823 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,5 +9,6 @@ importFrom(MSstats,dataProcess) importFrom(MSstats,groupComparison) importFrom(MSstatsConvert,MSstatsClean) importFrom(MSstatsConvert,MSstatsImport) +importFrom(MSstatsConvert,MSstatsMakeAnnotation) importFrom(utils,head) importFrom(utils,sessionInfo) diff --git a/R/clean_DIANN.R b/R/clean_DIANN.R index 7fbcfec..ff63cad 100644 --- a/R/clean_DIANN.R +++ b/R/clean_DIANN.R @@ -7,6 +7,8 @@ #' @param global_qvalue_cutoff Global Q-value cutoff #' @param qvalue_cutoff Q-value cutoff #' @param pg_qvalue_cutoff Protein group Q-value cutoff +#' @param calculateAnomalyScores Boolean for MSstats+ Model +#' @param anomalyModelFeatures Character vector of features to use for MSstats+ Model #' @param annotation Annotation file or data frame #' @return NULL. Writes to file. #' @keywords internal @@ -15,6 +17,8 @@ reduceBigDIANN <- function(input_file, output_path, MBR = TRUE, global_qvalue_cutoff = 0.01, qvalue_cutoff = 0.01, pg_qvalue_cutoff = 0.01, + calculateAnomalyScores=FALSE, + anomalyModelFeatures=c(), annotation = NULL) { if (grepl("csv", input_file)) { delim = "," @@ -24,8 +28,14 @@ reduceBigDIANN <- function(input_file, output_path, MBR = TRUE, delim <- ";" } - diann_chunk <- function(x, pos) cleanDIANNChunk(x, output_path, MBR, quantificationColumn, pos, - global_qvalue_cutoff, qvalue_cutoff, pg_qvalue_cutoff, annotation) + diann_chunk <- function(x, pos) cleanDIANNChunk(x, output_path, MBR, + quantificationColumn, pos, + global_qvalue_cutoff, + qvalue_cutoff, + pg_qvalue_cutoff, + calculateAnomalyScores, + anomalyModelFeatures, + annotation) readr::read_delim_chunked(input_file, readr::DataFrameCallback$new(diann_chunk), @@ -43,6 +53,8 @@ reduceBigDIANN <- function(input_file, output_path, MBR = TRUE, #' @param global_qvalue_cutoff Global Q-value cutoff #' @param qvalue_cutoff Q-value cutoff #' @param pg_qvalue_cutoff Protein group Q-value cutoff +#' @param calculateAnomalyScores Boolean for MSstats+ Model +#' @param anomalyModelFeatures Character vector of features to use for MSstats+ Model #' @param annotation Annotation file or data frame #' @importFrom MSstatsConvert MSstatsImport MSstatsClean MSstatsMakeAnnotation #' @return NULL @@ -51,6 +63,8 @@ cleanDIANNChunk = function(input, output_path, MBR, quantificationColumn, pos, global_qvalue_cutoff = 0.01, qvalue_cutoff = 0.01, pg_qvalue_cutoff = 0.01, + calculateAnomalyScores=FALSE, + anomalyModelFeatures = c(), annotation = NULL) { input = MSstatsImport(list(input = input), "MSstats", "DIANN") @@ -60,7 +74,9 @@ cleanDIANNChunk = function(input, output_path, MBR, quantificationColumn, pos, quantificationColumn = quantificationColumn, global_qvalue_cutoff = global_qvalue_cutoff, qvalue_cutoff = qvalue_cutoff, - pg_qvalue_cutoff = pg_qvalue_cutoff + pg_qvalue_cutoff = pg_qvalue_cutoff, + calculateAnomalyScores = calculateAnomalyScores, + anomalyModelFeatures = anomalyModelFeatures ) input = MSstatsMakeAnnotation(input, annotation) .writeChunkToFile(input, output_path, pos) diff --git a/R/converters.R b/R/converters.R index 6f3db0b..03af429 100644 --- a/R/converters.R +++ b/R/converters.R @@ -188,7 +188,9 @@ bigDIANNtoMSstatsFormat <- function(input_file, paste0("reduce_output_", output_file_name), MBR, quantificationColumn, - global_qvalue_cutoff, qvalue_cutoff, pg_qvalue_cutoff, annotation) + global_qvalue_cutoff, qvalue_cutoff, pg_qvalue_cutoff, + calculateAnomalyScores, anomalyModelFeatures, + annotation) # Preprocess the cleaned data (feature selection, etc.) msstats_data <- MSstatsPreprocessBig( diff --git a/man/bigDIANNtoMSstatsFormat.Rd b/man/bigDIANNtoMSstatsFormat.Rd index 244d6b9..4e48e3e 100644 --- a/man/bigDIANNtoMSstatsFormat.Rd +++ b/man/bigDIANNtoMSstatsFormat.Rd @@ -6,6 +6,7 @@ \usage{ bigDIANNtoMSstatsFormat( input_file, + annotation = NULL, output_file_name, backend, MBR = TRUE, @@ -26,6 +27,8 @@ bigDIANNtoMSstatsFormat( \arguments{ \item{input_file}{name of the input text file in 10-column MSstats format.} +\item{annotation}{name of 'annotation.txt' data which includes Condition, BioReplicate, Run.} + \item{output_file_name}{name of an output file which will be saved after pre-processing} \item{backend}{"arrow" or "sparklyr". Option "sparklyr" requires a spark installation diff --git a/man/cleanDIANNChunk.Rd b/man/cleanDIANNChunk.Rd index b74ca16..86adebb 100644 --- a/man/cleanDIANNChunk.Rd +++ b/man/cleanDIANNChunk.Rd @@ -12,7 +12,10 @@ cleanDIANNChunk( pos, global_qvalue_cutoff = 0.01, qvalue_cutoff = 0.01, - pg_qvalue_cutoff = 0.01 + pg_qvalue_cutoff = 0.01, + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c(), + annotation = NULL ) } \arguments{ @@ -31,6 +34,12 @@ cleanDIANNChunk( \item{qvalue_cutoff}{Q-value cutoff} \item{pg_qvalue_cutoff}{Protein group Q-value cutoff} + +\item{calculateAnomalyScores}{Boolean for MSstats+ Model} + +\item{anomalyModelFeatures}{Character vector of features to use for MSstats+ Model} + +\item{annotation}{Annotation file or data frame} } \description{ Clean a single chunk of DIANN data diff --git a/man/reduceBigDIANN.Rd b/man/reduceBigDIANN.Rd index d40dce1..ad2f73b 100644 --- a/man/reduceBigDIANN.Rd +++ b/man/reduceBigDIANN.Rd @@ -11,7 +11,10 @@ reduceBigDIANN( quantificationColumn = "FragmentQuantCorrected", global_qvalue_cutoff = 0.01, qvalue_cutoff = 0.01, - pg_qvalue_cutoff = 0.01 + pg_qvalue_cutoff = 0.01, + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c(), + annotation = NULL ) } \arguments{ @@ -28,6 +31,12 @@ reduceBigDIANN( \item{qvalue_cutoff}{Q-value cutoff} \item{pg_qvalue_cutoff}{Protein group Q-value cutoff} + +\item{calculateAnomalyScores}{Boolean for MSstats+ Model} + +\item{anomalyModelFeatures}{Character vector of features to use for MSstats+ Model} + +\item{annotation}{Annotation file or data frame} } \value{ NULL. Writes to file.