-
Notifications
You must be signed in to change notification settings - Fork 10
Add big file diann #151
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: devel
Are you sure you want to change the base?
Add big file diann #151
Changes from all commits
affd611
7ec3938
e07c645
983265e
2feabd1
ee99a00
8ced47e
1fc981c
bd4d39a
cf54d43
5c7c74b
f03f003
76933e3
6d06c88
b86880d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -530,7 +530,7 @@ getData <- function(input) { | |
| shinybusy::update_modal_spinner(text = "Processing large Spectronaut file...") | ||
|
|
||
| # Call the big file conversion function from MSstatsConvert | ||
| converted_data <- MSstatsBig::bigSpectronauttoMSstatsFormat( | ||
| converted_data <- bigSpectronauttoMSstatsFormat( | ||
| input_file = local_big_file_path, | ||
| output_file_name = "output_file.csv", | ||
| backend = "arrow", | ||
|
|
@@ -592,10 +592,70 @@ getData <- function(input) { | |
| } | ||
| } | ||
| else if(input$filetype == 'diann') { | ||
| if (isTRUE(input$big_file_diann)) { | ||
| # Logic for big DIANN files | ||
| # Parse the file path from shinyFiles input | ||
| volumes <- shinyFiles::getVolumes()() | ||
| path_info <- shinyFiles::parseFilePaths(volumes, input$big_file_browse) | ||
| local_big_file_path <- if (nrow(path_info) > 0) path_info$datapath else NULL | ||
|
|
||
| if (!is.numeric(input$max_feature_count) || is.na(input$max_feature_count) || input$max_feature_count <= 0) { | ||
| showNotification("Error: max_feature_count must be a positive number.", type = "error") | ||
| shinybusy::remove_modal_spinner() | ||
| return(NULL) | ||
| } | ||
|
|
||
| if (is.null(local_big_file_path) || !file.exists(local_big_file_path)) { | ||
| showNotification("Error: The selected file does not exist or is not readable.", type = "error") | ||
| shinybusy::remove_modal_spinner() | ||
| return(NULL) | ||
| } | ||
|
|
||
| shinybusy::update_modal_spinner(text = "Processing large DIANN file...") | ||
|
|
||
| # Call the big file conversion function from MSstatsConvert | ||
| converted_data <- bigDIANNtoMSstatsFormat( | ||
| input_file = local_big_file_path, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Two things:
|
||
| annotation = getAnnot(input), | ||
| output_file_name = "output_file.csv", | ||
| backend = "arrow", | ||
| MBR = isTRUE(input$diann_MBR), | ||
| quantificationColumn = input$diann_quantificationColumn, | ||
| max_feature_count = input$max_feature_count, | ||
| filter_unique_peptides = input$filter_unique_peptides, | ||
| aggregate_psms = input$aggregate_psms, | ||
| filter_few_obs = input$filter_few_obs | ||
| ) | ||
|
|
||
| # Attempt to load the data into memory. | ||
| mydata <- tryCatch({ | ||
| dplyr::collect(converted_data) | ||
| }, error = function(e) { | ||
| showNotification( | ||
| paste("Memory Error: The dataset is too large to process in-memory.", e$message), | ||
| type = "error", | ||
| duration = NULL | ||
| ) | ||
| return(NULL) | ||
| }) | ||
|
|
||
| if (is.null(mydata)) { | ||
| shinybusy::remove_modal_spinner() | ||
| return(NULL) | ||
| } | ||
|
Comment on lines
+595
to
+645
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed: rg -n 'big_file_browse' --type=r -C2
Repository: Vitek-Lab/MSstatsShiny Length of output: 2953
🏁 Script executed: rg -n 'bigDIANNtoMSstatsFormat' --type=r -C5
Repository: Vitek-Lab/MSstatsShiny Length of output: 4398
🏁 Script executed: sed -n '938,955p' R/utils.R
Repository: Vitek-Lab/MSstatsShiny Length of output: 1306
🏁 Script executed: # Check if both Spectronaut and DIANN big file UIs are conditionally rendered or always active
rg -n 'create_spectronaut_large_file_ui|create_diann_large_file_ui' -C3
Repository: Vitek-Lab/MSstatsShiny Length of output: 2795
Generated code for DIANN big-file conversion is missing the `annotation` argument. The runtime code at line 619 correctly includes `annotation = getAnnot(input)`. The shared […] 🤖 Prompt for AI Agents |
||
| } else { | ||
| if (getFileExtension(input$dianndata$name) %in% c("parquet", "pq")) { | ||
| data = read_parquet(input$dianndata$datapath) | ||
| } else { | ||
| data = read.csv(input$dianndata$datapath, sep=input$sep_dianndata) | ||
| sep = input$sep_dianndata | ||
| if(is.null(sep)) { | ||
| sep = "\t" | ||
| } | ||
| if (sep == "\t") { | ||
| data = read.delim(input$dianndata$datapath) | ||
| } else { | ||
| data = read.csv(input$dianndata$datapath, sep = sep) | ||
| } | ||
| } | ||
|
|
||
| qvalue_cutoff = 0.01 | ||
|
|
@@ -620,6 +680,7 @@ getData <- function(input) { | |
| use_log_file = FALSE, | ||
| quantificationColumn = quantificationColumn | ||
| ) | ||
| } | ||
| print("Mydata from mstats") | ||
| print(mydata) | ||
| } | ||
|
|
@@ -721,7 +782,8 @@ library(MSstatsTMT) | |
| library(MSstatsPTM)\n", sep = "") | ||
| codes = paste(codes, "\n# Package versions\n# MSstats version ", packageVersion("MSstats"), | ||
| "\n# MSstatsTMT version ", packageVersion("MSstatsTMT"), | ||
| "\n# MSstatsPTM version ", packageVersion("MSstatsPTM"), sep = "") | ||
| "\n# MSstatsPTM version ", packageVersion("MSstatsPTM"), | ||
| "\n# MSstatsBig version ", tryCatch(packageVersion("MSstatsBig"), error = function(e) "Not Installed"), sep = "") | ||
| codes = paste(codes, "\n\n# Read data\n", sep = "") | ||
| if(input$filetype == 'sample') { | ||
| if(input$BIO != "PTM" && input$DDA_DIA =='LType' && input$LabelFreeType == "SRM_PRM") { | ||
|
|
@@ -843,27 +905,68 @@ library(MSstatsPTM)\n", sep = "") | |
| } | ||
| else if(input$filetype == 'spec') { | ||
|
|
||
| codes = paste(codes, "data = read.csv(\"insert your MSstats scheme output from Spectronaut filepath\", header = TRUE, sep = ",input$sep_specdata,")\nannot_file = read.csv(\"insert your annotation filepath\", sep='\t')#Optional\n" | ||
| , sep = "") | ||
|
|
||
| codes = paste(codes, "data = SpectronauttoMSstatsFormat(data, | ||
| annotation = annot_file #Optional, | ||
| filter_with_Qvalue = TRUE, ## same as default | ||
| qvalue_cutoff = 0.01, ## same as default | ||
| fewMeasurements=\"remove\", | ||
| removeProtein_with1Feature = TRUE, | ||
| use_log_file = FALSE)\n", sep = "") | ||
| if (isTRUE(input$big_file_spec)) { | ||
| codes = paste(codes, "library(MSstatsBig)\n", sep = "") | ||
| codes = paste(codes, "data = MSstatsBig::bigSpectronauttoMSstatsFormat(\n", sep = "") | ||
| codes = paste(codes, " input_file = \"insert your large Spectronaut file path\",\n", sep = "") | ||
| codes = paste(codes, " output_file_name = \"output_file.csv\",\n", sep = "") | ||
| codes = paste(codes, " backend = \"arrow\",\n", sep = "") | ||
| codes = paste(codes, " filter_by_excluded = ", input$filter_by_excluded, ",\n", sep = "") | ||
| codes = paste(codes, " filter_by_identified = ", input$filter_by_identified, ",\n", sep = "") | ||
| codes = paste(codes, " filter_by_qvalue = ", input$filter_by_qvalue, ",\n", sep = "") | ||
| codes = paste(codes, " qvalue_cutoff = ", input$qvalue_cutoff, ",\n", sep = "") | ||
| codes = paste(codes, " max_feature_count = ", input$max_feature_count, ",\n", sep = "") | ||
| codes = paste(codes, " filter_unique_peptides = ", input$filter_unique_peptides, ",\n", sep = "") | ||
| codes = paste(codes, " aggregate_psms = ", input$aggregate_psms, ",\n", sep = "") | ||
| codes = paste(codes, " filter_few_obs = ", input$filter_few_obs, "\n", sep = "") | ||
| codes = paste(codes, ")\n", sep = "") | ||
| codes = paste(codes, "data = dplyr::collect(data)\n", sep = "") | ||
| } else { | ||
| codes = paste(codes, "data = read.csv(\"insert your MSstats scheme output from Spectronaut filepath\", header = TRUE, sep = ",input$sep_specdata,")\nannot_file = read.csv(\"insert your annotation filepath\", sep='\t')#Optional\n" | ||
| , sep = "") | ||
| codes = paste(codes, "data = SpectronauttoMSstatsFormat(data, | ||
| annotation = annot_file #Optional, | ||
| filter_with_Qvalue = TRUE, ## same as default | ||
| qvalue_cutoff = 0.01, ## same as default | ||
| fewMeasurements=\"remove\", | ||
| removeProtein_with1Feature = TRUE, | ||
| use_log_file = FALSE)\n", sep = "") | ||
| } | ||
| } | ||
| else if(input$filetype == 'diann') { | ||
|
|
||
| codes = paste(codes, "data = read.csv(\"insert your MSstats scheme output from DIANN filepath\", header = TRUE, sep = '\\t')\nannot_file = read.csv(\"insert your annotation filepath\")#Optional\n" | ||
| , sep = "") | ||
|
|
||
| codes = paste(codes, "data = DIANNtoMSstatsFormat(data, | ||
| annotation = annot_file, #Optional | ||
| qvalue_cutoff = 0.01, ## same as default | ||
| removeProtein_with1Feature = TRUE, | ||
| use_log_file = FALSE)\n", sep = "") | ||
| if (isTRUE(input$big_file_diann)) { | ||
| codes = paste(codes, "library(MSstatsBig)\n", sep = "") | ||
| codes = paste(codes, "data = MSstatsBig::bigDIANNtoMSstatsFormat(\n", sep = "") | ||
| codes = paste(codes, " input_file = \"insert your large DIANN file path\",\n", sep = "") | ||
| codes = paste(codes, " output_file_name = \"output_file.csv\",\n", sep = "") | ||
| codes = paste(codes, " backend = \"arrow\",\n", sep = "") | ||
| codes = paste(codes, " MBR = ", isTRUE(input$diann_MBR), ",\n", sep = "") | ||
| codes = paste(codes, " quantificationColumn = \"", input$diann_quantificationColumn, "\",\n", sep = "") | ||
| codes = paste(codes, " max_feature_count = ", input$max_feature_count, ",\n", sep = "") | ||
| codes = paste(codes, " filter_unique_peptides = ", input$filter_unique_peptides, ",\n", sep = "") | ||
| codes = paste(codes, " aggregate_psms = ", input$aggregate_psms, ",\n", sep = "") | ||
| codes = paste(codes, " filter_few_obs = ", input$filter_few_obs, "\n", sep = "") | ||
| codes = paste(codes, ")\n", sep = "") | ||
| codes = paste(codes, "data = dplyr::collect(data)\n", sep = "") | ||
|
Comment on lines
+938
to
+951
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Generated code for big DIANN is missing the `annotation` argument. The runtime call at line 619 passes `annotation = getAnnot(input)`. Proposed fix:
codes = paste(codes, "data = MSstatsBig::bigDIANNtoMSstatsFormat(\n", sep = "")
codes = paste(codes, " input_file = \"insert your large DIANN file path\",\n", sep = "")
+ codes = paste(codes, " annotation = read.csv(\"insert your annotation filepath\"),\n", sep = "")
codes = paste(codes, " output_file_name = \"output_file.csv\",\n", sep = "")
🤖 Prompt for AI Agents |
||
| } else { | ||
| sep = input$sep_dianndata | ||
| if(is.null(sep)) { | ||
| sep = "\t" | ||
| } | ||
|
|
||
| if (sep == "\t") { | ||
| codes = paste(codes, "data = read.delim(\"insert your MSstats scheme output from DIANN filepath\")\nannot_file = read.csv(\"insert your annotation filepath\")#Optional\n", sep = "") | ||
| } else { | ||
| codes = paste(codes, "data = read.csv(\"insert your MSstats scheme output from DIANN filepath\", header = TRUE, sep = '", sep, "')\nannot_file = read.csv(\"insert your annotation filepath\")#Optional\n", sep = "") | ||
| } | ||
|
|
||
| codes = paste(codes, "data = DIANNtoMSstatsFormat(data, | ||
| annotation = annot_file, #Optional | ||
| qvalue_cutoff = 0.01, ## same as default | ||
| removeProtein_with1Feature = TRUE, | ||
| use_log_file = FALSE)\n", sep = "") | ||
| } | ||
| } | ||
| else if(input$filetype == 'open') { | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.