diff --git a/DESCRIPTION b/DESCRIPTION index 1d749c0..7107604 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,9 @@ Imports: dplyr, DT, jsonlite, - reticulate + reticulate, + DBI, + RSQLite Suggests: testthat (>= 3.0.0), covr, diff --git a/NAMESPACE b/NAMESPACE index 32f8a83..9577f2a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,29 +3,50 @@ export(copy_images_to_class_folders) export(create_empty_changes_log) export(create_new_classifications) +export(export_all_db_to_mat) +export(export_all_db_to_png) +export(export_db_to_mat) +export(export_db_to_png) export(filter_to_extracted) export(get_config_dir) +export(get_db_path) +export(get_default_db_dir) export(get_file_index_path) export(get_sample_paths) export(get_settings_path) +export(import_all_mat_to_db) +export(import_mat_to_db) export(init_python_env) export(is_valid_sample_name) +export(list_annotated_samples_db) +export(load_annotations_db) export(load_class_list) export(load_file_index) export(load_from_classifier_mat) export(load_from_csv) +export(load_from_db) export(load_from_mat) export(read_roi_dimensions) export(rescan_file_index) export(run_app) export(sanitize_string) +export(save_annotations_db) export(save_file_index) export(save_sample_annotations) export(save_validation_statistics) +export(update_annotator) +importFrom(DBI,dbConnect) +importFrom(DBI,dbDisconnect) +importFrom(DBI,dbExecute) +importFrom(DBI,dbGetQuery) +importFrom(DBI,dbWriteTable) importFrom(DT,renderDT) +importFrom(RSQLite,SQLite) importFrom(bslib,bs_theme) importFrom(dplyr,filter) importFrom(iRfcb,ifcb_annotate_samples) +importFrom(iRfcb,ifcb_create_manual_file) +importFrom(iRfcb,ifcb_extract_pngs) importFrom(iRfcb,ifcb_get_mat_variable) importFrom(jsonlite,fromJSON) importFrom(reticulate,py_available) diff --git a/NEWS.md b/NEWS.md index 46b4a5c..61f390a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,16 @@ ## Features +### SQLite Database Backend +- Annotations are now stored in a local SQLite database 
(`annotations.sqlite`) by default +- Works out of the box with no Python dependency - only R packages (RSQLite, DBI) are needed +- MATLAB `.mat` file export is still available as an opt-in for ifcb-analysis compatibility +- Storage format configurable in Settings: "SQLite" (default), "MAT file", or "Both" +- Existing `.mat` annotations continue to work and can be loaded as before +- `import_mat_to_db()` utility for bulk migration of existing `.mat` files to SQLite +- Sample discovery scans both `.mat` files and the SQLite database +- When loading a sample, SQLite is checked first (faster), with `.mat` fallback + ### Sample Management - Load samples from ROI files with automatic year/month filtering - Support for validation mode (existing classifications) and annotation mode (new samples) @@ -54,7 +64,9 @@ - Visual warnings for classes in classifications not in class2use list ### Output -- Save annotations as MATLAB-compatible .mat files (using iRfcb) +- Save annotations to SQLite database (default, no Python needed) +- Optional: save annotations as MATLAB-compatible .mat files (using iRfcb, requires Python) +- Configurable storage format: SQLite only, MAT only, or both - Save validation statistics as CSV (in `validation_statistics/` subfolder) - Organize output PNGs by class folder (for CNN training) - Auto-save when navigating between samples @@ -76,7 +88,8 @@ - Switch between annotation/validation modes for dual-mode samples ## Technical Notes -- Requires Python with scipy for MAT file writing (optional - only for ifcb-analysis compatibility) +- SQLite is the default annotation storage - works out of the box with RSQLite (no external dependencies) +- Python with scipy is optional - only needed for MAT file export (ifcb-analysis compatibility) - Uses iRfcb package for IFCB data handling - Session cache preserves work when switching samples - File index cache reduces startup time by avoiding redundant folder scans diff --git a/R/database.R b/R/database.R new file 
mode 100644
index 0000000..536a218
--- /dev/null
+++ b/R/database.R
@@ -0,0 +1,652 @@
+# SQLite database backend for ClassiPyR annotations
+#
+# Provides functions to store and retrieve annotations in a local SQLite
+# database as an alternative to .mat files. SQLite is the default storage
+# backend - it works out of the box with no Python dependency.
+
+#' @importFrom DBI dbConnect dbDisconnect dbWriteTable dbGetQuery dbExecute
+#' @importFrom RSQLite SQLite
+#' @importFrom iRfcb ifcb_create_manual_file ifcb_extract_pngs
+NULL
+
+#' Get path to the annotations SQLite database
+#'
+#' Returns the path to \code{annotations.sqlite} in the given database
+#' directory. The database directory should be on a local filesystem, not a
+#' network drive, because
+#' \href{https://www.sqlite.org/useovernet.html}{SQLite file locking is
+#' unreliable over network filesystems}.
+#'
+#' @param db_folder Path to the database directory. Defaults to
+#' \code{\link{get_default_db_dir}()}, a persistent local directory.
+#' @return Path to the SQLite database file
+#' @export
+#' @seealso \code{\link{get_default_db_dir}} for the default database directory
+#' @examples
+#' # Use the default local database directory
+#' get_db_path(get_default_db_dir())
+#'
+#' # Or specify a custom directory
+#' get_db_path("/data/local_db")
+get_db_path <- function(db_folder = get_default_db_dir()) {
+  file.path(db_folder, "annotations.sqlite")
+}
+
+#' Initialize the annotations database schema
+#'
+#' Creates the \code{annotations} and \code{class_lists} tables if they do not
+#' already exist.
+#' +#' @param con A DBI connection object +#' @return NULL (called for side effects) +#' @keywords internal +init_db_schema <- function(con) { + dbExecute(con, " + CREATE TABLE IF NOT EXISTS annotations ( + sample_name TEXT NOT NULL, + roi_number INTEGER NOT NULL, + class_name TEXT NOT NULL, + annotator TEXT, + timestamp TEXT DEFAULT (datetime('now')), + is_manual INTEGER NOT NULL DEFAULT 1, + PRIMARY KEY (sample_name, roi_number) + ) + ") + + dbExecute(con, " + CREATE TABLE IF NOT EXISTS class_lists ( + sample_name TEXT NOT NULL, + class_index INTEGER NOT NULL, + class_name TEXT NOT NULL, + PRIMARY KEY (sample_name, class_index) + ) + ") + + # Migration: add is_manual column to existing databases that lack it + cols <- dbGetQuery(con, "PRAGMA table_info(annotations)") + if (!"is_manual" %in% cols$name) { + dbExecute(con, "ALTER TABLE annotations ADD COLUMN is_manual INTEGER NOT NULL DEFAULT 1") + } + + invisible(NULL) +} + +#' Save annotations to the SQLite database +#' +#' Writes (or replaces) annotations for a single sample. The existing rows for +#' the sample are deleted first so that re-saving acts as an upsert. +#' +#' @param db_path Path to the SQLite database file +#' @param sample_name Sample name (e.g., \code{"D20230101T120000_IFCB134"}) +#' @param classifications Data frame with at least \code{file_name} and +#' \code{class_name} columns +#' @param class2use Character vector of class names (preserves index order for +#' .mat export) +#' @param annotator Annotator name +#' @param is_manual Integer vector of 0/1 flags indicating whether each ROI was +#' manually reviewed (1) or not yet reviewed (0, corresponding to NaN in .mat +#' files). If \code{NULL} (the default), all ROIs are treated as reviewed. 
+#' @return TRUE on success, FALSE on failure +#' @export +#' @examples +#' \dontrun{ +#' db_path <- get_db_path("/data/manual") +#' save_annotations_db(db_path, "D20230101T120000_IFCB134", +#' classifications, class2use, "Jane") +#' } +save_annotations_db <- function(db_path, sample_name, classifications, + class2use, annotator = "Unknown", + is_manual = NULL) { + if (is.null(classifications) || nrow(classifications) == 0) { + return(FALSE) + } + + dir.create(dirname(db_path), recursive = TRUE, showWarnings = FALSE) + + con <- dbConnect(SQLite(), db_path) + on.exit(dbDisconnect(con), add = TRUE) + + init_db_schema(con) + + # Extract ROI numbers from file_name (e.g., "D20230101T120000_IFCB134_00001.png" -> 1) + roi_numbers <- as.integer(gsub(".*_(\\d+)\\.png$", "\\1", classifications$file_name)) + + if (is.null(is_manual)) { + is_manual <- rep(1L, nrow(classifications)) + } + + annotations_df <- data.frame( + sample_name = sample_name, + roi_number = roi_numbers, + class_name = classifications$class_name, + annotator = annotator, + timestamp = format(Sys.time(), "%Y-%m-%d %H:%M:%S"), + is_manual = as.integer(is_manual), + stringsAsFactors = FALSE + ) + + tryCatch({ + dbExecute(con, "BEGIN TRANSACTION") + + # Delete existing annotations for this sample (upsert semantics) + dbExecute(con, "DELETE FROM annotations WHERE sample_name = ?", + params = list(sample_name)) + dbWriteTable(con, "annotations", annotations_df, append = TRUE) + + # Save class list for this sample (preserves index order for .mat export) + dbExecute(con, "DELETE FROM class_lists WHERE sample_name = ?", + params = list(sample_name)) + if (length(class2use) > 0) { + class_list_df <- data.frame( + sample_name = sample_name, + class_index = seq_along(class2use), + class_name = class2use, + stringsAsFactors = FALSE + ) + dbWriteTable(con, "class_lists", class_list_df, append = TRUE) + } + + dbExecute(con, "COMMIT") + TRUE + }, error = function(e) { + tryCatch(dbExecute(con, "ROLLBACK"), error = 
function(e2) NULL)
+    warning("Failed to save annotations to database: ", e$message)
+    FALSE
+  })
+}
+
+#' Load annotations from the SQLite database
+#'
+#' Reads annotations for a single sample and returns a data frame in the same
+#' format as \code{\link{load_from_mat}}.
+#'
+#' @param db_path Path to the SQLite database file
+#' @param sample_name Sample name
+#' @param roi_dimensions Data frame from \code{\link{read_roi_dimensions}} with
+#' columns \code{roi_number}, \code{width}, \code{height}, \code{area}
+#' @return Data frame with columns: file_name, class_name, score, width, height,
+#' roi_area. Returns NULL if the sample has no annotations.
+#' @export
+#' @examples
+#' \dontrun{
+#' dims <- read_roi_dimensions("/data/raw/2023/D20230101/D20230101T120000_IFCB134.adc")
+#' db_path <- get_db_path("/data/manual")
+#' classifications <- load_annotations_db(db_path, "D20230101T120000_IFCB134", dims)
+#' }
+load_annotations_db <- function(db_path, sample_name, roi_dimensions) {
+  if (!file.exists(db_path)) {
+    return(NULL)
+  }
+
+  con <- dbConnect(SQLite(), db_path)
+  on.exit(dbDisconnect(con), add = TRUE)
+
+  rows <- dbGetQuery(con,
+    "SELECT roi_number, class_name FROM annotations WHERE sample_name = ? ORDER BY roi_number",
+    params = list(sample_name)
+  )
+
+  if (nrow(rows) == 0) {
+    return(NULL)
+  }
+
+  # Match ROI dimensions by roi_number (first match wins, NA fallback if absent)
+  roi_data <- lapply(rows$roi_number, function(rn) {
+    idx <- match(rn, roi_dimensions$roi_number)
+    if (!is.na(idx)) {
+      list(width = roi_dimensions$width[idx],
+           height = roi_dimensions$height[idx],
+           area = roi_dimensions$area[idx])
+    } else {
+      list(width = NA_real_, height = NA_real_, area = NA_real_)
+    }
+  })
+
+  classifications <- data.frame(
+    file_name = sprintf("%s_%05d.png", sample_name, rows$roi_number),
+    class_name = rows$class_name,
+    score = NA_real_,
+    width = vapply(roi_data, `[[`, numeric(1), "width"),
+    height = vapply(roi_data, `[[`, numeric(1), "height"),
+    roi_area = vapply(roi_data, `[[`, numeric(1), "area"),
+    stringsAsFactors = FALSE
+  )
+
+  # Sort by area (descending) - consistent with load_from_mat
+  classifications[order(-classifications$roi_area), ]
+}
+
+#' List samples with annotations in the database
+#'
+#' @param db_path Path to the SQLite database file
+#' @return Character vector of sample names that have annotations
+#' @export
+#' @examples
+#' \dontrun{
+#' db_path <- get_db_path("/data/manual")
+#' samples <- list_annotated_samples_db(db_path)
+#' }
+list_annotated_samples_db <- function(db_path) {
+  if (!file.exists(db_path)) {
+    return(character())
+  }
+
+  con <- dbConnect(SQLite(), db_path)
+  on.exit(dbDisconnect(con), add = TRUE)
+
+  # Check that the annotations table exists
+  tables <- dbGetQuery(con, "SELECT name FROM sqlite_master WHERE type='table'")
+  if (!"annotations" %in% tables$name) {
+    return(character())
+  }
+
+  result <- dbGetQuery(con, "SELECT DISTINCT sample_name FROM annotations ORDER BY sample_name")
+  result$sample_name
+}
+
+#' Update the annotator name for one or more samples
+#'
+#' Changes the annotator field for all annotations belonging to the specified
+#' sample(s).
This is useful for correcting the annotator after bulk imports +#' or when transferring ownership of annotations. +#' +#' @param db_path Path to the SQLite database file +#' @param sample_names Character vector of sample names to update +#' @param annotator New annotator name +#' @return Named integer vector with the number of rows updated per sample. +#' Samples not found in the database are included with a count of 0. +#' @export +#' @examples +#' \dontrun{ +#' db_path <- get_db_path("/data/manual") +#' +#' # Update a single sample +#' update_annotator(db_path, "D20230101T120000_IFCB134", "Jane") +#' +#' # Update multiple samples at once +#' update_annotator(db_path, +#' c("D20230101T120000_IFCB134", "D20230202T080000_IFCB134"), +#' "Jane") +#' +#' # Update all annotated samples +#' all_samples <- list_annotated_samples_db(db_path) +#' update_annotator(db_path, all_samples, "Jane") +#' } +update_annotator <- function(db_path, sample_names, annotator) { + if (!file.exists(db_path)) { + stop("Database not found: ", db_path) + } + if (length(sample_names) == 0) { + return(integer(0)) + } + if (!is.character(annotator) || length(annotator) != 1 || is.na(annotator)) { + stop("annotator must be a single non-NA character string") + } + + con <- dbConnect(SQLite(), db_path) + on.exit(dbDisconnect(con), add = TRUE) + + counts <- vapply(sample_names, function(sn) { + res <- dbExecute(con, + "UPDATE annotations SET annotator = ? WHERE sample_name = ?", + params = list(annotator, sn) + ) + as.integer(res) + }, integer(1)) + + counts +} + +#' Import a .mat annotation file into the SQLite database +#' +#' Reads an existing .mat annotation file and writes its data into the SQLite +#' database. The class list (\code{class2use_manual}) and classlist indices are +#' read directly from the .mat file to ensure a faithful import. ROIs with NaN +#' indices (not yet reviewed) are stored with \code{is_manual = 0}. 
+#' +#' @param mat_path Path to the .mat annotation file +#' @param db_path Path to the SQLite database file +#' @param sample_name Sample name +#' @param annotator Annotator name (defaults to \code{"imported"}) +#' @return TRUE on success, FALSE on failure +#' @export +#' @examples +#' \dontrun{ +#' import_mat_to_db( +#' mat_path = "/data/manual/D20230101T120000_IFCB134.mat", +#' db_path = get_db_path("/data/manual"), +#' sample_name = "D20230101T120000_IFCB134" +#' ) +#' } +import_mat_to_db <- function(mat_path, db_path, sample_name, + annotator = "imported") { + if (!file.exists(mat_path)) { + warning("MAT file not found: ", mat_path) + return(FALSE) + } + + tryCatch({ + # Read the class list embedded in the .mat file + class2use <- as.character(ifcb_get_mat_variable(mat_path, + variable_name = "class2use_manual")) + + classlist <- ifcb_get_mat_variable(mat_path, variable_name = "classlist") + roi_numbers <- classlist[, 1] + class_indices <- classlist[, 2] + + # Detect NaN (not yet reviewed) vs classified ROIs + is_nan <- is.nan(class_indices) + is_manual <- ifelse(is_nan, 0L, 1L) + + class_names <- vapply(class_indices, function(idx) { + if (is.na(idx) || is.nan(idx) || idx < 1 || idx > length(class2use)) { + "unclassified" + } else { + class2use[idx] + } + }, character(1)) + + # Build a classifications-like data frame for save_annotations_db + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, roi_numbers), + class_name = class_names, + stringsAsFactors = FALSE + ) + + save_annotations_db(db_path, sample_name, classifications, class2use, + annotator, is_manual = is_manual) + }, error = function(e) { + warning("Failed to import MAT file: ", e$message) + FALSE + }) +} + +#' Export annotations from SQLite to a .mat file +#' +#' Reads annotations for a single sample from the database and writes a +#' MATLAB-compatible annotation file using \code{iRfcb::ifcb_create_manual_file}. +#' Requires Python with scipy. 
+#'
+#' @param db_path Path to the SQLite database file
+#' @param sample_name Sample name
+#' @param output_folder Folder where the .mat file will be written
+#' @return TRUE on success, FALSE on failure
+#' @export
+#' @examples
+#' \dontrun{
+#' db_path <- get_db_path("/data/manual")
+#' export_db_to_mat(db_path, "D20230101T120000_IFCB134", "/data/manual")
+#' }
+export_db_to_mat <- function(db_path, sample_name, output_folder) {
+  if (!file.exists(db_path)) {
+    warning("Database not found: ", db_path)
+    return(FALSE)
+  }
+
+  con <- dbConnect(SQLite(), db_path)
+  on.exit(dbDisconnect(con), add = TRUE)
+
+  # Get annotations for this sample (including is_manual flag)
+  rows <- dbGetQuery(con,
+    "SELECT roi_number, class_name, is_manual FROM annotations WHERE sample_name = ? ORDER BY roi_number",
+    params = list(sample_name)
+  )
+
+  if (nrow(rows) == 0) {
+    warning("No annotations found for sample: ", sample_name)
+    return(FALSE)
+  }
+
+  # Get class list for this sample
+  class_list <- dbGetQuery(con,
+    "SELECT class_index, class_name FROM class_lists WHERE sample_name = ? ORDER BY class_index",
+    params = list(sample_name)
+  )
+
+  if (nrow(class_list) == 0) {
+    warning("No class list found for sample: ", sample_name)
+    return(FALSE)
+  }
+
+  class2use <- class_list$class_name
+
+  # Build classlist indexed by ROI number. NaN marks ROIs that are unreviewed
+  # (is_manual == 0) or missing from the database; unmatched classes map to 1.
+  row_indices <- as.numeric(match(rows$class_name, class2use))
+  row_indices[is.na(row_indices)] <- 1
+  row_indices[rows$is_manual == 0L] <- NaN
+  classlist_indices <- rep(NaN, max(rows$roi_number))
+  classlist_indices[rows$roi_number] <- row_indices
+
+  output_file <- file.path(output_folder, paste0(sample_name, ".mat"))
+
+  tryCatch({
+    dir.create(output_folder, recursive = TRUE, showWarnings = FALSE)
+    ifcb_create_manual_file(
+      roi_length = length(classlist_indices),
+      class2use = class2use,
+      output_file = output_file,
+      classlist = classlist_indices
+    )
+    TRUE
+  }, error = function(e) {
+    warning("Failed to export to MAT: ", e$message)
+    FALSE
+  })
+}
+
+#' Bulk import .mat annotation files into the SQLite database
+#'
+#' Scans a folder for \code{.mat} annotation files (excluding classifier output
+#' files matching \code{*_class*.mat}) and imports each into the database. Each
+#' file's embedded \code{class2use_manual} is used for class-name mapping.
+#' +#' @param mat_folder Folder containing .mat annotation files +#' @param db_path Path to the SQLite database file +#' @param annotator Annotator name (defaults to \code{"imported"}) +#' @return Named list with counts: \code{success}, \code{failed}, \code{skipped} +#' @export +#' @examples +#' \dontrun{ +#' db_path <- get_db_path("/data/manual") +#' result <- import_all_mat_to_db("/data/manual", db_path) +#' cat(result$success, "imported,", result$failed, "failed,", result$skipped, "skipped\n") +#' } +import_all_mat_to_db <- function(mat_folder, db_path, + annotator = "imported") { + mat_files <- list.files(mat_folder, pattern = "\\.mat$", full.names = TRUE) + # Exclude classifier output files (*_class*.mat) and class2use files + mat_files <- mat_files[!grepl("_class", basename(mat_files))] + mat_files <- mat_files[!grepl("^class2use", basename(mat_files))] + + counts <- list(success = 0L, failed = 0L, skipped = 0L) + + if (length(mat_files) == 0) { + return(counts) + } + + # Get already-imported samples to allow skipping + existing <- list_annotated_samples_db(db_path) + + for (mat_path in mat_files) { + sample_name <- tools::file_path_sans_ext(basename(mat_path)) + + if (sample_name %in% existing) { + counts$skipped <- counts$skipped + 1L + next + } + + ok <- import_mat_to_db(mat_path, db_path, sample_name, annotator) + if (isTRUE(ok)) { + counts$success <- counts$success + 1L + } else { + counts$failed <- counts$failed + 1L + } + } + + counts +} + +#' Bulk export all annotated samples from SQLite to .mat files +#' +#' Exports every sample in the database to a MATLAB-compatible annotation file. +#' Requires Python with scipy. 
+#'
+#' @param db_path Path to the SQLite database file
+#' @param output_folder Folder where .mat files will be written
+#' @return Named list with counts: \code{success}, \code{failed}
+#' @export
+#' @examples
+#' \dontrun{
+#' db_path <- get_db_path("/data/manual")
+#' result <- export_all_db_to_mat(db_path, "/data/manual")
+#' cat(result$success, "exported,", result$failed, "failed\n")
+#' }
+export_all_db_to_mat <- function(db_path, output_folder) {
+  samples <- list_annotated_samples_db(db_path)
+
+  counts <- list(success = 0L, failed = 0L)
+
+  if (length(samples) == 0) {
+    return(counts)
+  }
+
+  for (sample_name in samples) {
+    ok <- export_db_to_mat(db_path, sample_name, output_folder)
+    if (isTRUE(ok)) {
+      counts$success <- counts$success + 1L
+    } else {
+      counts$failed <- counts$failed + 1L
+    }
+  }
+
+  counts
+}
+
+#' Export annotated images from SQLite to class-organized PNG folders
+#'
+#' Reads annotations for a single sample from the database and extracts PNG
+#' images from the ROI file, placing each image into a subfolder named after
+#' its assigned class.
+#'
+#' @param db_path Path to the SQLite database file
+#' @param sample_name Sample name
+#' @param roi_path Path to the \code{.roi} file for this sample
+#' @param png_folder Base output folder. Images are written to
+#' \code{png_folder/<class_name>/}
+#' @param skip_class Character vector of class names to exclude from export
+#' (e.g. \code{"unclassified"}). Default \code{NULL} exports all classes.
+#' @return TRUE on success, FALSE on failure +#' @export +#' @examples +#' \dontrun{ +#' db_path <- get_db_path("/data/manual") +#' export_db_to_png(db_path, "D20230101T120000_IFCB134", +#' "/data/raw/2023/D20230101/D20230101T120000_IFCB134.roi", +#' "/data/png_output", +#' skip_class = "unclassified") +#' } +export_db_to_png <- function(db_path, sample_name, roi_path, png_folder, + skip_class = NULL) { + if (!file.exists(db_path)) { + warning("Database not found: ", db_path) + return(FALSE) + } + if (!file.exists(roi_path)) { + warning("ROI file not found: ", roi_path) + return(FALSE) + } + + con <- dbConnect(SQLite(), db_path) + on.exit(dbDisconnect(con), add = TRUE) + + rows <- dbGetQuery(con, + "SELECT roi_number, class_name FROM annotations WHERE sample_name = ? ORDER BY roi_number", + params = list(sample_name) + ) + + if (nrow(rows) == 0) { + warning("No annotations found for sample: ", sample_name) + return(FALSE) + } + + # Filter out skipped classes + if (!is.null(skip_class) && length(skip_class) > 0) { + rows <- rows[!rows$class_name %in% skip_class, ] + if (nrow(rows) == 0) { + return(TRUE) # All ROIs were in skipped classes — nothing to export + } + } + + dir.create(png_folder, recursive = TRUE, showWarnings = FALSE) + + # Group ROIs by class name and extract each group with taxaname for subfolder + classes <- unique(rows$class_name) + + tryCatch({ + for (cls in classes) { + roi_numbers <- rows$roi_number[rows$class_name == cls] + ifcb_extract_pngs( + roi_file = roi_path, + out_folder = png_folder, + ROInumbers = roi_numbers, + taxaname = cls, + verbose = FALSE + ) + } + TRUE + }, error = function(e) { + warning("Failed to export PNGs for ", sample_name, ": ", e$message) + FALSE + }) +} + +#' Bulk export all annotated samples from SQLite to class-organized PNGs +#' +#' Exports every annotated sample in the database to PNG images organized +#' into class subfolders. 
+#' +#' @param db_path Path to the SQLite database file +#' @param png_folder Base output folder for PNGs +#' @param roi_path_map Named list mapping sample names to \code{.roi} file +#' paths. Samples without an entry are skipped. +#' @param skip_class Character vector of class names to exclude from export +#' (e.g. \code{"unclassified"}). Default \code{NULL} exports all classes. +#' @return Named list with counts: \code{success}, \code{failed}, \code{skipped} +#' @export +#' @examples +#' \dontrun{ +#' db_path <- get_db_path("/data/manual") +#' roi_map <- list("D20230101T120000_IFCB134" = "/data/raw/.../D20230101T120000_IFCB134.roi") +#' result <- export_all_db_to_png(db_path, "/data/png_output", roi_map, +#' skip_class = "unclassified") +#' cat(result$success, "exported,", result$failed, "failed,", result$skipped, "skipped\n") +#' } +export_all_db_to_png <- function(db_path, png_folder, roi_path_map, + skip_class = NULL) { + samples <- list_annotated_samples_db(db_path) + + counts <- list(success = 0L, failed = 0L, skipped = 0L) + + if (length(samples) == 0) { + return(counts) + } + + for (sample_name in samples) { + roi_path <- roi_path_map[[sample_name]] + if (is.null(roi_path) || !file.exists(roi_path)) { + counts$skipped <- counts$skipped + 1L + next + } + + ok <- export_db_to_png(db_path, sample_name, roi_path, png_folder, + skip_class = skip_class) + if (isTRUE(ok)) { + counts$success <- counts$success + 1L + } else { + counts$failed <- counts$failed + 1L + } + } + + counts +} diff --git a/R/sample_loading.R b/R/sample_loading.R index 3d3138e..0f97e9f 100644 --- a/R/sample_loading.R +++ b/R/sample_loading.R @@ -55,6 +55,27 @@ load_from_csv <- function(csv_path) { classifications } +#' Load classifications from SQLite database +#' +#' Reads annotations for a sample from the SQLite database and returns a data +#' frame in the same format as \code{\link{load_from_mat}}. 
+#' +#' @param db_path Path to the SQLite database file +#' @param sample_name Sample name (e.g., "D20230101T120000_IFCB134") +#' @param roi_dimensions Data frame from \code{\link{read_roi_dimensions}} +#' @return Data frame with columns: file_name, class_name, score, width, height, +#' roi_area. Returns NULL if the sample has no annotations in the database. +#' @export +#' @examples +#' \dontrun{ +#' dims <- read_roi_dimensions("/data/raw/2023/D20230101/D20230101T120000_IFCB134.adc") +#' db_path <- get_db_path("/data/manual") +#' classifications <- load_from_db(db_path, "D20230101T120000_IFCB134", dims) +#' } +load_from_db <- function(db_path, sample_name, roi_dimensions) { + load_annotations_db(db_path, sample_name, roi_dimensions) +} + #' Load classifications from existing MAT annotation file #' #' Reads a MATLAB annotation file (created by ClassiPyR or ifcb-analysis) @@ -83,13 +104,23 @@ load_from_mat <- function(mat_path, sample_name, class2use, roi_dimensions) { # Read classlist from MAT file (column 2 contains class indices) classlist <- ifcb_get_mat_variable(mat_path, variable_name = "classlist") + # Prefer the class list embedded in the .mat file for accurate index mapping + mat_class2use <- tryCatch( + as.character(ifcb_get_mat_variable(mat_path, + variable_name = "class2use_manual")), + error = function(e) NULL + ) + if (!is.null(mat_class2use) && length(mat_class2use) > 0) { + class2use <- mat_class2use + } + # Map class indices to class names roi_numbers <- classlist[, 1] class_indices <- classlist[, 2] - # Get class names from indices (handle 0 or NA as "unclassified") + # Get class names from indices (handle NaN, 0, or NA as "unclassified") class_names <- sapply(class_indices, function(idx) { - if (is.na(idx) || idx < 1 || idx > length(class2use)) { + if (is.na(idx) || is.nan(idx) || idx < 1 || idx > length(class2use)) { return("unclassified") } return(class2use[idx]) diff --git a/R/sample_saving.R b/R/sample_saving.R index ee212b6..0857a87 100644 
--- a/R/sample_saving.R +++ b/R/sample_saving.R @@ -3,31 +3,38 @@ #' @importFrom iRfcb ifcb_annotate_samples NULL -#' Save sample annotations to MAT and statistics files +#' Save sample annotations #' -#' Saves the current annotations for a sample, including: -#' - MAT file compatible with ifcb-analysis (requires Python) -#' - Validation statistics CSV files -#' - PNG images organized by class +#' Saves the current annotations for a sample. By default annotations are +#' stored in a local SQLite database (\code{annotations.sqlite} in the database +#' folder). Optionally, a MATLAB-compatible \code{.mat} file can also be +#' written (requires Python + scipy). #' #' @param sample_name Sample name (e.g., "D20230101T120000_IFCB134") #' @param classifications Current classifications data frame #' @param original_classifications Original classifications data frame (for comparison) #' @param changes_log Changes log data frame from \code{\link{create_empty_changes_log}} #' @param temp_png_folder Path to temporary folder with extracted PNG images -#' @param output_folder Output folder path for MAT files +#' @param output_folder Output folder path for MAT files and statistics #' @param png_output_folder PNG output folder path (organized by class) #' @param roi_folder ROI folder path (for ADC file location, used as fallback) #' @param class2use_path Path to class2use file +#' @param class2use Character vector of class names. When NULL (default), loaded +#' from \code{class2use_path}. #' @param annotator Annotator name for statistics #' @param adc_folder Direct path to the ADC folder. When provided, this is used #' instead of constructing the path via \code{\link{get_sample_paths}}. #' This supports non-standard folder structures. +#' @param save_format One of \code{"sqlite"} (default), \code{"mat"}, or +#' \code{"both"}. Controls which backend(s) are written. +#' @param db_folder Path to the database folder for SQLite storage. 
Defaults to +#' \code{\link{get_default_db_dir}()}. Should be a local filesystem path, +#' not a network drive. #' @return TRUE on success, FALSE on failure #' @export #' @examples #' \dontrun{ -#' # Save annotations for a sample +#' # Save annotations for a sample (default: SQLite) #' success <- save_sample_annotations( #' sample_name = "D20230101T120000_IFCB134", #' classifications = current_classifications, @@ -50,8 +57,11 @@ save_sample_annotations <- function(sample_name, png_output_folder, roi_folder, class2use_path, + class2use = NULL, annotator = "Unknown", - adc_folder = NULL) { + adc_folder = NULL, + save_format = "sqlite", + db_folder = get_default_db_dir()) { if (is.null(sample_name) || is.null(classifications) || is.null(class2use_path)) { return(FALSE) @@ -76,11 +86,10 @@ save_sample_annotations <- function(sample_name, dir.create(png_output_folder, recursive = TRUE) } - # Create temporary PNG folder structure for ifcb_annotate_samples + # Copy images to class subfolders temp_annotate_folder <- tempfile(pattern = "ifcb_annotate_") dir.create(temp_annotate_folder, recursive = TRUE) - # Copy images to class subfolders copy_images_to_class_folders( classifications = classifications, src_folder = file.path(temp_png_folder, sample_name), @@ -88,21 +97,34 @@ save_sample_annotations <- function(sample_name, output_folder = png_output_folder ) - # Find ADC folder: use provided path, or fall back to get_sample_paths() - if (is.null(adc_folder)) { - paths <- get_sample_paths(sample_name, roi_folder) - adc_folder <- paths$adc_folder + # Save to SQLite (fast, no Python needed) + if (save_format %in% c("sqlite", "both")) { + # Load class list if not provided + c2u <- class2use + if (is.null(c2u)) { + c2u <- load_class_list(class2use_path) + } + db_path <- get_db_path(db_folder) + save_annotations_db(db_path, sample_name, classifications, c2u, annotator) } - # Run annotation - save MAT to output folder directly - ifcb_annotate_samples( - png_folder = 
temp_annotate_folder, - adc_folder = adc_folder, - class2use_file = class2use_path, - output_folder = output_folder, - sample_names = sample_name, - remove_trailing_numbers = FALSE - ) + # Save to .mat (requires Python + scipy) + if (save_format %in% c("mat", "both")) { + # Find ADC folder: use provided path, or fall back to get_sample_paths() + if (is.null(adc_folder)) { + paths <- get_sample_paths(sample_name, roi_folder) + adc_folder <- paths$adc_folder + } + + ifcb_annotate_samples( + png_folder = temp_annotate_folder, + adc_folder = adc_folder, + class2use_file = class2use_path, + output_folder = output_folder, + sample_names = sample_name, + remove_trailing_numbers = FALSE + ) + } # Save statistics save_validation_statistics( diff --git a/R/utils.R b/R/utils.R index 315b297..d8c3717 100644 --- a/R/utils.R +++ b/R/utils.R @@ -11,6 +11,8 @@ #' @importFrom jsonlite fromJSON #' @importFrom reticulate py_available #' @importFrom dplyr filter +#' @importFrom DBI dbConnect dbDisconnect dbGetQuery dbWriteTable dbExecute +#' @importFrom RSQLite SQLite NULL #' Get ClassiPyR configuration directory @@ -33,6 +35,28 @@ get_config_dir <- function() { tools::R_user_dir("ClassiPyR", "config") } +#' Get default database directory +#' +#' Returns the default path for the SQLite annotations database. This is a +#' persistent, local, user-level directory that survives package reinstalls. +#' The database should be stored on a local filesystem, not on a network +#' drive, because SQLite file locking is unreliable over network filesystems. 
+#' +#' @return Path to the default database directory +#' @export +#' @seealso \code{\link{get_db_path}} for the full database file path +#' @examples +#' # Get the default database directory +#' db_dir <- get_default_db_dir() +#' print(db_dir) +get_default_db_dir <- function() { + # Check if running under R CMD check + if (nzchar(Sys.getenv("_R_CHECK_PACKAGE_NAME_", ""))) { + return(file.path(tempdir(), "ClassiPyR", "db")) + } + tools::R_user_dir("ClassiPyR", "data") +} + #' Get path to settings file #' #' Returns the path to the settings JSON file, creating the configuration @@ -113,8 +137,11 @@ load_file_index <- function() { #' #' @param roi_folder Path to ROI data folder. If NULL, read from saved settings. #' @param csv_folder Path to classification folder (CSV/MAT). If NULL, read from saved settings. -#' @param output_folder Path to output folder for annotations. If NULL, read from saved settings. +#' @param output_folder Path to output folder for MAT annotations. If NULL, read from saved settings. #' @param verbose If TRUE, print progress messages. Default TRUE. +#' @param db_folder Path to the database folder for SQLite annotations. If NULL, +#' read from saved settings; if not found in settings, defaults to +#' \code{\link{get_default_db_dir}()}. #' @return Invisibly returns the file index list, or NULL if roi_folder is invalid. 
#' @export #' @examples @@ -133,9 +160,11 @@ load_file_index <- function() { #' # Rscript -e 'ClassiPyR::rescan_file_index()' #' } rescan_file_index <- function(roi_folder = NULL, csv_folder = NULL, - output_folder = NULL, verbose = TRUE) { + output_folder = NULL, verbose = TRUE, + db_folder = NULL) { # Read from saved settings if not provided - if (is.null(roi_folder) || is.null(csv_folder) || is.null(output_folder)) { + if (is.null(roi_folder) || is.null(csv_folder) || is.null(output_folder) || + is.null(db_folder)) { settings_path <- get_settings_path() if (file.exists(settings_path)) { saved <- tryCatch( @@ -145,9 +174,15 @@ rescan_file_index <- function(roi_folder = NULL, csv_folder = NULL, if (is.null(roi_folder)) roi_folder <- saved$roi_folder if (is.null(csv_folder)) csv_folder <- saved$csv_folder if (is.null(output_folder)) output_folder <- saved$output_folder + if (is.null(db_folder)) db_folder <- saved$db_folder } } + # Fall back to default db folder if still NULL + if (is.null(db_folder)) { + db_folder <- get_default_db_dir() + } + # Validate ROI folder roi_valid <- !is.null(roi_folder) && length(roi_folder) == 1 && !isTRUE(is.na(roi_folder)) && nzchar(roi_folder) && dir.exists(roi_folder) @@ -219,15 +254,24 @@ rescan_file_index <- function(roi_folder = NULL, csv_folder = NULL, if (verbose) message(" Found ", length(classified), " classified samples") } - # Scan output folder for manual annotations + # Scan output folder for manual annotations (.mat files + SQLite database) annotated <- character() if (output_valid) { if (verbose) message("Scanning output folder: ", output_folder) + + # Scan .mat files output_mat_files <- list.files(output_folder, pattern = "\\.mat$", full.names = FALSE) manual_mat_files <- output_mat_files[!grepl("_class", output_mat_files)] - annotated <- tools::file_path_sans_ext(manual_mat_files) - annotated <- annotated[annotated %in% sample_names] + annotated_mat <- tools::file_path_sans_ext(manual_mat_files) + annotated_mat <- 
annotated_mat[annotated_mat %in% sample_names] + + # Scan SQLite database + db_path <- get_db_path(db_folder) + annotated_db <- list_annotated_samples_db(db_path) + annotated_db <- annotated_db[annotated_db %in% sample_names] + + annotated <- unique(c(annotated_mat, annotated_db)) if (verbose) message(" Found ", length(annotated), " annotated samples") } diff --git a/README.md b/README.md index 1624124..65a6a1d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # ClassiPyR ClassiPyR website [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![R-CMD-check](https://github.com/EuropeanIFCBGroup/ClassiPyR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/EuropeanIFCBGroup/ClassiPyR/actions/workflows/R-CMD-check.yaml) [![codecov](https://codecov.io/gh/EuropeanIFCBGroup/ClassiPyR/branch/main/graph/badge.svg)](https://app.codecov.io/gh/EuropeanIFCBGroup/ClassiPyR) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.18414999.svg)](https://doi.org/10.5281/zenodo.18414999) @@ -18,28 +17,41 @@ A Shiny application for manual (human) image classification and validation of Im - **Dual Mode**: Validate existing classifications or annotate from scratch - **Multiple Formats**: Load from CSV or MATLAB classifier output +- **SQLite Storage**: Annotations stored in a local SQLite database by default - no Python needed - **Efficient Workflow**: Drag-select, batch relabeling, class filtering -- **MATLAB Compatible**: Export for [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) toolbox +- **MATLAB Compatible**: Optional `.mat` export for [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) toolbox - **CNN Training Ready**: Organized PNG output by class - **Measure Tool**: Built-in ruler for image measurements -- **Cross-Platform**: Web-based 
folder browser works on all platforms +- **Cross-Platform**: Works on all platforms with no external dependencies ## Installation +Install the latest release from GitHub using the `remotes` package: + ```r -install.packages("remotes") -remotes::install_github("EuropeanIFCBGroup/ClassiPyR") +# Install remotes +if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes") + +# Install ClassiPyR +remotes::install_github("EuropeanIFCBGroup/ClassiPyR", + ref = remotes::github_release()) ``` `ClassiPyR` depends on [iRfcb](https://github.com/EuropeanIFCBGroup/iRfcb) for IFCB data handling, which is installed automatically. -### Python Setup +### Python Setup (optional) + +Python is **not required** for the default workflow. Annotations are stored in a local SQLite database that works out of the box. -Python is required for saving annotations as MATLAB .mat files. If you only need to read existing .mat files or work with CSV files, this step is optional. +Python is only needed if you want to export annotations as MATLAB `.mat` files for use with [ifcb-analysis](https://github.com/hsosik/ifcb-analysis). 
To set up using `iRfcb`: ```r library(iRfcb) + +# Define a path where the venv will be installed venv_path = "/path/to/your/venv" + +# Install the venv ifcb_py_install(venv_path) ``` diff --git a/_pkgdown.yml b/_pkgdown.yml index d36c266..dda7f38 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -49,6 +49,7 @@ reference: - load_from_classifier_mat - load_from_csv - load_from_mat + - load_from_db - create_new_classifications - filter_to_extracted - title: Sample Saving @@ -57,6 +58,21 @@ reference: - save_sample_annotations - save_validation_statistics - copy_images_to_class_folders +- title: Database Backend + desc: SQLite database functions for annotation storage + contents: + - get_default_db_dir + - get_db_path + - save_annotations_db + - load_annotations_db + - list_annotated_samples_db + - update_annotator + - import_mat_to_db + - import_all_mat_to_db + - export_db_to_mat + - export_all_db_to_mat + - export_db_to_png + - export_all_db_to_png - title: File Index Cache desc: Functions for managing the file index cache for faster startup contents: diff --git a/inst/app/global.R b/inst/app/global.R index 32d00af..22e4a26 100644 --- a/inst/app/global.R +++ b/inst/app/global.R @@ -15,6 +15,8 @@ suppressPackageStartupMessages({ library(DT) library(jsonlite) library(reticulate) + library(DBI) + library(RSQLite) }) # Get version from package diff --git a/inst/app/server.R b/inst/app/server.R index 29eb1fc..4c82bcc 100644 --- a/inst/app/server.R +++ b/inst/app/server.R @@ -97,11 +97,13 @@ server <- function(input, output, session) { roi_folder = startup_wd, output_folder = startup_wd, png_output_folder = startup_wd, + db_folder = get_default_db_dir(), use_threshold = TRUE, pixels_per_micron = 3.4, # IFCB default resolution auto_sync = TRUE, # Automatically sync folders on startup class2use_path = NULL, # Path to class2use file for auto-loading - python_venv_path = NULL # NULL = use ./venv in working directory + python_venv_path = NULL, # NULL = use ./venv in working
directory + save_format = "sqlite" # "sqlite" (default), "mat", or "both" ) if (file.exists(settings_file)) { @@ -151,10 +153,12 @@ server <- function(input, output, session) { roi_folder = saved_settings$roi_folder, output_folder = saved_settings$output_folder, png_output_folder = saved_settings$png_output_folder, + db_folder = saved_settings$db_folder, use_threshold = saved_settings$use_threshold, pixels_per_micron = saved_settings$pixels_per_micron, auto_sync = saved_settings$auto_sync, - python_venv_path = saved_settings$python_venv_path + python_venv_path = saved_settings$python_venv_path, + save_format = saved_settings$save_format ) # Initialize class dropdown with default class list on startup @@ -214,11 +218,10 @@ server <- function(input, output, session) { title = "Settings", size = "l", easyClose = TRUE, - - fileInput("class2use_file", "Class List File (.mat or .txt)", - accept = c(".mat", ".txt")), - - # Classification Folder (CSV and MAT) + + # ── Folder Paths ────────────────────────────────────────────── + h5("Folder Paths"), + div( style = "display: flex; gap: 5px; align-items: flex-end; margin-bottom: 15px;", div(style = "flex: 1;", @@ -227,8 +230,7 @@ server <- function(input, output, session) { shinyDirButton("browse_csv_folder", "Browse", "Select Classification Folder", class = "btn-outline-secondary", style = "margin-bottom: 15px;") ), - - # ROI Folder + div( style = "display: flex; gap: 5px; align-items: flex-end; margin-bottom: 15px;", div(style = "flex: 1;", @@ -237,18 +239,16 @@ server <- function(input, output, session) { shinyDirButton("browse_roi_folder", "Browse", "Select ROI Data Folder", class = "btn-outline-secondary", style = "margin-bottom: 15px;") ), - - # Output Folder + div( style = "display: flex; gap: 5px; align-items: flex-end; margin-bottom: 15px;", div(style = "flex: 1;", - textInput("cfg_output_folder", "Output Folder (MAT & CSV)", + textInput("cfg_output_folder", "Output Folder (MAT/statistics)", value = 
config$output_folder, width = "100%")), shinyDirButton("browse_output_folder", "Browse", "Select Output Folder", class = "btn-outline-secondary", style = "margin-bottom: 15px;") ), - - # PNG Output Folder + div( style = "display: flex; gap: 5px; align-items: flex-end; margin-bottom: 15px;", div(style = "flex: 1;", @@ -257,47 +257,105 @@ server <- function(input, output, session) { shinyDirButton("browse_png_folder", "Browse", "Select PNG Output Folder", class = "btn-outline-secondary", style = "margin-bottom: 15px;") ), - - hr(), - - # Sync options + + div( + style = "display: flex; gap: 5px; align-items: flex-end; margin-bottom: 5px;", + div(style = "flex: 1;", + textInput("cfg_db_folder", "Database Folder (SQLite)", + value = config$db_folder, width = "100%")), + shinyDirButton("browse_db_folder", "Browse", "Select Database Folder", + class = "btn-outline-secondary", style = "margin-bottom: 15px;") + ), + tags$small(class = "text-muted", style = "display: block; margin-bottom: 15px;", + "Must be a local drive. SQLite databases are", + tags$a(href = "https://www.sqlite.org/useovernet.html", target = "_blank", + "not safe on network filesystems"), + "due to unreliable file locking."), + checkboxInput("cfg_auto_sync", "Sync folders automatically on startup", value = config$auto_sync), tags$small(class = "text-muted", "When disabled, the app loads from cache on startup. 
Use the sync button to update manually."), - + hr(), - - # Classifier options - h5("Classifier Options"), + + # ── Class List ──────────────────────────────────────────────── + h5("Class List"), + + fileInput("class2use_file", "Load class list file (.mat or .txt)", + accept = c(".mat", ".txt")), + + div( + style = "display: flex; align-items: center; gap: 10px;", + actionButton("open_class_editor", "Edit Class List", + icon = icon("list"), class = "btn-outline-primary"), + tags$span(class = "text-muted", style = "font-size: 12px;", + textOutput("class_count_text", inline = TRUE)) + ), + + hr(), + + # ── Annotation Storage ──────────────────────────────────────── + h5("Annotation Storage"), + + selectInput("cfg_save_format", "Storage Format", + choices = c( + "SQLite (recommended)" = "sqlite", + "MAT file (MATLAB compatible)" = "mat", + "Both SQLite and MAT" = "both" + ), + selected = config$save_format), + tags$small(class = "text-muted", + "SQLite works out of the box. MAT files require Python and are only needed for ifcb-analysis compatibility."), + + hr(), + + # ── Import / Export ──────────────────────────────────────────── + h5("Import / Export"), + + div( + style = "display: flex; gap: 10px; margin-bottom: 8px;", + actionButton("import_mat_to_db_btn", "Import .mat \u2192 SQLite", + icon = icon("database"), class = "btn-outline-secondary btn-sm"), + actionButton("export_db_to_mat_btn", "Export SQLite \u2192 .mat", + icon = icon("file-export"), class = "btn-outline-secondary btn-sm"), + actionButton("export_db_to_png_btn", "Export SQLite \u2192 PNG", + icon = icon("image"), class = "btn-outline-secondary btn-sm") + ), + tags$small(class = "text-muted", + "Bulk import/export all annotated samples between storage formats.", + "PNG export extracts images into class-name subfolders."), + + div( + style = "margin-top: 8px;", + textInput("cfg_skip_class_png", "Skip class in PNG export", + value = if (!is.null(rv$class2use) && length(rv$class2use) > 0) rv$class2use[1] 
else "", + width = "250px"), + tags$small(class = "text-muted", + "Images with this class are excluded from PNG export.", + "Pre-filled with the first class in your class list.", + "Leave empty to export all classes.") + ), + + hr(), + + # ── IFCB Options ────────────────────────────────────────────── + h5("IFCB Options"), + checkboxInput("cfg_use_threshold", "Apply classification threshold", value = config$use_threshold), tags$small(class = "text-muted", - "When enabled, classifications below the confidence threshold are marked as 'unclassified'"), - - hr(), - - # Image resolution setting - h5("Image Resolution"), + "Only applies to ifcb-analysis MATLAB classifier output (*_class*.mat).", + "When enabled, classifications below the confidence threshold are marked as 'unclassified'."), + div( - style = "display: flex; gap: 10px; align-items: center;", + style = "display: flex; gap: 10px; align-items: center; margin-top: 10px;", numericInput("cfg_pixels_per_micron", "Pixels per micron", value = config$pixels_per_micron, min = 0.1, max = 20, step = 0.1, width = "150px"), - tags$small(class = "text-muted", "IFCB default: 3.4 px/µm") + tags$small(class = "text-muted", "Scale calibration for the measuring tool. 
IFCB default: 3.4 px/\u00b5m.") ), - - hr(), - - # Class list editor button - div( - style = "display: flex; align-items: center; gap: 10px;", - actionButton("open_class_editor", "Edit Class List", - icon = icon("list"), class = "btn-outline-primary"), - tags$span(class = "text-muted", style = "font-size: 12px;", - textOutput("class_count_text", inline = TRUE)) - ), - + footer = tagList( modalButton("Cancel"), actionButton("save_settings", "Save Settings", class = "btn-primary") @@ -313,6 +371,8 @@ server <- function(input, output, session) { roots = make_dynamic_roots("cfg_roi_folder"), session = session) shinyDirChoose(input, "browse_output_folder", roots = make_dynamic_roots("cfg_output_folder"), session = session) + shinyDirChoose(input, "browse_db_folder", + roots = make_dynamic_roots("cfg_db_folder"), session = session) shinyDirChoose(input, "browse_png_folder", roots = make_dynamic_roots("cfg_png_output_folder"), session = session) @@ -344,6 +404,15 @@ server <- function(input, output, session) { } }) + observeEvent(input$browse_db_folder, { + if (!is.integer(input$browse_db_folder)) { + folder <- parseDirPath(get_browse_volumes(input$cfg_db_folder), input$browse_db_folder) + if (length(folder) > 0) { + updateTextInput(session, "cfg_db_folder", value = as.character(folder)) + } + } + }) + observeEvent(input$browse_png_folder, { if (!is.integer(input$browse_png_folder)) { folder <- parseDirPath(get_browse_volumes(input$cfg_png_output_folder), input$browse_png_folder) @@ -614,10 +683,12 @@ server <- function(input, output, session) { config$roi_folder <- input$cfg_roi_folder config$output_folder <- input$cfg_output_folder config$png_output_folder <- input$cfg_png_output_folder + config$db_folder <- input$cfg_db_folder config$use_threshold <- input$cfg_use_threshold config$pixels_per_micron <- input$cfg_pixels_per_micron config$auto_sync <- input$cfg_auto_sync - + config$save_format <- input$cfg_save_format + # Persist settings to file for next session # 
python_venv_path is kept from config (set via run_app() or previous save) persist_settings(list( @@ -625,9 +696,11 @@ server <- function(input, output, session) { roi_folder = input$cfg_roi_folder, output_folder = input$cfg_output_folder, png_output_folder = input$cfg_png_output_folder, + db_folder = input$cfg_db_folder, use_threshold = input$cfg_use_threshold, pixels_per_micron = input$cfg_pixels_per_micron, auto_sync = input$cfg_auto_sync, + save_format = input$cfg_save_format, class2use_path = rv$class2use_path, python_venv_path = config$python_venv_path )) @@ -644,7 +717,105 @@ server <- function(input, output, session) { rescan_trigger(rescan_trigger() + 1) } }) - + + # Import .mat -> SQLite bulk handler + observeEvent(input$import_mat_to_db_btn, { + if (is.null(config$output_folder) || config$output_folder == "") { + showNotification("Output folder is not configured. Set it in Settings first.", + type = "error") + return() + } + db_path <- get_db_path(config$db_folder) + annotator <- if (!is.null(input$annotator_name) && nzchar(input$annotator_name)) { + input$annotator_name + } else { + "imported" + } + + withProgress(message = "Importing .mat files to SQLite...", { + result <- import_all_mat_to_db(config$output_folder, db_path, annotator) + }) + + showNotification( + sprintf("Import complete: %d imported, %d failed, %d skipped (already in DB).", + result$success, result$failed, result$skipped), + type = if (result$failed > 0) "warning" else "message", + duration = 8 + ) + + # Trigger file index rescan to update sample list + if (result$success > 0) { + rescan_trigger(rescan_trigger() + 1) + } + }) + + # Export SQLite -> .mat bulk handler + observeEvent(input$export_db_to_mat_btn, { + if (is.null(config$output_folder) || config$output_folder == "") { + showNotification("Output folder is not configured. Set it in Settings first.", + type = "error") + return() + } + if (!python_available) { + showNotification("Python is not available. 
Export to .mat requires Python with scipy.", + type = "error") + return() + } + + db_path <- get_db_path(config$db_folder) + + withProgress(message = "Exporting SQLite to .mat files...", { + result <- export_all_db_to_mat(db_path, config$output_folder) + }) + + showNotification( + sprintf("Export complete: %d exported, %d failed.", result$success, result$failed), + type = if (result$failed > 0) "warning" else "message", + duration = 8 + ) + }) + + # Export SQLite -> PNG bulk handler + observeEvent(input$export_db_to_png_btn, { + if (is.null(config$png_output_folder) || config$png_output_folder == "") { + showNotification("PNG Output Folder is not configured. Set it in Settings first.", + type = "error") + return() + } + if (is.null(config$output_folder) || config$output_folder == "") { + showNotification("Output folder is not configured. Set it in Settings first.", + type = "error") + return() + } + + db_path <- get_db_path(config$db_folder) + current_roi_map <- roi_path_map() + + if (length(current_roi_map) == 0) { + showNotification("No ROI file index available. 
Click Sync first.", + type = "error") + return() + } + + skip <- if (!is.null(input$cfg_skip_class_png) && nzchar(input$cfg_skip_class_png)) { + input$cfg_skip_class_png + } else { + NULL + } + + withProgress(message = "Exporting PNGs from SQLite...", { + result <- export_all_db_to_png(db_path, config$png_output_folder, + current_roi_map, skip_class = skip) + }) + + showNotification( + sprintf("PNG export complete: %d exported, %d failed, %d skipped (ROI not found).", + result$success, result$failed, result$skipped), + type = if (result$failed > 0) "warning" else "message", + duration = 8 + ) + }) + # ============================================================================ # UI Outputs - Warnings and Indicators # ============================================================================ @@ -673,15 +844,17 @@ server <- function(input, output, session) { }) output$python_warning <- renderUI({ - if (!python_available) { + needs_python <- config$save_format %in% c("mat", "both") + if (!python_available && needs_python) { div( class = "alert alert-warning", style = "margin-top: 10px; padding: 8px; font-size: 12px;", "Python not available. Saving .mat files will not work. ", - "This is only required if you use the ", + "Switch to SQLite storage format in Settings, or install Python: ", + "run ifcb_py_install() in R console. ", + "MAT files are only needed for ", tags$a(href = "https://github.com/hsosik/ifcb-analysis", target = "_blank", "ifcb-analysis"), - " MATLAB toolbox (Sosik & Olson, 2007). ", - "Run ifcb_py_install() in R console to enable." + " compatibility." 
) } }) @@ -859,7 +1032,7 @@ server <- function(input, output, session) { # Switch from validation mode to annotation mode observeEvent(input$switch_to_annotation, { req(rv$current_sample, rv$has_both_modes) - + sample_name <- rv$current_sample roi_path <- roi_path_map()[[sample_name]] if (is.null(roi_path)) { @@ -867,12 +1040,21 @@ server <- function(input, output, session) { return() } adc_path <- sub("\\.roi$", ".adc", roi_path) + + # Try SQLite first, then .mat + db_path <- get_db_path(config$db_folder) annotation_mat_path <- file.path(config$output_folder, paste0(sample_name, ".mat")) - - if (file.exists(annotation_mat_path)) { + has_db <- sample_name %in% list_annotated_samples_db(db_path) + has_mat <- file.exists(annotation_mat_path) + + if (has_db || has_mat) { roi_dims <- read_roi_dimensions(adc_path) - classifications <- load_from_mat(annotation_mat_path, sample_name, rv$class2use, roi_dims) - + if (has_db) { + classifications <- load_from_db(db_path, sample_name, roi_dims) + } else { + classifications <- load_from_mat(annotation_mat_path, sample_name, rv$class2use, roi_dims) + } + rv$original_classifications <- classifications rv$classifications <- classifications rv$is_annotation_mode <- TRUE @@ -880,7 +1062,7 @@ server <- function(input, output, session) { rv$selected_images <- character() rv$current_page <- 1 rv$changes_log <- create_empty_changes_log() - + # Update class filter dropdown available_classes <- sort(unique(classifications$class_name)) unmatched <- setdiff(available_classes, c(rv$class2use, "unclassified")) @@ -890,7 +1072,7 @@ server <- function(input, output, session) { updateSelectInput(session, "class_filter", choices = c("All" = "all", setNames(available_classes, display_names)), selected = "all") - + showNotification("Switched to Manual annotation mode", type = "message") } else { showNotification("No manual annotation file found", type = "warning") @@ -966,6 +1148,7 @@ server <- function(input, output, session) { roi_folder = 
config$roi_folder, output_folder = config$output_folder, png_output_folder = config$png_output_folder, + db_folder = config$db_folder, use_threshold = config$use_threshold, class2use_path = persistent_path )) @@ -1374,8 +1557,11 @@ server <- function(input, output, session) { png_output_folder = config$png_output_folder, roi_folder = config$roi_folder, class2use_path = rv$class2use_path, + class2use = rv$class2use, annotator = input$annotator_name, - adc_folder = adc_folder_for_save + adc_folder = adc_folder_for_save, + save_format = config$save_format, + db_folder = config$db_folder ) # Only update annotated samples list if changes were actually saved if (isTRUE(saved)) { @@ -1416,30 +1602,39 @@ server <- function(input, output, session) { tryCatch({ annotation_mat_path <- file.path(config$output_folder, paste0(sample_name, ".mat")) - has_existing_annotation <- file.exists(annotation_mat_path) + db_path <- get_db_path(config$db_folder) + has_db_annotation <- sample_name %in% list_annotated_samples_db(db_path) + has_mat_annotation <- file.exists(annotation_mat_path) + has_existing_annotation <- has_db_annotation || has_mat_annotation has_classification <- has_csv || has_classifier_mat - + # Track if sample has both modes available rv$has_both_modes <- has_existing_annotation && has_classification rv$using_manual_mode <- has_existing_annotation # Default to manual if available - + # Variable to hold mode message for notification (shown after filtering) mode_message <- NULL - + # Priority: Manual annotation > Classification > New annotation + # Within manual annotations: SQLite first (faster), then .mat fallback if (has_existing_annotation) { # ANNOTATION MODE - from existing manual annotation (priority when both exist) if (!file.exists(adc_path)) { showNotification(paste("ADC file not found:", adc_path), type = "error") return(FALSE) } - + roi_dims <- read_roi_dimensions(adc_path) - classifications <- load_from_mat(annotation_mat_path, sample_name, rv$class2use, 
roi_dims) + + if (has_db_annotation) { + classifications <- load_from_db(db_path, sample_name, roi_dims) + } else { + classifications <- load_from_mat(annotation_mat_path, sample_name, rv$class2use, roi_dims) + } rv$is_annotation_mode <- TRUE - + mode_message <- if (rv$has_both_modes) "Manual mode (switch available)" else "Resumed" - + } else if (has_csv) { # VALIDATION MODE - from CSV classifications <- load_from_csv(csv_path) @@ -2044,63 +2239,54 @@ server <- function(input, output, session) { } tryCatch({ - output_folder <- config$output_folder - stats_folder <- file.path(config$output_folder, "validation_statistics") - png_output_folder <- config$png_output_folder - - if (!dir.exists(output_folder)) dir.create(output_folder, recursive = TRUE) - if (!dir.exists(stats_folder)) dir.create(stats_folder, recursive = TRUE) - if (!dir.exists(png_output_folder)) dir.create(png_output_folder, recursive = TRUE) - - temp_annotate_folder <- tempfile(pattern = "ifcb_annotate_") - dir.create(temp_annotate_folder, recursive = TRUE) - - withProgress(message = "Copying images...", { - copy_images_to_class_folders( - classifications = rv$classifications, - src_folder = file.path(rv$temp_png_folder, rv$current_sample), - temp_folder = temp_annotate_folder, - output_folder = png_output_folder - ) - }) - roi_path <- roi_path_map()[[rv$current_sample]] adc_folder <- if (!is.null(roi_path)) dirname(roi_path) else NULL if (is.null(adc_folder)) { showNotification("Cannot find ROI data folder for this sample", type = "error") return() } - - withProgress(message = "Saving MAT file...", { - result <- ifcb_annotate_samples( - png_folder = temp_annotate_folder, + + save_fmt <- config$save_format + progress_msg <- switch(save_fmt, + sqlite = "Saving to database...", + mat = "Saving MAT file...", + both = "Saving annotations...", + "Saving..." 
+ ) + + withProgress(message = progress_msg, { + result <- save_sample_annotations( + sample_name = rv$current_sample, + classifications = rv$classifications, + original_classifications = rv$original_classifications, + changes_log = rv$changes_log, + temp_png_folder = rv$temp_png_folder, + output_folder = config$output_folder, + png_output_folder = config$png_output_folder, + roi_folder = config$roi_folder, + class2use_path = rv$class2use_path, + class2use = rv$class2use, + annotator = annotator, adc_folder = adc_folder, - class2use_file = rv$class2use_path, - output_folder = output_folder, - sample_names = rv$current_sample, - remove_trailing_numbers = FALSE + save_format = save_fmt, + db_folder = config$db_folder ) }) - - save_validation_statistics( - sample_name = rv$current_sample, - classifications = rv$classifications, - original_classifications = rv$original_classifications, - stats_folder = stats_folder, - annotator = annotator - ) - - unlink(temp_annotate_folder, recursive = TRUE) - + + if (!isTRUE(result)) { + showNotification("Save returned no changes", type = "warning") + return() + } + # Update annotated samples list to reflect new manual annotation current_annotated <- annotated_samples() if (!rv$current_sample %in% current_annotated) { annotated_samples(c(current_annotated, rv$current_sample)) update_current_sample_status(rv$current_sample) } - + showNotification(paste("Saved to", config$output_folder), type = "message") - + }, error = function(e) { showNotification(paste("Error saving:", e$message), type = "error") }) @@ -2384,7 +2570,9 @@ server <- function(input, output, session) { png_output_folder = png_output_folder, roi_folder = roi_folder, class2use_path = class2use_path, - annotator = annotator + annotator = annotator, + save_format = isolate(config$save_format), + db_folder = isolate(config$db_folder) ) }, error = function(e) { message("Failed to auto-save ", sample_name, " on session end: ", e$message) diff --git 
a/man/export_all_db_to_mat.Rd b/man/export_all_db_to_mat.Rd new file mode 100644 index 0000000..4209e06 --- /dev/null +++ b/man/export_all_db_to_mat.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{export_all_db_to_mat} +\alias{export_all_db_to_mat} +\title{Bulk export all annotated samples from SQLite to .mat files} +\usage{ +export_all_db_to_mat(db_path, output_folder) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} + +\item{output_folder}{Folder where .mat files will be written} +} +\value{ +Named list with counts: \code{success}, \code{failed} +} +\description{ +Exports every sample in the database to a MATLAB-compatible annotation file. +Requires Python with scipy. +} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") +result <- export_all_db_to_mat(db_path, "/data/manual") +cat(result$success, "exported,", result$failed, "failed\n") +} +} diff --git a/man/export_all_db_to_png.Rd b/man/export_all_db_to_png.Rd new file mode 100644 index 0000000..633ea75 --- /dev/null +++ b/man/export_all_db_to_png.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{export_all_db_to_png} +\alias{export_all_db_to_png} +\title{Bulk export all annotated samples from SQLite to class-organized PNGs} +\usage{ +export_all_db_to_png(db_path, png_folder, roi_path_map, skip_class = NULL) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} + +\item{png_folder}{Base output folder for PNGs} + +\item{roi_path_map}{Named list mapping sample names to \code{.roi} file +paths. Samples without an entry are skipped.} + +\item{skip_class}{Character vector of class names to exclude from export +(e.g. \code{"unclassified"}). 
Default \code{NULL} exports all classes.} +} +\value{ +Named list with counts: \code{success}, \code{failed}, \code{skipped} +} +\description{ +Exports every annotated sample in the database to PNG images organized +into class subfolders. +} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") +roi_map <- list("D20230101T120000_IFCB134" = "/data/raw/.../D20230101T120000_IFCB134.roi") +result <- export_all_db_to_png(db_path, "/data/png_output", roi_map, + skip_class = "unclassified") +cat(result$success, "exported,", result$failed, "failed,", result$skipped, "skipped\n") +} +} diff --git a/man/export_db_to_mat.Rd b/man/export_db_to_mat.Rd new file mode 100644 index 0000000..64f7f04 --- /dev/null +++ b/man/export_db_to_mat.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{export_db_to_mat} +\alias{export_db_to_mat} +\title{Export annotations from SQLite to a .mat file} +\usage{ +export_db_to_mat(db_path, sample_name, output_folder) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} + +\item{sample_name}{Sample name} + +\item{output_folder}{Folder where the .mat file will be written} +} +\value{ +TRUE on success, FALSE on failure +} +\description{ +Reads annotations for a single sample from the database and writes a +MATLAB-compatible annotation file using \code{iRfcb::ifcb_create_manual_file}. +Requires Python with scipy. 
+} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") +export_db_to_mat(db_path, "D20230101T120000_IFCB134", "/data/manual") +} +} diff --git a/man/export_db_to_png.Rd b/man/export_db_to_png.Rd new file mode 100644 index 0000000..5696220 --- /dev/null +++ b/man/export_db_to_png.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{export_db_to_png} +\alias{export_db_to_png} +\title{Export annotated images from SQLite to class-organized PNG folders} +\usage{ +export_db_to_png(db_path, sample_name, roi_path, png_folder, skip_class = NULL) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} + +\item{sample_name}{Sample name} + +\item{roi_path}{Path to the \code{.roi} file for this sample} + +\item{png_folder}{Base output folder. Images are written to +\code{png_folder/<class_name>/}} + +\item{skip_class}{Character vector of class names to exclude from export +(e.g. \code{"unclassified"}). Default \code{NULL} exports all classes.} +} +\value{ +TRUE on success, FALSE on failure +} +\description{ +Reads annotations for a single sample from the database and extracts PNG +images from the ROI file, placing each image into a subfolder named after +its assigned class. 
+} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") +export_db_to_png(db_path, "D20230101T120000_IFCB134", + "/data/raw/2023/D20230101/D20230101T120000_IFCB134.roi", + "/data/png_output", + skip_class = "unclassified") +} +} diff --git a/man/figures/settings-dialog.png b/man/figures/settings-dialog.png index ca617f5..b9db08e 100644 Binary files a/man/figures/settings-dialog.png and b/man/figures/settings-dialog.png differ diff --git a/man/get_db_path.Rd b/man/get_db_path.Rd new file mode 100644 index 0000000..0ddedd3 --- /dev/null +++ b/man/get_db_path.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{get_db_path} +\alias{get_db_path} +\title{Get path to the annotations SQLite database} +\usage{ +get_db_path(db_folder) +} +\arguments{ +\item{db_folder}{Path to the database directory, typically +\code{\link{get_default_db_dir}()}, a persistent local directory.} +} +\value{ +Path to the SQLite database file +} +\description{ +Returns the path to \code{annotations.sqlite} in the given database +directory. The database directory should be on a local filesystem, not a +network drive, because +\href{https://www.sqlite.org/useovernet.html}{SQLite file locking is +unreliable over network filesystems}. 
+} +\examples{ +# Use the default local database directory +get_db_path(get_default_db_dir()) + +# Or specify a custom directory +get_db_path("/data/local_db") +} +\seealso{ +\code{\link{get_default_db_dir}} for the default database directory +} diff --git a/man/get_default_db_dir.Rd b/man/get_default_db_dir.Rd new file mode 100644 index 0000000..6e954de --- /dev/null +++ b/man/get_default_db_dir.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_default_db_dir} +\alias{get_default_db_dir} +\title{Get default database directory} +\usage{ +get_default_db_dir() +} +\value{ +Path to the default database directory +} +\description{ +Returns the default path for the SQLite annotations database. This is a +persistent, local, user-level directory that survives package reinstalls. +The database should be stored on a local filesystem, not on a network +drive, because SQLite file locking is unreliable over network filesystems. +} +\examples{ +# Get the default database directory +db_dir <- get_default_db_dir() +print(db_dir) +} +\seealso{ +\code{\link{get_db_path}} for the full database file path +} diff --git a/man/import_all_mat_to_db.Rd b/man/import_all_mat_to_db.Rd new file mode 100644 index 0000000..3da50dd --- /dev/null +++ b/man/import_all_mat_to_db.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{import_all_mat_to_db} +\alias{import_all_mat_to_db} +\title{Bulk import .mat annotation files into the SQLite database} +\usage{ +import_all_mat_to_db(mat_folder, db_path, annotator = "imported") +} +\arguments{ +\item{mat_folder}{Folder containing .mat annotation files} + +\item{db_path}{Path to the SQLite database file} + +\item{annotator}{Annotator name (defaults to \code{"imported"})} +} +\value{ +Named list with counts: \code{success}, \code{failed}, \code{skipped} +} +\description{ +Scans a folder for \code{.mat} annotation files 
(excluding classifier output +files matching \code{*_class*.mat}) and imports each into the database. Each +file's embedded \code{class2use_manual} is used for class-name mapping. +} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") +result <- import_all_mat_to_db("/data/manual", db_path) +cat(result$success, "imported,", result$failed, "failed,", result$skipped, "skipped\n") +} +} diff --git a/man/import_mat_to_db.Rd b/man/import_mat_to_db.Rd new file mode 100644 index 0000000..f9db02d --- /dev/null +++ b/man/import_mat_to_db.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{import_mat_to_db} +\alias{import_mat_to_db} +\title{Import a .mat annotation file into the SQLite database} +\usage{ +import_mat_to_db(mat_path, db_path, sample_name, annotator = "imported") +} +\arguments{ +\item{mat_path}{Path to the .mat annotation file} + +\item{db_path}{Path to the SQLite database file} + +\item{sample_name}{Sample name} + +\item{annotator}{Annotator name (defaults to \code{"imported"})} +} +\value{ +TRUE on success, FALSE on failure +} +\description{ +Reads an existing .mat annotation file and writes its data into the SQLite +database. The class list (\code{class2use_manual}) and classlist indices are +read directly from the .mat file to ensure a faithful import. ROIs with NaN +indices (not yet reviewed) are stored with \code{is_manual = 0}. 
+} +\examples{ +\dontrun{ +import_mat_to_db( + mat_path = "/data/manual/D20230101T120000_IFCB134.mat", + db_path = get_db_path("/data/manual"), + sample_name = "D20230101T120000_IFCB134" +) +} +} diff --git a/man/init_db_schema.Rd b/man/init_db_schema.Rd new file mode 100644 index 0000000..efc8a94 --- /dev/null +++ b/man/init_db_schema.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{init_db_schema} +\alias{init_db_schema} +\title{Initialize the annotations database schema} +\usage{ +init_db_schema(con) +} +\arguments{ +\item{con}{A DBI connection object} +} +\value{ +NULL (called for side effects) +} +\description{ +Creates the \code{annotations} and \code{class_lists} tables if they do not +already exist. +} +\keyword{internal} diff --git a/man/list_annotated_samples_db.Rd b/man/list_annotated_samples_db.Rd new file mode 100644 index 0000000..c007624 --- /dev/null +++ b/man/list_annotated_samples_db.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{list_annotated_samples_db} +\alias{list_annotated_samples_db} +\title{List samples with annotations in the database} +\usage{ +list_annotated_samples_db(db_path) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} +} +\value{ +Character vector of sample names that have annotations +} +\description{ +List samples with annotations in the database +} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") +samples <- list_annotated_samples_db(db_path) +} +} diff --git a/man/load_annotations_db.Rd b/man/load_annotations_db.Rd new file mode 100644 index 0000000..62eea24 --- /dev/null +++ b/man/load_annotations_db.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{load_annotations_db} +\alias{load_annotations_db} +\title{Load annotations from the SQLite database} +\usage{ +load_annotations_db(db_path, 
sample_name, roi_dimensions) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} + +\item{sample_name}{Sample name} + +\item{roi_dimensions}{Data frame from \code{\link{read_roi_dimensions}} with +columns \code{roi_number}, \code{width}, \code{height}, \code{area}} +} +\value{ +Data frame with columns: file_name, class_name, score, width, height, + roi_area. Returns NULL if the sample has no annotations. +} +\description{ +Reads annotations for a single sample and returns a data frame in the same +format as \code{\link{load_from_mat}}. +} +\examples{ +\dontrun{ +dims <- read_roi_dimensions("/data/raw/2023/D20230101/D20230101T120000_IFCB134.adc") +db_path <- get_db_path("/data/manual") +classifications <- load_annotations_db(db_path, "D20230101T120000_IFCB134", dims) +} +} diff --git a/man/load_from_db.Rd b/man/load_from_db.Rd new file mode 100644 index 0000000..50ffe55 --- /dev/null +++ b/man/load_from_db.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sample_loading.R +\name{load_from_db} +\alias{load_from_db} +\title{Load classifications from SQLite database} +\usage{ +load_from_db(db_path, sample_name, roi_dimensions) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} + +\item{sample_name}{Sample name (e.g., "D20230101T120000_IFCB134")} + +\item{roi_dimensions}{Data frame from \code{\link{read_roi_dimensions}}} +} +\value{ +Data frame with columns: file_name, class_name, score, width, height, + roi_area. Returns NULL if the sample has no annotations in the database. +} +\description{ +Reads annotations for a sample from the SQLite database and returns a data +frame in the same format as \code{\link{load_from_mat}}. 
+} +\examples{ +\dontrun{ +dims <- read_roi_dimensions("/data/raw/2023/D20230101/D20230101T120000_IFCB134.adc") +db_path <- get_db_path("/data/manual") +classifications <- load_from_db(db_path, "D20230101T120000_IFCB134", dims) +} +} diff --git a/man/rescan_file_index.Rd b/man/rescan_file_index.Rd index 8b2018d..30876d0 100644 --- a/man/rescan_file_index.Rd +++ b/man/rescan_file_index.Rd @@ -8,7 +8,8 @@ rescan_file_index( roi_folder = NULL, csv_folder = NULL, output_folder = NULL, - verbose = TRUE + verbose = TRUE, + db_folder = NULL ) } \arguments{ @@ -16,9 +17,13 @@ rescan_file_index( \item{csv_folder}{Path to classification folder (CSV/MAT). If NULL, read from saved settings.} -\item{output_folder}{Path to output folder for annotations. If NULL, read from saved settings.} +\item{output_folder}{Path to output folder for MAT annotations. If NULL, read from saved settings.} \item{verbose}{If TRUE, print progress messages. Default TRUE.} + +\item{db_folder}{Path to the database folder for SQLite annotations. If NULL, +read from saved settings; if not found in settings, defaults to +\code{\link{get_default_db_dir}()}.} } \value{ Invisibly returns the file index list, or NULL if roi_folder is invalid. 
diff --git a/man/save_annotations_db.Rd b/man/save_annotations_db.Rd new file mode 100644 index 0000000..78688ca --- /dev/null +++ b/man/save_annotations_db.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{save_annotations_db} +\alias{save_annotations_db} +\title{Save annotations to the SQLite database} +\usage{ +save_annotations_db( + db_path, + sample_name, + classifications, + class2use, + annotator = "Unknown", + is_manual = NULL +) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} + +\item{sample_name}{Sample name (e.g., \code{"D20230101T120000_IFCB134"})} + +\item{classifications}{Data frame with at least \code{file_name} and +\code{class_name} columns} + +\item{class2use}{Character vector of class names (preserves index order for +.mat export)} + +\item{annotator}{Annotator name} + +\item{is_manual}{Integer vector of 0/1 flags indicating whether each ROI was +manually reviewed (1) or not yet reviewed (0, corresponding to NaN in .mat +files). If \code{NULL} (the default), all ROIs are treated as reviewed.} +} +\value{ +TRUE on success, FALSE on failure +} +\description{ +Writes (or replaces) annotations for a single sample. The existing rows for +the sample are deleted first so that re-saving acts as an upsert. 
+} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") +save_annotations_db(db_path, "D20230101T120000_IFCB134", + classifications, class2use, "Jane") +} +} diff --git a/man/save_sample_annotations.Rd b/man/save_sample_annotations.Rd index ac00201..24bb82f 100644 --- a/man/save_sample_annotations.Rd +++ b/man/save_sample_annotations.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/sample_saving.R \name{save_sample_annotations} \alias{save_sample_annotations} -\title{Save sample annotations to MAT and statistics files} +\title{Save sample annotations} \usage{ save_sample_annotations( sample_name, @@ -14,8 +14,11 @@ save_sample_annotations( png_output_folder, roi_folder, class2use_path, + class2use = NULL, annotator = "Unknown", - adc_folder = NULL + adc_folder = NULL, + save_format = "sqlite", + db_folder = get_default_db_dir() ) } \arguments{ @@ -29,7 +32,7 @@ save_sample_annotations( \item{temp_png_folder}{Path to temporary folder with extracted PNG images} -\item{output_folder}{Output folder path for MAT files} +\item{output_folder}{Output folder path for MAT files and statistics} \item{png_output_folder}{PNG output folder path (organized by class)} @@ -37,24 +40,34 @@ save_sample_annotations( \item{class2use_path}{Path to class2use file} +\item{class2use}{Character vector of class names. When NULL (default), loaded +from \code{class2use_path}.} + \item{annotator}{Annotator name for statistics} \item{adc_folder}{Direct path to the ADC folder. When provided, this is used instead of constructing the path via \code{\link{get_sample_paths}}. This supports non-standard folder structures.} + +\item{save_format}{One of \code{"sqlite"} (default), \code{"mat"}, or +\code{"both"}. Controls which backend(s) are written.} + +\item{db_folder}{Path to the database folder for SQLite storage. Defaults to +\code{\link{get_default_db_dir}()}. 
Should be a local filesystem path, +not a network drive.} } \value{ TRUE on success, FALSE on failure } \description{ -Saves the current annotations for a sample, including: -- MAT file compatible with ifcb-analysis (requires Python) -- Validation statistics CSV files -- PNG images organized by class +Saves the current annotations for a sample. By default annotations are +stored in a local SQLite database (\code{annotations.sqlite} in the database +folder). Optionally, a MATLAB-compatible \code{.mat} file can also be +written (requires Python + scipy). } \examples{ \dontrun{ -# Save annotations for a sample +# Save annotations for a sample (default: SQLite) success <- save_sample_annotations( sample_name = "D20230101T120000_IFCB134", classifications = current_classifications, diff --git a/man/update_annotator.Rd b/man/update_annotator.Rd new file mode 100644 index 0000000..f40757b --- /dev/null +++ b/man/update_annotator.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/database.R +\name{update_annotator} +\alias{update_annotator} +\title{Update the annotator name for one or more samples} +\usage{ +update_annotator(db_path, sample_names, annotator) +} +\arguments{ +\item{db_path}{Path to the SQLite database file} + +\item{sample_names}{Character vector of sample names to update} + +\item{annotator}{New annotator name} +} +\value{ +Named integer vector with the number of rows updated per sample. + Samples not found in the database are included with a count of 0. +} +\description{ +Changes the annotator field for all annotations belonging to the specified +sample(s). This is useful for correcting the annotator after bulk imports +or when transferring ownership of annotations. 
+} +\examples{ +\dontrun{ +db_path <- get_db_path("/data/manual") + +# Update a single sample +update_annotator(db_path, "D20230101T120000_IFCB134", "Jane") + +# Update multiple samples at once +update_annotator(db_path, + c("D20230101T120000_IFCB134", "D20230202T080000_IFCB134"), + "Jane") + +# Update all annotated samples +all_samples <- list_annotated_samples_db(db_path) +update_annotator(db_path, all_samples, "Jane") +} +} diff --git a/tests/testthat/test-database.R b/tests/testthat/test-database.R new file mode 100644 index 0000000..47bee39 --- /dev/null +++ b/tests/testthat/test-database.R @@ -0,0 +1,1082 @@ +# Tests for SQLite database backend + +library(testthat) + +test_that("get_db_path returns correct path", { + expect_equal( + get_db_path("/data/local_db"), + file.path("/data/local_db", "annotations.sqlite") + ) +}) + +test_that("save_annotations_db creates database with correct schema", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + classifications <- data.frame( + file_name = c("D20230101T120000_IFCB134_00001.png", + "D20230101T120000_IFCB134_00002.png"), + class_name = c("Diatom", "Ciliate"), + stringsAsFactors = FALSE + ) + class2use <- c("unclassified", "Diatom", "Ciliate", "Dinoflagellate") + + result <- save_annotations_db(db_path, "D20230101T120000_IFCB134", + classifications, class2use, "TestUser") + + expect_true(result) + expect_true(file.exists(db_path)) + + # Verify schema + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con)) + + tables <- DBI::dbGetQuery(con, "SELECT name FROM sqlite_master WHERE type='table'") + expect_true("annotations" %in% tables$name) + expect_true("class_lists" %in% tables$name) + + # Verify annotations data + annotations <- DBI::dbGetQuery(con, "SELECT * FROM annotations ORDER BY roi_number") + expect_equal(nrow(annotations), 2) + expect_equal(annotations$sample_name, rep("D20230101T120000_IFCB134", 2)) + expect_equal(annotations$roi_number, 
c(1L, 2L)) + expect_equal(annotations$class_name, c("Diatom", "Ciliate")) + expect_equal(annotations$annotator, rep("TestUser", 2)) + + # Verify class list data + class_list <- DBI::dbGetQuery(con, "SELECT * FROM class_lists ORDER BY class_index") + expect_equal(nrow(class_list), length(class2use)) + expect_equal(class_list$class_name, class2use) + expect_equal(class_list$class_index, seq_along(class2use)) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("save_annotations_db returns FALSE for empty classifications", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + result <- save_annotations_db(db_path, "sample", + data.frame(file_name = character(), + class_name = character()), + c("unclassified"), "TestUser") + expect_false(result) + + result2 <- save_annotations_db(db_path, "sample", NULL, c("unclassified"), "TestUser") + expect_false(result2) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("save_annotations_db upserts (re-saving replaces data)", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom", "Ciliate") + + # First save + classifications_v1 <- data.frame( + file_name = paste0(sample_name, "_00001.png"), + class_name = "Diatom", + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, classifications_v1, class2use, "User1") + + # Second save with different data + classifications_v2 <- data.frame( + file_name = paste0(sample_name, "_00001.png"), + class_name = "Ciliate", + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, classifications_v2, class2use, "User2") + + # Verify only latest version exists + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con)) + + annotations <- DBI::dbGetQuery(con, + "SELECT * FROM annotations WHERE sample_name = ?", + params = list(sample_name)) + expect_equal(nrow(annotations), 1) + 
expect_equal(annotations$class_name, "Ciliate") + expect_equal(annotations$annotator, "User2") + + unlink(db_dir, recursive = TRUE) +}) + +test_that("load_annotations_db returns correct data frame format", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom", "Ciliate") + + classifications <- data.frame( + file_name = c(paste0(sample_name, "_00001.png"), + paste0(sample_name, "_00002.png"), + paste0(sample_name, "_00003.png")), + class_name = c("Diatom", "Ciliate", "Diatom"), + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, classifications, class2use, "TestUser") + + # Load with ROI dimensions + roi_dims <- data.frame( + roi_number = 1:3, + width = c(100, 150, 80), + height = c(80, 100, 60), + area = c(8000, 15000, 4800) + ) + + result <- load_annotations_db(db_path, sample_name, roi_dims) + + expect_s3_class(result, "data.frame") + expect_equal(nrow(result), 3) + expect_true(all(c("file_name", "class_name", "score", "width", "height", "roi_area") %in% names(result))) + + # Should be sorted by area descending + expect_equal(result$roi_area, c(15000, 8000, 4800)) + expect_equal(result$class_name, c("Ciliate", "Diatom", "Diatom")) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("load_annotations_db returns NULL for missing sample", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + roi_dims <- data.frame( + roi_number = 1:3, width = rep(100, 3), + height = rep(80, 3), area = rep(8000, 3) + ) + + # Non-existent database + result <- load_annotations_db(db_path, "nonexistent_sample", roi_dims) + expect_null(result) + + # Existing database but missing sample + save_annotations_db(db_path, "other_sample", + data.frame(file_name = "other_sample_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + c("Diatom"), "test") + + result2 <- load_annotations_db(db_path, 
"nonexistent_sample", roi_dims) + expect_null(result2) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("list_annotated_samples_db returns correct sample names", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + # Empty / non-existent database + expect_equal(list_annotated_samples_db(db_path), character()) + + # Add two samples + class2use <- c("unclassified", "Diatom") + save_annotations_db(db_path, "sample_A", + data.frame(file_name = "sample_A_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + class2use, "test") + save_annotations_db(db_path, "sample_B", + data.frame(file_name = "sample_B_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + class2use, "test") + + samples <- list_annotated_samples_db(db_path) + expect_equal(sort(samples), c("sample_A", "sample_B")) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("round-trip: save then load returns identical data", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom", "Ciliate", "Dinoflagellate") + + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 1:5), + class_name = c("Diatom", "Ciliate", "Dinoflagellate", "Diatom", "unclassified"), + stringsAsFactors = FALSE + ) + + roi_dims <- data.frame( + roi_number = 1:5, + width = c(100, 150, 80, 200, 120), + height = c(80, 100, 60, 150, 90), + area = c(8000, 15000, 4800, 30000, 10800) + ) + + save_annotations_db(db_path, sample_name, classifications, class2use, "RoundTrip") + + loaded <- load_annotations_db(db_path, sample_name, roi_dims) + + # The loaded result is sorted by area descending + expected <- classifications + expected$score <- NA_real_ + expected$width <- roi_dims$width + expected$height <- roi_dims$height + expected$roi_area <- roi_dims$area + expected <- expected[order(-expected$roi_area), ] + rownames(expected) <- NULL + 
rownames(loaded) <- NULL + + expect_equal(loaded$file_name, expected$file_name) + expect_equal(loaded$class_name, expected$class_name) + expect_equal(loaded$width, expected$width) + expect_equal(loaded$height, expected$height) + expect_equal(loaded$roi_area, expected$roi_area) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("load_from_db delegates to load_annotations_db", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom") + + classifications <- data.frame( + file_name = paste0(sample_name, "_00001.png"), + class_name = "Diatom", + stringsAsFactors = FALSE + ) + + roi_dims <- data.frame( + roi_number = 1L, width = 100, height = 80, area = 8000 + ) + + save_annotations_db(db_path, sample_name, classifications, class2use, "test") + + result <- load_from_db(db_path, sample_name, roi_dims) + expect_s3_class(result, "data.frame") + expect_equal(nrow(result), 1) + expect_equal(result$class_name, "Diatom") + + unlink(db_dir, recursive = TRUE) +}) + +test_that("update_annotator changes annotator for a single sample", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom") + + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 1:3), + class_name = c("Diatom", "Diatom", "unclassified"), + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, classifications, class2use, "OldUser") + + counts <- update_annotator(db_path, sample_name, "NewUser") + expect_equal(counts, c("D20230101T120000_IFCB134" = 3L)) + + # Verify in DB + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con)) + rows <- DBI::dbGetQuery(con, + "SELECT DISTINCT annotator FROM annotations WHERE sample_name = ?", + params = list(sample_name)) + expect_equal(rows$annotator, "NewUser") + + 
unlink(db_dir, recursive = TRUE) +}) + +test_that("update_annotator changes multiple samples", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + class2use <- c("unclassified", "Diatom") + + save_annotations_db(db_path, "sample_A", + data.frame(file_name = "sample_A_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + class2use, "User1") + save_annotations_db(db_path, "sample_B", + data.frame(file_name = c("sample_B_00001.png", "sample_B_00002.png"), + class_name = c("Diatom", "Diatom"), + stringsAsFactors = FALSE), + class2use, "User2") + + counts <- update_annotator(db_path, c("sample_A", "sample_B"), "SharedUser") + expect_equal(counts, c(sample_A = 1L, sample_B = 2L)) + + # Verify both updated + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con)) + rows <- DBI::dbGetQuery(con, "SELECT DISTINCT annotator FROM annotations") + expect_equal(rows$annotator, "SharedUser") + + unlink(db_dir, recursive = TRUE) +}) + +test_that("update_annotator returns 0 for non-existent sample", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + save_annotations_db(db_path, "existing", + data.frame(file_name = "existing_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + c("Diatom"), "test") + + counts <- update_annotator(db_path, "nonexistent", "NewUser") + expect_equal(counts, c(nonexistent = 0L)) + + # Mix of existing and non-existing + counts2 <- update_annotator(db_path, c("existing", "nonexistent"), "NewUser") + expect_equal(counts2, c(existing = 1L, nonexistent = 0L)) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("update_annotator validates inputs", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + save_annotations_db(db_path, "sample", + data.frame(file_name = "sample_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + c("Diatom"), "test") + + # Missing database + 
expect_error(update_annotator("/nonexistent/db.sqlite", "sample", "X"), + "Database not found") + + # Invalid annotator + expect_error(update_annotator(db_path, "sample", NA_character_), + "annotator must be") + expect_error(update_annotator(db_path, "sample", c("A", "B")), + "annotator must be") + + # Empty sample_names returns empty vector + counts <- update_annotator(db_path, character(0), "X") + expect_length(counts, 0) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("import_mat_to_db migrates data correctly", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + sample_name <- "D20220522T000439_IFCB134" + + # Check if there's a test annotation mat file + output_test <- testthat::test_path("test_data", "manual") + test_mat <- file.path(output_test, paste0(sample_name, ".mat")) + skip_if_not(file.exists(test_mat), "No test MAT annotation file for migration test") + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + result <- import_mat_to_db(test_mat, db_path, sample_name, "migrated") + expect_true(result) + + # Verify data was imported + samples <- list_annotated_samples_db(db_path) + expect_true(sample_name %in% samples) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("import_mat_to_db returns FALSE for missing file", { + result <- import_mat_to_db( + "/nonexistent/file.mat", + tempfile(fileext = ".sqlite"), + "sample", "test" + ) + expect_false(result) +}) + +test_that("export_db_to_mat creates valid .mat file", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom", "Ciliate", 
"Dinoflagellate") + + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 1:4), + class_name = c("Diatom", "Ciliate", "Diatom", "Dinoflagellate"), + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, classifications, class2use, "TestUser") + + mat_dir <- tempfile("mat_") + dir.create(mat_dir) + + result <- export_db_to_mat(db_path, sample_name, mat_dir) + expect_true(result) + + mat_path <- file.path(mat_dir, paste0(sample_name, ".mat")) + expect_true(file.exists(mat_path)) + + # Verify contents via ifcb_get_mat_variable + classlist <- iRfcb::ifcb_get_mat_variable(mat_path, variable_name = "classlist") + expect_equal(nrow(classlist), 4) + # class indices: Diatom=2, Ciliate=3, Diatom=2, Dinoflagellate=4 + expect_equal(classlist[, 2], c(2, 3, 2, 4)) + + unlink(c(db_dir, mat_dir), recursive = TRUE) +}) + +test_that("export_db_to_mat returns FALSE for missing sample", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + # Create DB with one sample + save_annotations_db(db_path, "existing_sample", + data.frame(file_name = "existing_sample_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + c("unclassified", "Diatom"), "test") + + result <- export_db_to_mat(db_path, "nonexistent_sample", db_dir) + expect_false(result) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("export_db_to_mat returns FALSE for non-existent database", { + result <- export_db_to_mat("/nonexistent/db.sqlite", "sample", tempdir()) + expect_false(result) +}) + +test_that("import_all_mat_to_db imports multiple files and returns correct counts", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + mat_dir <- tempfile("mat_") + dir.create(mat_dir) + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + class2use <- c("unclassified", 
"Diatom", "Ciliate") + + # Create two .mat files using ifcb_create_manual_file + iRfcb::ifcb_create_manual_file( + roi_length = 3, class2use = class2use, + output_file = file.path(mat_dir, "sample_A.mat"), + classlist = c(2, 3, 2) + ) + iRfcb::ifcb_create_manual_file( + roi_length = 2, class2use = class2use, + output_file = file.path(mat_dir, "sample_B.mat"), + classlist = c(1, 3) + ) + # Create a classifier file that should be excluded + iRfcb::ifcb_create_manual_file( + roi_length = 2, class2use = class2use, + output_file = file.path(mat_dir, "sample_C_class_v1.mat"), + classlist = c(1, 2) + ) + + result <- import_all_mat_to_db(mat_dir, db_path, "test") + + expect_equal(result$success, 2L) + expect_equal(result$failed, 0L) + expect_equal(result$skipped, 0L) + + # Verify both samples in DB + samples <- list_annotated_samples_db(db_path) + expect_true("sample_A" %in% samples) + expect_true("sample_B" %in% samples) + expect_false("sample_C_class_v1" %in% samples) + + # Re-import should skip existing + result2 <- import_all_mat_to_db(mat_dir, db_path, "test") + expect_equal(result2$success, 0L) + expect_equal(result2$skipped, 2L) + + unlink(c(mat_dir, db_dir), recursive = TRUE) +}) + +test_that("export_all_db_to_mat exports multiple samples", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + class2use <- c("unclassified", "Diatom", "Ciliate") + + save_annotations_db(db_path, "sample_X", + data.frame(file_name = "sample_X_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + class2use, "test") + save_annotations_db(db_path, "sample_Y", + data.frame(file_name = "sample_Y_00001.png", + class_name = "Ciliate", + stringsAsFactors = FALSE), + class2use, "test") + + mat_dir <- tempfile("mat_") + dir.create(mat_dir) + + result <- 
export_all_db_to_mat(db_path, mat_dir) + + expect_equal(result$success, 2L) + expect_equal(result$failed, 0L) + expect_true(file.exists(file.path(mat_dir, "sample_X.mat"))) + expect_true(file.exists(file.path(mat_dir, "sample_Y.mat"))) + + unlink(c(db_dir, mat_dir), recursive = TRUE) +}) + +test_that("round-trip: DB -> .mat -> DB produces matching data", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom", "Ciliate", "Dinoflagellate") + + original <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 1:5), + class_name = c("Diatom", "Ciliate", "Dinoflagellate", "Diatom", "unclassified"), + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, original, class2use, "Original") + + # Export to .mat + mat_dir <- tempfile("mat_") + dir.create(mat_dir) + export_db_to_mat(db_path, sample_name, mat_dir) + + # Import back to a fresh DB + db_dir2 <- tempfile("db2_") + dir.create(db_dir2) + db_path2 <- get_db_path(db_dir2) + + mat_path <- file.path(mat_dir, paste0(sample_name, ".mat")) + import_mat_to_db(mat_path, db_path2, sample_name, "reimported") + + # Compare: read both DBs and check class names match + con1 <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con1), add = TRUE) + rows1 <- DBI::dbGetQuery(con1, + "SELECT roi_number, class_name FROM annotations WHERE sample_name = ? ORDER BY roi_number", + params = list(sample_name)) + + con2 <- DBI::dbConnect(RSQLite::SQLite(), db_path2) + on.exit(DBI::dbDisconnect(con2), add = TRUE) + rows2 <- DBI::dbGetQuery(con2, + "SELECT roi_number, class_name FROM annotations WHERE sample_name = ? 
ORDER BY roi_number", + params = list(sample_name)) + + expect_equal(nrow(rows1), nrow(rows2)) + expect_equal(rows1$class_name, rows2$class_name) + + unlink(c(db_dir, db_dir2, mat_dir), recursive = TRUE) +}) + +test_that("export_db_to_png extracts images into class subfolders", { + roi_path <- testthat::test_path("test_data", "raw", "2022", "D20220522", + "D20220522T000439_IFCB134.roi") + skip_if_not(file.exists(roi_path), "Test ROI file not found") + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20220522T000439_IFCB134" + class2use <- c("unclassified", "Diatom", "Ciliate") + + # Save annotations for ROIs 2-5 (ROI 1 is empty in test data) + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 2:5), + class_name = c("Diatom", "Ciliate", "Diatom", "Ciliate"), + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, classifications, class2use, "test") + + png_dir <- tempfile("png_") + dir.create(png_dir) + + result <- export_db_to_png(db_path, sample_name, roi_path, png_dir) + expect_true(result) + + # Check class subfolders exist + expect_true(dir.exists(file.path(png_dir, "Diatom"))) + expect_true(dir.exists(file.path(png_dir, "Ciliate"))) + + # Check that PNG files were created in the right subfolders + diatom_files <- list.files(file.path(png_dir, "Diatom"), pattern = "\\.png$") + ciliate_files <- list.files(file.path(png_dir, "Ciliate"), pattern = "\\.png$") + expect_equal(length(diatom_files), 2) + expect_equal(length(ciliate_files), 2) + + unlink(c(db_dir, png_dir), recursive = TRUE) +}) + +test_that("export_db_to_png skip_class excludes specified classes", { + roi_path <- testthat::test_path("test_data", "raw", "2022", "D20220522", + "D20220522T000439_IFCB134.roi") + skip_if_not(file.exists(roi_path), "Test ROI file not found") + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20220522T000439_IFCB134" + 
class2use <- c("unclassified", "Diatom", "Ciliate") + + # ROIs 2-5: two Diatom, one Ciliate, one unclassified + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 2:5), + class_name = c("Diatom", "Ciliate", "Diatom", "unclassified"), + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, classifications, class2use, "test") + + png_dir <- tempfile("png_") + dir.create(png_dir) + + result <- export_db_to_png(db_path, sample_name, roi_path, png_dir, + skip_class = "unclassified") + expect_true(result) + + # unclassified subfolder should NOT exist + expect_false(dir.exists(file.path(png_dir, "unclassified"))) + # Diatom and Ciliate should exist + expect_true(dir.exists(file.path(png_dir, "Diatom"))) + expect_true(dir.exists(file.path(png_dir, "Ciliate"))) + + diatom_files <- list.files(file.path(png_dir, "Diatom"), pattern = "\\.png$") + ciliate_files <- list.files(file.path(png_dir, "Ciliate"), pattern = "\\.png$") + expect_equal(length(diatom_files), 2) + expect_equal(length(ciliate_files), 1) + + unlink(c(db_dir, png_dir), recursive = TRUE) +}) + +test_that("export_db_to_png skip_class with all ROIs skipped returns TRUE", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20220522T000439_IFCB134" + class2use <- c("unclassified", "Diatom") + + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 2:3), + class_name = c("unclassified", "unclassified"), + stringsAsFactors = FALSE + ) + save_annotations_db(db_path, sample_name, classifications, class2use, "test") + + roi_path <- testthat::test_path("test_data", "raw", "2022", "D20220522", + "D20220522T000439_IFCB134.roi") + skip_if_not(file.exists(roi_path), "Test ROI file not found") + + png_dir <- tempfile("png_") + dir.create(png_dir) + + result <- export_db_to_png(db_path, sample_name, roi_path, png_dir, + skip_class = "unclassified") + expect_true(result) + + # No class subfolders 
should be created + expect_equal(length(list.dirs(png_dir, recursive = FALSE)), 0) + + unlink(c(db_dir, png_dir), recursive = TRUE) +}) + +test_that("export_db_to_png returns FALSE for missing sample", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + # Create DB with one sample + save_annotations_db(db_path, "existing_sample", + data.frame(file_name = "existing_sample_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + c("unclassified", "Diatom"), "test") + + roi_path <- testthat::test_path("test_data", "raw", "2022", "D20220522", + "D20220522T000439_IFCB134.roi") + skip_if_not(file.exists(roi_path), "Test ROI file not found") + + result <- export_db_to_png(db_path, "nonexistent_sample", roi_path, tempdir()) + expect_false(result) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("export_db_to_png returns FALSE for missing ROI file", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + save_annotations_db(db_path, "sample_A", + data.frame(file_name = "sample_A_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + c("unclassified", "Diatom"), "test") + + result <- export_db_to_png(db_path, "sample_A", "/nonexistent/file.roi", tempdir()) + expect_false(result) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("export_all_db_to_png exports multiple samples and skips missing ROIs", { + roi_path <- testthat::test_path("test_data", "raw", "2022", "D20220522", + "D20220522T000439_IFCB134.roi") + skip_if_not(file.exists(roi_path), "Test ROI file not found") + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + class2use <- c("unclassified", "Diatom") + + # sample_A has a valid ROI path (use ROI 2 since ROI 1 is empty in test data) + save_annotations_db(db_path, "D20220522T000439_IFCB134", + data.frame(file_name = "D20220522T000439_IFCB134_00002.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + class2use, "test") + + # 
sample_B has no ROI path (will be skipped) + save_annotations_db(db_path, "sample_no_roi", + data.frame(file_name = "sample_no_roi_00001.png", + class_name = "Diatom", + stringsAsFactors = FALSE), + class2use, "test") + + png_dir <- tempfile("png_") + dir.create(png_dir) + + roi_map <- list("D20220522T000439_IFCB134" = roi_path) + + result <- export_all_db_to_png(db_path, png_dir, roi_map) + + expect_equal(result$success, 1L) + expect_equal(result$failed, 0L) + expect_equal(result$skipped, 1L) + expect_true(dir.exists(file.path(png_dir, "Diatom"))) + + unlink(c(db_dir, png_dir), recursive = TRUE) +}) + +test_that("save_annotations_db stores is_manual flags", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom", "Ciliate") + + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 1:3), + class_name = c("Diatom", "unclassified", "Ciliate"), + stringsAsFactors = FALSE + ) + + result <- save_annotations_db(db_path, sample_name, classifications, + class2use, "TestUser", + is_manual = c(1L, 0L, 1L)) + expect_true(result) + + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con)) + + rows <- DBI::dbGetQuery(con, + "SELECT roi_number, class_name, is_manual FROM annotations WHERE sample_name = ? 
ORDER BY roi_number", + params = list(sample_name)) + + expect_equal(rows$is_manual, c(1L, 0L, 1L)) + expect_equal(rows$class_name, c("Diatom", "unclassified", "Ciliate")) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("save_annotations_db defaults is_manual to 1", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 1:2), + class_name = c("Diatom", "Ciliate"), + stringsAsFactors = FALSE + ) + + save_annotations_db(db_path, sample_name, classifications, + c("unclassified", "Diatom", "Ciliate"), "TestUser") + + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con)) + + rows <- DBI::dbGetQuery(con, + "SELECT is_manual FROM annotations WHERE sample_name = ?", + params = list(sample_name)) + + expect_true(all(rows$is_manual == 1L)) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("schema migration adds is_manual to existing DB", { + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + # Create a database with the OLD schema (no is_manual column) + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + DBI::dbExecute(con, " + CREATE TABLE annotations ( + sample_name TEXT NOT NULL, + roi_number INTEGER NOT NULL, + class_name TEXT NOT NULL, + annotator TEXT, + timestamp TEXT DEFAULT (datetime('now')), + PRIMARY KEY (sample_name, roi_number) + ) + ") + DBI::dbExecute(con, " + CREATE TABLE class_lists ( + sample_name TEXT NOT NULL, + class_index INTEGER NOT NULL, + class_name TEXT NOT NULL, + PRIMARY KEY (sample_name, class_index) + ) + ") + + # Insert a row without is_manual + DBI::dbExecute(con, + "INSERT INTO annotations (sample_name, roi_number, class_name, annotator) VALUES (?, ?, ?, ?)", + params = list("sample_old", 1L, "Diatom", "test")) + DBI::dbDisconnect(con) + + # Now run init_db_schema which should migrate + con2 <- 
DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con2)) + init_db_schema(con2) + + cols <- DBI::dbGetQuery(con2, "PRAGMA table_info(annotations)") + expect_true("is_manual" %in% cols$name) + + # Existing row should have default value 1 + row <- DBI::dbGetQuery(con2, + "SELECT is_manual FROM annotations WHERE sample_name = 'sample_old'") + expect_equal(row$is_manual, 1L) + + unlink(db_dir, recursive = TRUE) +}) + +test_that("import_mat_to_db reads class2use_manual from .mat", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + # Create a .mat file with a known class list + mat_dir <- tempfile("mat_") + dir.create(mat_dir) + mat_path <- file.path(mat_dir, "test_sample.mat") + class2use <- c("unclassified", "Diatom", "Ciliate") + + iRfcb::ifcb_create_manual_file( + roi_length = 3, class2use = class2use, + output_file = mat_path, + classlist = c(2, 3, 1) + ) + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + result <- import_mat_to_db(mat_path, db_path, "test_sample") + expect_true(result) + + # Verify the class list stored in DB matches the .mat file's embedded list + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con)) + + cl <- DBI::dbGetQuery(con, + "SELECT class_name FROM class_lists WHERE sample_name = 'test_sample' ORDER BY class_index") + expect_equal(cl$class_name, class2use) + + # Verify class names mapped correctly + ann <- DBI::dbGetQuery(con, + "SELECT roi_number, class_name FROM annotations WHERE sample_name = 'test_sample' ORDER BY roi_number") + expect_equal(ann$class_name, c("Diatom", "Ciliate", "unclassified")) + + unlink(c(mat_dir, db_dir), recursive = TRUE) +}) + +test_that("import_mat_to_db preserves NaN as is_manual=0", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + 
skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + mat_dir <- tempfile("mat_") + dir.create(mat_dir) + mat_path <- file.path(mat_dir, "test_nan.mat") + class2use <- c("unclassified", "Diatom", "Ciliate") + + # Create .mat with NaN entries (unreviewed ROIs) + iRfcb::ifcb_create_manual_file( + roi_length = 4, class2use = class2use, + output_file = mat_path, + classlist = c(2, NaN, 3, NaN) + ) + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + result <- import_mat_to_db(mat_path, db_path, "test_nan") + expect_true(result) + + con <- DBI::dbConnect(RSQLite::SQLite(), db_path) + on.exit(DBI::dbDisconnect(con)) + + rows <- DBI::dbGetQuery(con, + "SELECT roi_number, class_name, is_manual FROM annotations WHERE sample_name = 'test_nan' ORDER BY roi_number") + + expect_equal(rows$is_manual, c(1L, 0L, 1L, 0L)) + expect_equal(rows$class_name, c("Diatom", "unclassified", "Ciliate", "unclassified")) + + unlink(c(mat_dir, db_dir), recursive = TRUE) +}) + +test_that("export_db_to_mat restores NaN for is_manual=0 rows", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + sample_name <- "D20230101T120000_IFCB134" + class2use <- c("unclassified", "Diatom", "Ciliate") + + classifications <- data.frame( + file_name = sprintf("%s_%05d.png", sample_name, 1:4), + class_name = c("Diatom", "unclassified", "Ciliate", "unclassified"), + stringsAsFactors = FALSE + ) + # ROIs 2 and 4 are unreviewed (NaN in .mat) + save_annotations_db(db_path, sample_name, classifications, class2use, + "TestUser", is_manual = c(1L, 0L, 1L, 0L)) + + mat_dir <- tempfile("mat_") + dir.create(mat_dir) + + result <- export_db_to_mat(db_path, sample_name, mat_dir) + expect_true(result) + + mat_path <- file.path(mat_dir, paste0(sample_name, 
".mat")) + classlist <- iRfcb::ifcb_get_mat_variable(mat_path, variable_name = "classlist") + + # Reviewed ROIs should have valid indices, unreviewed should be NaN + expect_equal(classlist[1, 2], 2) # Diatom + expect_true(is.nan(classlist[2, 2])) # unreviewed -> NaN + expect_equal(classlist[3, 2], 3) # Ciliate + expect_true(is.nan(classlist[4, 2])) # unreviewed -> NaN + + unlink(c(db_dir, mat_dir), recursive = TRUE) +}) + +test_that("full roundtrip: .mat -> SQLite -> .mat preserves NaN and class list", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + # Create original .mat with NaN entries + mat_dir <- tempfile("mat_orig_") + dir.create(mat_dir) + original_mat <- file.path(mat_dir, "roundtrip_sample.mat") + class2use <- c("unclassified", "Diatom", "Ciliate", "Dinoflagellate") + + original_classlist <- c(2, NaN, 3, 4, NaN) + iRfcb::ifcb_create_manual_file( + roi_length = 5, class2use = class2use, + output_file = original_mat, + classlist = original_classlist + ) + + # Import into SQLite + db_dir <- tempfile("db_") + dir.create(db_dir) + db_path <- get_db_path(db_dir) + + import_mat_to_db(original_mat, db_path, "roundtrip_sample") + + # Export back to .mat + export_dir <- tempfile("mat_export_") + dir.create(export_dir) + export_db_to_mat(db_path, "roundtrip_sample", export_dir) + + # Read back and compare + exported_mat <- file.path(export_dir, "roundtrip_sample.mat") + exported_classlist <- iRfcb::ifcb_get_mat_variable(exported_mat, + variable_name = "classlist") + exported_class2use <- as.character( + iRfcb::ifcb_get_mat_variable(exported_mat, + variable_name = "class2use_manual")) + + # Class list should match exactly + expect_equal(exported_class2use, class2use) + + # Classlist indices should match: classified ROIs keep their index, NaN stays NaN + for (i in seq_along(original_classlist)) { + if (is.nan(original_classlist[i])) { 
+ expect_true(is.nan(exported_classlist[i, 2]), + info = paste("ROI", i, "should be NaN")) + } else { + expect_equal(exported_classlist[i, 2], original_classlist[i], + info = paste("ROI", i, "index mismatch")) + } + } + + unlink(c(mat_dir, db_dir, export_dir), recursive = TRUE) +}) diff --git a/tests/testthat/test-sample_saving.R b/tests/testthat/test-sample_saving.R index 391252e..daa34d0 100644 --- a/tests/testthat/test-sample_saving.R +++ b/tests/testthat/test-sample_saving.R @@ -305,6 +305,8 @@ test_that("save_sample_annotations creates MAT file with real data", { output_folder <- tempfile("output_") png_output_folder <- tempfile("png_output_") + db_folder <- tempfile("db_") + result <- save_sample_annotations( sample_name = sample_name, classifications = current_classifications, @@ -315,7 +317,9 @@ test_that("save_sample_annotations creates MAT file with real data", { png_output_folder = png_output_folder, roi_folder = roi_folder, class2use_path = class2use_path, - annotator = "TestUser" + annotator = "TestUser", + save_format = "mat", + db_folder = db_folder ) expect_true(result) @@ -338,4 +342,150 @@ test_that("save_sample_annotations creates MAT file with real data", { # Cleanup unlink(output_folder, recursive = TRUE) unlink(png_output_folder, recursive = TRUE) + unlink(db_folder, recursive = TRUE) +}) + +test_that("save_sample_annotations with save_format='sqlite' creates database", { + sample_name <- "D20230314T001205_IFCB134" + + # Create class2use file + class2use_file <- tempfile(fileext = ".txt") + writeLines(c("unclassified", "Diatom", "Ciliate"), class2use_file) + + # Create temp source folder with an image + src_folder <- tempfile("png_") + dir.create(file.path(src_folder, sample_name), recursive = TRUE) + file.create(file.path(src_folder, sample_name, paste0(sample_name, "_00001.png"))) + + output_folder <- tempfile("output_") + png_output_folder <- tempfile("png_out_") + db_folder <- tempfile("db_") + + classifications <- data.frame( + file_name 
= paste0(sample_name, "_00001.png"), + class_name = "Diatom", + score = NA_real_, + stringsAsFactors = FALSE + ) + + changes_log <- data.frame( + image = paste0(sample_name, "_00001.png"), + original_class = "unclassified", + new_class = "Diatom", + stringsAsFactors = FALSE + ) + + result <- save_sample_annotations( + sample_name = sample_name, + classifications = classifications, + original_classifications = classifications, + changes_log = changes_log, + temp_png_folder = src_folder, + output_folder = output_folder, + png_output_folder = png_output_folder, + roi_folder = tempdir(), + class2use_path = class2use_file, + annotator = "TestUser", + save_format = "sqlite", + db_folder = db_folder + ) + + expect_true(result) + + # SQLite database should exist in db_folder, not output_folder + db_path <- get_db_path(db_folder) + expect_true(file.exists(db_path)) + + # Should be able to load the annotations back + samples <- list_annotated_samples_db(db_path) + expect_true(sample_name %in% samples) + + # No .mat file should be created + mat_path <- file.path(output_folder, paste0(sample_name, ".mat")) + expect_false(file.exists(mat_path)) + + # No database in output_folder + expect_false(file.exists(get_db_path(output_folder))) + + # Cleanup + unlink(output_folder, recursive = TRUE) + unlink(png_output_folder, recursive = TRUE) + unlink(src_folder, recursive = TRUE) + unlink(db_folder, recursive = TRUE) + unlink(class2use_file) +}) + +test_that("save_sample_annotations with save_format='both' creates both outputs", { + skip_if_not_installed("iRfcb") + skip_if_not(reticulate::py_available(), "Python not available") + skip_if_not(reticulate::py_module_available("scipy"), "scipy not available") + + sample_name <- "D20220522T000439_IFCB134" + + png_folder <- testthat::test_path("test_data", "png") + roi_folder <- testthat::test_path("test_data", "raw") + class2use_path <- testthat::test_path("test_data", "class2use.mat") + + skip_if_not(dir.exists(file.path(png_folder, 
sample_name)), "Test PNG folder not found") + skip_if_not(file.exists(class2use_path), "Test class2use file not found") + skip_if_not( + file.exists(file.path(roi_folder, "2022", "D20220522", paste0(sample_name, ".adc"))), + "Test ADC file not found" + ) + + png_files <- list.files(file.path(png_folder, sample_name), pattern = "\\.png$") + skip_if(length(png_files) < 2, "Not enough test PNG files") + + original_classifications <- data.frame( + file_name = png_files, + class_name = rep("unclassified", length(png_files)), + score = rep(NA_real_, length(png_files)), + stringsAsFactors = FALSE + ) + + current_classifications <- data.frame( + file_name = png_files, + class_name = c("Mesodinium_rubrum", rep("Ciliophora", length(png_files) - 1)), + stringsAsFactors = FALSE + ) + + changes_log <- data.frame( + image = png_files[1], + original_class = "unclassified", + new_class = "Mesodinium_rubrum", + stringsAsFactors = FALSE + ) + + output_folder <- tempfile("output_") + png_output_folder <- tempfile("png_output_") + db_folder <- tempfile("db_") + + result <- save_sample_annotations( + sample_name = sample_name, + classifications = current_classifications, + original_classifications = original_classifications, + changes_log = changes_log, + temp_png_folder = png_folder, + output_folder = output_folder, + png_output_folder = png_output_folder, + roi_folder = roi_folder, + class2use_path = class2use_path, + annotator = "TestUser", + save_format = "both", + db_folder = db_folder + ) + + expect_true(result) + + # Both should exist + db_path <- get_db_path(db_folder) + expect_true(file.exists(db_path)) + + mat_file <- file.path(output_folder, paste0(sample_name, ".mat")) + expect_true(file.exists(mat_file)) + + # Cleanup + unlink(output_folder, recursive = TRUE) + unlink(png_output_folder, recursive = TRUE) + unlink(db_folder, recursive = TRUE) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 9cdc24b..da8f9b8 100644 --- 
a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -347,6 +347,26 @@ test_that("get_config_dir uses tempdir during R CMD check", { } }) +test_that("get_default_db_dir returns a valid path", { + db_folder <- get_default_db_dir() + expect_true(is.character(db_folder)) + expect_true(nzchar(db_folder)) +}) + +test_that("get_default_db_dir uses tempdir during R CMD check", { + old_val <- Sys.getenv("_R_CHECK_PACKAGE_NAME_", unset = NA) + Sys.setenv("_R_CHECK_PACKAGE_NAME_" = "ClassiPyR") + + db_folder <- get_default_db_dir() + expect_true(grepl(tempdir(), db_folder, fixed = TRUE)) + + if (is.na(old_val)) { + Sys.unsetenv("_R_CHECK_PACKAGE_NAME_") + } else { + Sys.setenv("_R_CHECK_PACKAGE_NAME_" = old_val) + } +}) + # ============================================================================= # File index cache functions # ============================================================================= diff --git a/vignettes/class-management.Rmd b/vignettes/class-management.Rmd index 3cba17f..c111e40 100644 --- a/vignettes/class-management.Rmd +++ b/vignettes/class-management.Rmd @@ -20,7 +20,7 @@ Understanding and managing your class list is important for maintaining consiste ## Why Class Indices Matter (ifcb-analysis Users) -> **Note**: This section is primarily relevant if you use the [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) MATLAB toolbox (Sosik & Olson, 2007). If you only work with CSV exports, class indices are less critical. +> **Note**: This section is primarily relevant if you export `.mat` files for use with the [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) MATLAB toolbox (Sosik & Olson, 2007). If you use the default SQLite storage or work with CSV exports, class indices are less critical because class names are stored directly. 
IFCB .mat annotations use **numerical indices** to reference classes: diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd index f080c00..546de09 100644 --- a/vignettes/faq.Rmd +++ b/vignettes/faq.Rmd @@ -42,13 +42,9 @@ A: No. The app only reads your original files. All output is written to separate **Q: I see "Python not available" warning** -A: This warning affects saving .mat files. Python is required for: +A: This warning only appears when your storage format includes `.mat` files. Python is **not needed** for the default SQLite storage. -- Saving annotations as .mat files for [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) - -Reading .mat files (annotations, classifier output, class lists) does not require Python. If you do not need to save .mat files, you can ignore this warning. - -To enable .mat support: +If you see this warning and don't need `.mat` files, switch to SQLite in Settings > Annotation Storage. Otherwise, to enable `.mat` support: ```{r, eval = FALSE} library(iRfcb) @@ -57,6 +53,10 @@ ifcb_py_install() # Creates venv in current working directory Then restart the app. +**Q: Do I need Python to use ClassiPyR?** + +A: No. The default storage format is SQLite, which works out of the box with no Python dependency. Python is only needed if you want to export `.mat` files for [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) compatibility. + **Q: Where is the Python virtual environment created?** A: By default, `ifcb_py_install()` creates a `venv` folder in your home directory. You can specify a different location: @@ -86,7 +86,7 @@ When you specify `run_app(venv_path = "/path/to/venv")`, that path is used for P A: Make sure you have remotes installed and try: ```{r, eval = FALSE} -install.packages("remotes") +if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes") remotes::install_github("EuropeanIFCBGroup/ClassiPyR") ``` @@ -199,7 +199,7 @@ A: The ROI might be empty (no actual image data). 
These are filtered out automat A: Check that: 1. Output folder is writable -2. Python is available (required for saving .mat files) +2. If using MAT format: Python is available (not needed for default SQLite storage) 3. Click "Save Annotations" before closing --- @@ -254,22 +254,146 @@ Common taxonomic characters like hyphens (`-`), underscores (`_`), periods (`.`) **Q: Where are my annotations saved?** -A: In the Output Folder you configured: +A: Annotations are split across two locations: + +- **SQLite database** (default): stored in the **Database Folder** (a local directory) +- **MAT files and statistics**: stored in the **Output Folder** (can be on a network drive) + +``` +db_folder/ ← local drive (Database Folder) +└── annotations.sqlite ← single database for ALL samples + +output_folder/ ← can be a network drive (Output Folder) +├── D20230101T120000_IFCB134.mat ← only if storage format includes "MAT" +├── D20230202T080000_IFCB134.mat +└── validation_statistics/ + ├── ..._validation_stats.csv + └── ..._validation_detailed.csv +``` + +By default, the database is stored in a persistent local directory (`tools::R_user_dir("ClassiPyR", "data")`). Back up `annotations.sqlite` to preserve your work. + +**Q: Where is the default database location?** + +A: The default Database Folder is a platform-specific local directory: + +- **Linux**: `~/.local/share/R/ClassiPyR/` +- **macOS**: `~/Library/Application Support/org.R-project.R/R/ClassiPyR/` +- **Windows**: `%APPDATA%/R/data/R/ClassiPyR/` + +You can find the exact path with: + +```{r, eval = FALSE} +ClassiPyR::get_default_db_dir() +``` + +You can change it in Settings > Database Folder, but it should always be a local drive. + +**Q: Can I put the database on a network drive?** + +A: No. SQLite databases are [not safe on network filesystems](https://www.sqlite.org/useovernet.html) (NFS, SMB/CIFS) because network file locking is unreliable, which can lead to database corruption. 
Always keep the Database Folder on a local drive. The Output Folder (for MAT files and statistics) can safely be on a network drive. + +**Q: How do I transfer my annotations to another computer?** + +A: Since the SQLite database is stored locally, you cannot simply share it over a network drive. Instead, use `.mat` files as the interchange format: + +1. **Export** from the source computer (requires Python with scipy): + +```{r, eval = FALSE} +library(ClassiPyR) +db_path <- get_db_path(get_default_db_dir()) +# Export all annotations to .mat files in a shared output folder +result <- export_all_db_to_mat(db_path, "/shared/network/manual") +cat(result$success, "exported\n") +``` + +Or use the **Export SQLite → .mat** button in Settings. + +2. **Import** on the target computer: + +```{r, eval = FALSE} +library(ClassiPyR) +# The class list is read from each .mat file (class2use_manual); the third argument is the annotator name +db_path <- get_db_path(get_default_db_dir()) +# Import .mat files from the shared folder into the local database +result <- import_all_mat_to_db("/shared/network/manual", db_path, "Jane") +cat(result$success, "imported,", result$skipped, "skipped\n") +``` + +Or use the **Import .mat → SQLite** button in Settings. Already-imported samples are skipped automatically. -- MAT annotation files are saved directly in the output folder (one per sample) -- `validation_statistics/` subfolder contains CSV statistics -- PNGs are in the PNG Output Folder, organized by class name +You can also simply copy the `annotations.sqlite` file directly between machines if you prefer. **Q: Can I import annotations back to MATLAB?** -A: Yes, the MAT files are compatible with the [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) toolbox (Sosik & Olson, 2007). 
Use the list in `startMC`, or load the list in MATLAB using: +A: Yes, if you save with the "MAT file" or "Both" storage format, the MAT files are compatible with the [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) toolbox (Sosik & Olson, 2007). Use the list in `startMC`, or load the list in MATLAB using: ```matlab load('sample_name.mat'); % classlist contains [roi_number, class_index] ``` -Note: Python with `scipy` must be installed to save .mat files. +Note: Python with `scipy` must be installed to save .mat files. Change the storage format in Settings > Annotation Storage. + +**Q: Can I migrate existing .mat annotations to the SQLite database?** + +A: Yes. The easiest way is the **Import .mat → SQLite** button in Settings > Annotation Storage, which bulk-imports all `.mat` files in your output folder. + +You can also import programmatically — a single file: + +```{r, eval = FALSE} +library(ClassiPyR) +# The class list is read from the .mat file itself (class2use_manual) +import_mat_to_db( + mat_path = "/data/manual/D20230101T120000_IFCB134.mat", + db_path = get_db_path(get_default_db_dir()), + sample_name = "D20230101T120000_IFCB134", + "Jane" # annotator name recorded for the imported annotations +) +``` + +Or bulk-import all `.mat` files in a folder: + +```{r, eval = FALSE} +result <- import_all_mat_to_db("/data/manual", get_db_path(get_default_db_dir()), "Jane") +cat(result$success, "imported,", result$failed, "failed,", result$skipped, "skipped\n") +``` + +**Q: Can I export SQLite annotations back to .mat files?** + +A: Yes. Use the **Export SQLite → .mat** button in Settings > Annotation Storage to export all annotated samples at once. This requires Python with scipy. 
+ +You can also export programmatically: + +```{r, eval = FALSE} +# Single sample +export_db_to_mat(get_db_path(get_default_db_dir()), "D20230101T120000_IFCB134", "/data/manual") + +# All samples +result <- export_all_db_to_mat(get_db_path(get_default_db_dir()), "/data/manual") +cat(result$success, "exported,", result$failed, "failed\n") +``` + +**Q: Can I change the annotator name for existing annotations?** + +A: Yes. Use `update_annotator()` from the R console: + +```{r, eval = FALSE} +library(ClassiPyR) +db_path <- get_db_path(get_default_db_dir()) + +# Update a single sample +update_annotator(db_path, "D20230101T120000_IFCB134", "Jane") + +# Update several samples at once +update_annotator(db_path, c("sample_A", "sample_B"), "Jane") + +# Update all annotated samples (e.g. after a bulk import) +all_samples <- list_annotated_samples_db(db_path) +update_annotator(db_path, all_samples, "Jane") +``` + +The function returns a named vector showing how many annotation rows were updated per sample (0 means the sample was not found in the database). **Q: What's in the statistics CSV?** @@ -392,7 +516,7 @@ A: In the same config directory as your settings: |-------|----------| | "ROI file not found" | Check ROI Data Folder path; ensure `.roi` files use IFCB naming and click Sync | | "ADC file not found" | ADC file must be alongside ROI file | -| "Python not available" | Affects saving .mat files. Run `iRfcb::ifcb_py_install()` | +| "Python not available" | Only affects `.mat` export. 
Switch to SQLite in Settings, or run `iRfcb::ifcb_py_install()` | | "Error loading class list" | Check file format (.mat or .txt) | | "No samples found" | Check ROI Data Folder configuration | | App fails to start | Try `run_app(reset_settings = TRUE)` to clear saved settings | diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index c4eaee5..15e732c 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -27,9 +27,9 @@ Make sure you have: ### Python Requirements -Python is required for saving annotations as MATLAB .mat files for use with [ifcb-analysis](https://github.com/hsosik/ifcb-analysis). Reading existing .mat files (annotations, classifier output, class lists) does not require Python. +Python is **not required** for the default workflow. ClassiPyR stores annotations in a local SQLite database that works out of the box with no external dependencies. -If you only need to read .mat files or work with CSV classification files, Python is not required. +Python is only needed if you want to export annotations as MATLAB `.mat` files for use with [ifcb-analysis](https://github.com/hsosik/ifcb-analysis). Reading existing `.mat` files (annotations, classifier output, class lists) also does not require Python. ### CSV Classification Format @@ -43,9 +43,9 @@ D20230101T120000_IFCB134_00002.png,Ciliate An optional `score` column (confidence values between 0 and 1) can also be included. See the [User Guide](user-guide.html) for more details. -### Python Setup +### Python Setup (optional) -To set up Python: +Only needed if you plan to export `.mat` files. Skip this step if using the default SQLite storage. 
```{r, eval = FALSE} library(iRfcb) @@ -79,8 +79,11 @@ Configure your folders using the built-in folder browser: |---------|-------------|---------| | Classification Folder | Where your CSV/MAT classifications are | `/ifcb/classified/` | | ROI Data Folder | Where your IFCB raw files are | `/ifcb/raw/` | -| Output Folder | Where annotations will be saved | `/ifcb/manual/` | -| PNG Output Folder | Where images will be organized | `/ifcb/png/` | +| Output Folder | Where MAT files and statistics go | `/ifcb/manual/` | +| Database Folder | Where the SQLite database is stored (must be local) | auto-detected | +| PNG Output Folder | Where images will be organized by class | `/ifcb/png/` | + +> **Network drives**: The Output Folder can safely reside on a network share (e.g., for MAT files and statistics). However, the Database Folder must be on a **local** drive because [SQLite is not safe on network filesystems](https://www.sqlite.org/useovernet.html). The default database location is a local user-level directory that works out of the box. Click **Save Settings**. The app will scan your folders and build a file index cache for fast loading. @@ -206,10 +209,16 @@ The images will move to their new class group. Click **Save Annotations** to save: -- MAT file for MATLAB compatibility (requires Python; for use with [ifcb-analysis](https://github.com/hsosik/ifcb-analysis)) +- **SQLite database** (default) - annotations are written to `annotations.sqlite` in your Database Folder. This single file stores annotations for all samples. No Python needed. 
- Statistics CSV with accuracy metrics - PNGs organized by class +You can change the storage format in **Settings > Annotation Storage**: + +- **SQLite** (recommended) - works out of the box +- **MAT file** - for [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) compatibility (requires Python) +- **Both** - writes to both SQLite and `.mat` + ### Auto-save Work is automatically saved when: diff --git a/vignettes/user-guide.Rmd b/vignettes/user-guide.Rmd index c3676b4..a8d20ec 100644 --- a/vignettes/user-guide.Rmd +++ b/vignettes/user-guide.Rmd @@ -200,7 +200,7 @@ Files matching `*_class*.mat` pattern containing: ### Existing Annotations -Previously saved annotations (in output folder) are automatically detected and can be resumed. +Previously saved annotations (in SQLite database or `.mat` files in the output folder) are automatically detected and can be resumed. When both exist, the SQLite version is loaded (faster). --- @@ -248,9 +248,24 @@ ClassiPyR::rescan_file_index( ## Output Files -When you save, the app creates: +When you save, the app creates files based on your chosen storage format (configurable in Settings). -### Annotation MAT File +### SQLite Database (default) + +`db_folder/annotations.sqlite` + +A single SQLite database file containing annotations for all samples. This is the default storage backend: + +- No Python dependency required +- Fast read/write performance +- Single file for all samples — easy to back up and manage +- Contains `annotations` table (one row per ROI) and `class_lists` table (preserves class indices for `.mat` export) + +The database is stored in a separate **Database Folder** (configurable in Settings), which defaults to a local user-level directory (`tools::R_user_dir("ClassiPyR", "data")`). This separation ensures the SQLite database stays on a local filesystem even when the Output Folder is on a network drive. + +> **Note**: The SQLite database **must** be on a local drive. 
[SQLite file locking is unreliable on network filesystems](https://www.sqlite.org/useovernet.html) (NFS/SMB), which can lead to database corruption. For multi-user workflows, each annotator should use their own local Database Folder. + +### Annotation MAT File (optional) `output/[sample_name].mat` @@ -259,7 +274,7 @@ MATLAB-compatible format with: - `classlist`: ROI numbers and class indices - Compatible with [ifcb-analysis](https://github.com/hsosik/ifcb-analysis) toolbox -> **Note**: Saving MAT files requires Python with scipy. +> **Note**: Saving MAT files requires Python with scipy. Enable in Settings > Annotation Storage by selecting "MAT file" or "Both". ### Statistics Files @@ -287,11 +302,26 @@ Images organized into class folders for training CNN models or other classifiers |---------|-------------| | Classification Folder | Source of CSV/MAT classifications | | ROI Data Folder | IFCB raw files (ROI/ADC/HDR) | -| Output Folder | Where MAT and CSV output goes | +| Output Folder | Where MAT files and statistics go (can be on a network drive) | +| Database Folder | Where the SQLite database is stored (must be a local drive) | | PNG Output Folder | Where organized images go | Folder paths are configured using a web-based folder browser that works on all platforms (Linux, macOS, Windows). Changing folder paths in Settings automatically invalidates the file index cache, triggering a fresh scan. +### Annotation Storage + +| Format | Description | +|--------|-------------| +| SQLite (recommended) | Default. Stores annotations in `annotations.sqlite` in the Database Folder. No Python needed. | +| MAT file | MATLAB-compatible `.mat` files for [ifcb-analysis](https://github.com/hsosik/ifcb-analysis). Requires Python with scipy. | +| Both | Writes to both SQLite and `.mat` for maximum compatibility. 
| + +Below the format selector, three buttons allow bulk import and export: + +- **Import .mat → SQLite**: Imports all `.mat` annotation files from the output folder into the SQLite database. Already-imported samples are skipped. +- **Export SQLite → .mat**: Exports all annotated samples from the database to `.mat` files. Requires Python with scipy. +- **Export SQLite → PNG**: Extracts annotated images from ROI files into class-name subfolders in the PNG Output Folder. Useful for building training datasets for CNN classifiers. + ### Auto-Sync | Setting | Description | @@ -374,11 +404,9 @@ Settings are loaded automatically when you start the app, so your folder paths, ## Dependencies -`ClassiPyR` relies on **[`iRfcb`](https://github.com/EuropeanIFCBGroup/iRfcb)** for all IFCB data operations: +`ClassiPyR` relies on: -- Extracting images from ROI files -- Reading ADC metadata (dimensions, timestamps) -- Reading and writing MATLAB .mat files -- Class list handling +- **[`iRfcb`](https://github.com/EuropeanIFCBGroup/iRfcb)** for IFCB data operations (extracting images, reading ADC metadata, reading/writing `.mat` files, class list handling) +- **[`RSQLite`](https://CRAN.R-project.org/package=RSQLite)** and **[`DBI`](https://CRAN.R-project.org/package=DBI)** for the SQLite annotation database -`iRfcb` is installed automatically as a dependency when you install `ClassiPyR`. +All R dependencies are installed automatically when you install `ClassiPyR`. Python is only needed for `.mat` file export.