From d2123341137c5b76c225bd67c188c8042d1bdb46 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 25 Dec 2025 18:37:53 -0500 Subject: [PATCH 1/7] Add customizable sex coding to checkParentIDs Introduced code_male and code_female parameters to checkParentIDs, allowing users to specify custom values for male and female sex coding. Updated documentation and NEWS to reflect this change. Improved handling of missing sex values in checkParentSex. --- NEWS.md | 1 + R/checkParents.R | 26 +++++++++++++++++++------- R/checkSex.R | 8 +++++++- man/checkParentIDs.Rd | 8 +++++++- man/checkSex.Rd | 2 +- man/recodeSex.Rd | 2 +- man/repairSex.Rd | 2 +- 7 files changed, 37 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index c561e732..10f4c368 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * Added tests for ped2gen * Fixed handling of character ID variables leading to a warning in ped2fam * Added famIDs to phantom parents +* Tweaked how sex coding is handled # BGmisc 1.5.1 ## CRAN submission diff --git a/R/checkParents.R b/R/checkParents.R index 5ee927f8..807a1987 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -12,11 +12,12 @@ #' @param repairsex A logical flag indicating whether to attempt repairs on sex of the parents #' @param addphantoms A logical flag indicating whether to add phantom parents for missing parent IDs. #' @param parentswithoutrow A logical flag indicating whether to add parents without a row in the pedigree. -#' @param famID Character. Column name for family IDs. -#' @param personID Character. Column name for individual IDs. +#' @param famID Character. Column name for family IDs. +#' @param personID Character. Column name for individual IDs. #' @param momID Character. Column name for maternal IDs. #' @param dadID Character. Column name for paternal IDs. 
-#' +#' @param code_male Value representing male sex +#' @param code_female Value representing female sex #' #' @return Depending on the value of `repair`, either a list containing validation results or a repaired dataframe is returned. #' @examples @@ -32,7 +33,10 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, famID = "famID", personID = "ID", momID = "momID", - dadID = "dadID") { + dadID = "dadID", + code_male = NULL, + code_female = NULL + ) { # Standardize column names in the input dataframe ped <- standardizeColnames(ped, verbose = verbose) @@ -87,18 +91,26 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, cat("Step 2: Determining the if moms are the same sex and dads are same sex\n") } # Determine modal sex values for moms and dads + + + mom_results <- checkParentSex(ped, parent_col = "momID", verbose = verbose) dad_results <- checkParentSex(ped, parent_col = "dadID", verbose = verbose) validation_results$mom_sex <- mom_results$unique_sexes validation_results$dad_sex <- dad_results$unique_sexes - validation_results$female_var <- mom_results$modal_sex - validation_results$male_var <- dad_results$modal_sex + validation_results$wrong_sex_moms <- mom_results$inconsistent_parents validation_results$wrong_sex_dads <- dad_results$inconsistent_parents validation_results$female_moms <- mom_results$all_same_sex validation_results$male_dads <- dad_results$all_same_sex - + if (!is.null(code_male) && !is.null(code_female)) { + validation_results$male_var <- code_male + validation_results$female_var <- code_female + } else { + validation_results$female_var <- mom_results$modal_sex + validation_results$male_var <- dad_results$modal_sex + } # Are any parents in both momID and dadID? 
momdad <- intersect(ped$dadID, ped$momID) if (length(momdad) > 0 && !is.na(momdad)) { diff --git a/R/checkSex.R b/R/checkSex.R index d7b3f8ab..ef5b64c7 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -17,7 +17,7 @@ #' #' @details This function uses the terms 'male' and 'female' in a biological context, referring to chromosomal and other biologically-based characteristics necessary for constructing genetic pedigrees. The biological aspect of sex used in genetic analysis (genotype) is distinct from the broader, richer concept of gender identity (phenotype). #' -#' We recognize the importance of using language and methodologies that affirm and respect the full spectrum of gender identities. +#' We recognize the importance of using language and methodologies that affirm and respect the full spectrum of gender identities. #' The developers of this package express unequivocal support for folx in the transgender #' and LGBTQ+ communities. #' @@ -225,9 +225,15 @@ checkParentSex <- function(ped, parent_col, sex_col = "sex", verbose = FALSE) { # Store the most frequent sex for moms and dads modal_sex <- names(sort(table(parent_sexes), decreasing = TRUE))[1] + if(all(is.na(modal_sex)) && verbose == TRUE) { + cat(paste0("All parents in role ", parent_col, " have missing sex values.\n")) +} + # Type coercion based on ped$sex type if (is.numeric(ped[[sex_col]])) { modal_sex <- as.numeric(modal_sex) + } else if (is.character(ped[[sex_col]])) { + modal_sex <- as.character(modal_sex) } # List ids for dads that are female, moms that are male diff --git a/man/checkParentIDs.Rd b/man/checkParentIDs.Rd index d7386b10..84852d03 100644 --- a/man/checkParentIDs.Rd +++ b/man/checkParentIDs.Rd @@ -14,7 +14,9 @@ checkParentIDs( famID = "famID", personID = "ID", momID = "momID", - dadID = "dadID" + dadID = "dadID", + code_male = NULL, + code_female = NULL ) } \arguments{ @@ -37,6 +39,10 @@ checkParentIDs( \item{momID}{Character. Column name for maternal IDs.} \item{dadID}{Character. 
Column name for paternal IDs.} + +\item{code_male}{Value representing male sex} + +\item{code_female}{Value representing female sex} } \value{ Depending on the value of `repair`, either a list containing validation results or a repaired dataframe is returned. diff --git a/man/checkSex.Rd b/man/checkSex.Rd index 866ee531..bca2fada 100644 --- a/man/checkSex.Rd +++ b/man/checkSex.Rd @@ -50,7 +50,7 @@ If `repair = TRUE`, the function standardizes sex coding by: This function uses the terms 'male' and 'female' in a biological context, referring to chromosomal and other biologically-based characteristics necessary for constructing genetic pedigrees. The biological aspect of sex used in genetic analysis (genotype) is distinct from the broader, richer concept of gender identity (phenotype). -We recognize the importance of using language and methodologies that affirm and respect the full spectrum of gender identities. +We recognize the importance of using language and methodologies that affirm and respect the full spectrum of gender identities. The developers of this package express unequivocal support for folx in the transgender and LGBTQ+ communities. } diff --git a/man/recodeSex.Rd b/man/recodeSex.Rd index 076ffa10..6781ef4d 100644 --- a/man/recodeSex.Rd +++ b/man/recodeSex.Rd @@ -51,7 +51,7 @@ If `repair = TRUE`, the function standardizes sex coding by: This function uses the terms 'male' and 'female' in a biological context, referring to chromosomal and other biologically-based characteristics necessary for constructing genetic pedigrees. The biological aspect of sex used in genetic analysis (genotype) is distinct from the broader, richer concept of gender identity (phenotype). -We recognize the importance of using language and methodologies that affirm and respect the full spectrum of gender identities. +We recognize the importance of using language and methodologies that affirm and respect the full spectrum of gender identities. 
The developers of this package express unequivocal support for folx in the transgender and LGBTQ+ communities. } diff --git a/man/repairSex.Rd b/man/repairSex.Rd index 5c8e1ae0..ca3aa26f 100644 --- a/man/repairSex.Rd +++ b/man/repairSex.Rd @@ -34,7 +34,7 @@ If `repair = TRUE`, the function standardizes sex coding by: This function uses the terms 'male' and 'female' in a biological context, referring to chromosomal and other biologically-based characteristics necessary for constructing genetic pedigrees. The biological aspect of sex used in genetic analysis (genotype) is distinct from the broader, richer concept of gender identity (phenotype). -We recognize the importance of using language and methodologies that affirm and respect the full spectrum of gender identities. +We recognize the importance of using language and methodologies that affirm and respect the full spectrum of gender identities. The developers of this package express unequivocal support for folx in the transgender and LGBTQ+ communities. } From 4060a6236b56cd43926df493564eaf96790af53b Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 2 Jan 2026 13:33:39 -0500 Subject: [PATCH 2/7] Add support for unknown sex code in pedigree functions Introduced the 'code_unknown' parameter to checkSex, repairSex, and recodeSex functions, allowing explicit handling and recoding of unknown sex values. Updated documentation to reflect the new parameter and its usage. 
Should help address https://github.com/R-Computing-Lab/ggpedigree/issues/95 --- NEWS.md | 2 +- R/checkSex.R | 108 +++++++++++++++++++++++++++++++---------------- man/checkSex.Rd | 3 ++ man/recodeSex.Rd | 6 +++ man/repairSex.Rd | 10 ++++- 5 files changed, 91 insertions(+), 38 deletions(-) diff --git a/NEWS.md b/NEWS.md index 10f4c368..591f9592 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ * Added tests for ped2gen * Fixed handling of character ID variables leading to a warning in ped2fam * Added famIDs to phantom parents -* Tweaked how sex coding is handled +* Tweaked how sex coding is handled to allow for unknown sex # BGmisc 1.5.1 ## CRAN submission diff --git a/R/checkSex.R b/R/checkSex.R index ef5b64c7..e3be476e 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -24,6 +24,7 @@ #' @param ped A dataframe representing the pedigree data with a 'sex' column. #' @param code_male The current code used to represent males in the 'sex' column. #' @param code_female The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed. +#' @param code_unknown The current code used to represent unknown #' @param verbose A logical flag indicating whether to print progress and validation messages to the console. #' @param repair A logical flag indicating whether to attempt repairs on the sex coding. #' @param momID The column name for maternal IDs. Default is "momID". @@ -37,7 +38,10 @@ #' } #' @export #' -checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, repair = FALSE, +checkSex <- function(ped, code_male = NULL, + code_female = NULL, + code_unknown = NULL, + verbose = FALSE, repair = FALSE, momID = "momID", dadID = "dadID") { # Standardize column names in the input dataframe @@ -61,7 +65,6 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, } - # Are there multiple sexes/genders in the list of dads and moms? 
dad_results <- checkParentSex(ped, parent_col = dadID, verbose = verbose) @@ -92,7 +95,11 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, if (validation_results$sex_length == 2) { # Recode all dads to the most frequent male value - ped <- recodeSex(ped, code_male = validation_results$most_frequent_sex_dad) + ped <- recodeSex(ped, + code_male = validation_results$most_frequent_sex_dad, + code_female = validation_results$most_frequent_sex_mom, + code_unknown = code_unknown + ) # Count and record the change num_changes <- sum(original_ped$sex != ped$sex) # Record the change and the count @@ -128,8 +135,16 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, #' @export #' #' @seealso \code{\link{checkSex}} -repairSex <- function(ped, verbose = FALSE, code_male = NULL, code_female = NULL) { - checkSex(ped = ped, verbose = verbose, repair = TRUE, code_male = code_male, code_female = code_female) +repairSex <- function(ped, verbose = FALSE, + code_male = NULL, + code_female = NULL, + code_unknown = NULL) { + checkSex( + ped = ped, verbose = verbose, repair = TRUE, + code_male = code_male, + code_female = code_female, + code_unknown = code_unknown + ) } #' Recodes Sex Variable in a Pedigree Dataframe @@ -142,51 +157,72 @@ repairSex <- function(ped, verbose = FALSE, code_male = NULL, code_female = NULL #' @param recode_na The value to use for missing values. Default is NA_character_ #' @param recode_male The value to use for males. Default is "M" #' @param recode_female The value to use for females. Default is "F" +#' @param recode_unknown The value to use for unknown values. Default is "U" #' @inherit checkSex details #' @return A modified version of the input data.frame \code{ped}, containing an additional or modified 'sex_recode' column where the 'sex' values are recoded according to \code{code_male}. NA values in the 'sex' column are preserved. 
#' @export recodeSex <- function( - ped, verbose = FALSE, code_male = NULL, code_na = NULL, code_female = NULL, - recode_male = "M", recode_female = "F", recode_na = NA_character_) { + ped, verbose = FALSE, code_male = NULL, code_na = NULL, code_female = NULL, + code_unknown = NULL, + recode_male = "M", + recode_female = "F", + recode_unknown = "U", + recode_na = NA_character_ +) { + if (is.null(code_male) && is.null(code_female)) { + if (verbose == TRUE) { + warning("Both code male and code female are empty. No recoding was done.") + } + return(ped) + } + # First, set any code_na values to NA if (!is.null(code_na)) { ped$sex[ped$sex == code_na] <- NA } - # Recode as "F" or "M" based on code_male, preserving NAs - if (!is.null(code_male) && !is.null(code_female)) { - # Initialize sex_recode as NA, preserving the length of the 'sex' column - ped$sex_recode <- recode_na - ped$sex_recode[ped$sex == code_female] <- recode_female - ped$sex_recode[ped$sex == code_male] <- recode_male - # Overwriting temp recode variable - ped$sex <- ped$sex_recode - ped$sex_recode <- NULL - } else if (!is.null(code_male) && is.null(code_female)) { - # Initialize sex_recode as NA, preserving the length of the 'sex' column - ped$sex_recode <- recode_na - ped$sex_recode[ped$sex != code_male & !is.na(ped$sex)] <- recode_female + + # Initialize sex_recode as NA, preserving the length of the 'sex' column + ped$sex_recode <- recode_na + if (!is.null(code_male)) { ped$sex_recode[ped$sex == code_male] <- recode_male - # Overwriting temp recode variable - ped$sex <- ped$sex_recode - ped$sex_recode <- NULL - } else if (is.null(code_male) && !is.null(code_female)) { - # Initialize sex_recode as NA, preserving the length of the 'sex' column - ped$sex_recode <- recode_na - ped$sex_recode[ped$sex != code_female & !is.na(ped$sex)] <- recode_male + } + if (!is.null(code_female)) { ped$sex_recode[ped$sex == code_female] <- recode_female - # Overwriting temp recode variable - ped$sex <- ped$sex_recode - 
ped$sex_recode <- NULL - } else { - if (verbose == TRUE) { - warning("Both code male and code female are empty. No recoding was done.") + } + + # handle unknown codes + if (!is.null(code_unknown)) { + ped$sex_recode[ped$sex == code_unknown] <- recode_unknown + } else if (!is.null(code_male) && !is.null(code_female)) { + ped$sex_recode[!ped$sex %in% c(code_male, code_female) & !is.na(ped$sex)] <- recode_unknown + } + + + # Handle cases where only one of code_male / code_female is provided + # just male + if (!is.null(code_male) && is.null(code_female)) { + if (!is.null(code_unknown)) { + ped$sex_recode[ped$sex != code_male & !is.na(ped$sex) & ped$sex != code_unknown] <- recode_female + } else if (is.null(code_unknown)) { + ped$sex_recode[ped$sex != code_male & !is.na(ped$sex)] <- recode_female } } + # just female + if (is.null(code_male) && !is.null(code_female)) { + if (!is.null(code_unknown)) { + ped$sex_recode[ped$sex != code_female & !is.na(ped$sex) & ped$sex != code_unknown] <- recode_male + } else if (is.null(code_unknown)) { + ped$sex_recode[ped$sex != code_female & !is.na(ped$sex)] <- recode_male + } + } + + # Overwriting temp recode variable + ped$sex <- ped$sex_recode + ped$sex_recode <- NULL return(ped) } - #' Check Parental Role Sex Consistency #' #' Validates sex coding consistency for a given parental role (momID or dadID). 
@@ -225,9 +261,9 @@ checkParentSex <- function(ped, parent_col, sex_col = "sex", verbose = FALSE) { # Store the most frequent sex for moms and dads modal_sex <- names(sort(table(parent_sexes), decreasing = TRUE))[1] - if(all(is.na(modal_sex)) && verbose == TRUE) { + if (all(is.na(modal_sex)) && verbose == TRUE) { cat(paste0("All parents in role ", parent_col, " have missing sex values.\n")) -} + } # Type coercion based on ped$sex type if (is.numeric(ped[[sex_col]])) { diff --git a/man/checkSex.Rd b/man/checkSex.Rd index bca2fada..6c6d58d4 100644 --- a/man/checkSex.Rd +++ b/man/checkSex.Rd @@ -8,6 +8,7 @@ checkSex( ped, code_male = NULL, code_female = NULL, + code_unknown = NULL, verbose = FALSE, repair = FALSE, momID = "momID", @@ -21,6 +22,8 @@ checkSex( \item{code_female}{The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed.} +\item{code_unknown}{The current code used to represent unknown} + \item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.} \item{repair}{A logical flag indicating whether to attempt repairs on the sex coding.} diff --git a/man/recodeSex.Rd b/man/recodeSex.Rd index 6781ef4d..728f50a9 100644 --- a/man/recodeSex.Rd +++ b/man/recodeSex.Rd @@ -10,8 +10,10 @@ recodeSex( code_male = NULL, code_na = NULL, code_female = NULL, + code_unknown = NULL, recode_male = "M", recode_female = "F", + recode_unknown = "U", recode_na = NA_character_ ) } @@ -26,10 +28,14 @@ recodeSex( \item{code_female}{The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed.} +\item{code_unknown}{The current code used to represent unknown} + \item{recode_male}{The value to use for males. Default is "M"} \item{recode_female}{The value to use for females. Default is "F"} +\item{recode_unknown}{The value to use for unknown values. Default is "U"} + \item{recode_na}{The value to use for missing values. 
Default is NA_character_} } \value{ diff --git a/man/repairSex.Rd b/man/repairSex.Rd index ca3aa26f..aa0fd5ec 100644 --- a/man/repairSex.Rd +++ b/man/repairSex.Rd @@ -4,7 +4,13 @@ \alias{repairSex} \title{Repairs Sex Coding in a Pedigree Dataframe} \usage{ -repairSex(ped, verbose = FALSE, code_male = NULL, code_female = NULL) +repairSex( + ped, + verbose = FALSE, + code_male = NULL, + code_female = NULL, + code_unknown = NULL +) } \arguments{ \item{ped}{A dataframe representing the pedigree data with a 'sex' column.} @@ -14,6 +20,8 @@ repairSex(ped, verbose = FALSE, code_male = NULL, code_female = NULL) \item{code_male}{The current code used to represent males in the 'sex' column.} \item{code_female}{The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed.} + +\item{code_unknown}{The current code used to represent unknown} } \value{ A modified version of the input data.frame \code{ped}, containing an additional or modified 'sex_recode' column where the 'sex' values are recoded according to \code{code_male}. NA values in the 'sex' column are preserved. 
From 63182abea6980832954772abe99e7c5b175c452c Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 2 Jan 2026 14:29:33 -0500 Subject: [PATCH 3/7] Update checkSex.R --- R/checkSex.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/checkSex.R b/R/checkSex.R index e3be476e..9745060e 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -183,6 +183,8 @@ recodeSex <- function( # Initialize sex_recode as NA, preserving the length of the 'sex' column ped$sex_recode <- recode_na + + if (!is.null(code_male)) { ped$sex_recode[ped$sex == code_male] <- recode_male } @@ -191,8 +193,10 @@ recodeSex <- function( } # handle unknown codes - if (!is.null(code_unknown)) { + if (!is.null(code_unknown) && !is.na(code_unknown)) { ped$sex_recode[ped$sex == code_unknown] <- recode_unknown + } else if (!is.null(code_unknown) && is.na(code_unknown)) { + ped$sex_recode[is.na(ped$sex)] <- recode_unknown } else if (!is.null(code_male) && !is.null(code_female)) { ped$sex_recode[!ped$sex %in% c(code_male, code_female) & !is.na(ped$sex)] <- recode_unknown } From f0ae8411c0ddbe753d07c947f3a1b11f704a1353 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 2 Jan 2026 14:41:33 -0500 Subject: [PATCH 4/7] Update NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 591f9592..6737cbd6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# BGmisc beta 1.5.2 +# BGmisc 1.5.2 * More flexible ID generation for simulatePedigree * Created ped2gen function to extract generation information from pedigree data.frames * Added tests for ped2gen From e37cd10bf178cf4291c7ad7739593ecf556fa02d Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Fri, 2 Jan 2026 15:31:53 -0500 Subject: [PATCH 5/7] Update R/checkSex.R Update R/checkParents.R Update R/checkParents.R Co-Authored-By: Copilot <175728472+Copilot@users.noreply.github.com> --- 
R/checkParents.R | 10 ++++++---- R/checkSex.R | 2 +- man/checkParentIDs.Rd | 4 ++-- man/checkSex.Rd | 2 +- man/recodeSex.Rd | 2 +- man/repairSex.Rd | 2 +- 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/R/checkParents.R b/R/checkParents.R index 807a1987..060553c3 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -16,8 +16,8 @@ #' @param personID Character. Column name for individual IDs. #' @param momID Character. Column name for maternal IDs. #' @param dadID Character. Column name for paternal IDs. -#' @param code_male Value representing male sex -#' @param code_female Value representing female sex +#' @param code_male The code value used to represent male sex in the 'sex' column of \code{ped}. +#' @param code_female The code value used to represent female sex in the 'sex' column of \code{ped}. #' #' @return Depending on the value of `repair`, either a list containing validation results or a repaired dataframe is returned. #' @examples @@ -107,9 +107,11 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, if (!is.null(code_male) && !is.null(code_female)) { validation_results$male_var <- code_male validation_results$female_var <- code_female + validation_results$sex_code_source <- "user_provided_codes" } else { - validation_results$female_var <- mom_results$modal_sex - validation_results$male_var <- dad_results$modal_sex + validation_results$female_var <- mom_results$modal_sex + validation_results$male_var <- dad_results$modal_sex + validation_results$sex_code_source <- "modal_parent_sex" } # Are any parents in both momID and dadID? momdad <- intersect(ped$dadID, ped$momID) diff --git a/R/checkSex.R b/R/checkSex.R index 9745060e..4090bbbb 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -24,7 +24,7 @@ #' @param ped A dataframe representing the pedigree data with a 'sex' column. #' @param code_male The current code used to represent males in the 'sex' column. 
#' @param code_female The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed. -#' @param code_unknown The current code used to represent unknown +#' @param code_unknown The current code used to represent unknown sex values in the 'sex' column. #' @param verbose A logical flag indicating whether to print progress and validation messages to the console. #' @param repair A logical flag indicating whether to attempt repairs on the sex coding. #' @param momID The column name for maternal IDs. Default is "momID". diff --git a/man/checkParentIDs.Rd b/man/checkParentIDs.Rd index 84852d03..c175f4e8 100644 --- a/man/checkParentIDs.Rd +++ b/man/checkParentIDs.Rd @@ -40,9 +40,9 @@ checkParentIDs( \item{dadID}{Character. Column name for paternal IDs.} -\item{code_male}{Value representing male sex} +\item{code_male}{The code value used to represent male sex in the 'sex' column of \code{ped}.} -\item{code_female}{Value representing female sex} +\item{code_female}{The code value used to represent female sex in the 'sex' column of \code{ped}.} } \value{ Depending on the value of `repair`, either a list containing validation results or a repaired dataframe is returned. diff --git a/man/checkSex.Rd b/man/checkSex.Rd index 6c6d58d4..ac575410 100644 --- a/man/checkSex.Rd +++ b/man/checkSex.Rd @@ -22,7 +22,7 @@ checkSex( \item{code_female}{The current code used to represent females in the 'sex' column. 
If both are NULL, no recoding is performed.} -\item{code_unknown}{The current code used to represent unknown} +\item{code_unknown}{The current code used to represent unknown sex values in the 'sex' column.} \item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.} diff --git a/man/recodeSex.Rd b/man/recodeSex.Rd index 728f50a9..6c2b50d0 100644 --- a/man/recodeSex.Rd +++ b/man/recodeSex.Rd @@ -28,7 +28,7 @@ recodeSex( \item{code_female}{The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed.} -\item{code_unknown}{The current code used to represent unknown} +\item{code_unknown}{The current code used to represent unknown sex values in the 'sex' column.} \item{recode_male}{The value to use for males. Default is "M"} diff --git a/man/repairSex.Rd b/man/repairSex.Rd index aa0fd5ec..553c2312 100644 --- a/man/repairSex.Rd +++ b/man/repairSex.Rd @@ -21,7 +21,7 @@ repairSex( \item{code_female}{The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed.} -\item{code_unknown}{The current code used to represent unknown} +\item{code_unknown}{The current code used to represent unknown sex values in the 'sex' column.} } \value{ A modified version of the input data.frame \code{ped}, containing an additional or modified 'sex_recode' column where the 'sex' values are recoded according to \code{code_male}. NA values in the 'sex' column are preserved. 
From 55c2afc01249934be1970f847aa27c1d5f2c1e01 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Fri, 2 Jan 2026 15:43:22 -0500 Subject: [PATCH 6/7] Add test coverage for code_unknown and recode_unknown parameters in recodeSex (#107) * Update R/checkParents.R Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update R/checkParents.R Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Initial plan * Add comprehensive test coverage for code_unknown and recode_unknown parameters Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com> * Address code review feedback: improve documentation and test comments Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com> * Remove brittle line number references from test comments Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com> --------- Co-authored-by: Mason Garrison Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: smasongarrison <6001608+smasongarrison@users.noreply.github.com> --- R/checkSex.R | 2 +- tests/testthat/test-checkSex.R | 182 +++++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 1 deletion(-) diff --git a/R/checkSex.R b/R/checkSex.R index 4090bbbb..9eb8181e 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -24,7 +24,7 @@ #' @param ped A dataframe representing the pedigree data with a 'sex' column. #' @param code_male The current code used to represent males in the 'sex' column. #' @param code_female The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed. -#' @param code_unknown The current code used to represent unknown sex values in the 'sex' column. +#' @param code_unknown The current code used to represent unknown or ambiguous sex in the 'sex' column. 
Can be NA to indicate that missing values should be treated as unknown. If NULL and both code_male and code_female are provided, values not matching either will be inferred as unknown. #' @param verbose A logical flag indicating whether to print progress and validation messages to the console. #' @param repair A logical flag indicating whether to attempt repairs on the sex coding. #' @param momID The column name for maternal IDs. Default is "momID". diff --git a/tests/testthat/test-checkSex.R b/tests/testthat/test-checkSex.R index 335aee62..427d37b4 100644 --- a/tests/testthat/test-checkSex.R +++ b/tests/testthat/test-checkSex.R @@ -86,3 +86,185 @@ test_that("Functions handle missing values gracefully", { expect_silent(repairSex(ped_with_na, verbose = FALSE, code_male = "M")) expect_silent(recodeSex(ped_with_na, verbose = FALSE, code_male = "M", code_female = "F")) }) + + +# Test Case 5: Handle code_unknown parameter with explicit value +test_that("recodeSex handles code_unknown parameter when explicitly provided", { + # Create pedigree with unknown sex codes + ped <- data.frame( + ID = c(1, 2, 3, 4, 5, 6), + sex = c("M", "F", "M", "F", "U", "U"), + dadID = c(NA, NA, 1, 1, NA, NA), + momID = c(NA, NA, 2, 2, NA, NA) + ) + + # Test with code_unknown = "U" + recoded_ped <- recodeSex(ped, + code_male = "M", + code_female = "F", + code_unknown = "U", + recode_male = "Male", + recode_female = "Female", + recode_unknown = "Unknown" + ) + + # Check that unknown codes are recoded correctly + expect_equal(recoded_ped$sex[5], "Unknown") + expect_equal(recoded_ped$sex[6], "Unknown") + expect_equal(recoded_ped$sex[1], "Male") + expect_equal(recoded_ped$sex[2], "Female") +}) + + +# Test Case 6: Handle code_unknown when it's NA +test_that("recodeSex handles code_unknown = NA correctly", { + # Create pedigree where NA represents unknown sex + ped <- data.frame( + ID = c(1, 2, 3, 4, 5), + sex = c("M", "F", "M", "F", NA), + dadID = c(NA, NA, 1, 1, NA), + momID = c(NA, NA, 2, 2, NA) 
+ ) + + # Test with code_unknown = NA + recoded_ped <- recodeSex(ped, + code_male = "M", + code_female = "F", + code_unknown = NA, + recode_male = "Male", + recode_female = "Female", + recode_unknown = "Unknown" + ) + + # Check that NA values are recoded to "Unknown" + expect_equal(recoded_ped$sex[5], "Unknown") + expect_equal(recoded_ped$sex[1], "Male") + expect_equal(recoded_ped$sex[2], "Female") +}) + + +# Test Case 7: Infer unknown values from data when code_unknown not provided +test_that("recodeSex infers unknown values when code_unknown is not provided", { + # Create pedigree with values that are neither male nor female + ped <- data.frame( + ID = c(1, 2, 3, 4, 5, 6), + sex = c("M", "F", "M", "F", "X", "?"), + dadID = c(NA, NA, 1, 1, NA, NA), + momID = c(NA, NA, 2, 2, NA, NA) + ) + + # Test without code_unknown - should infer "X" and "?" as unknown + recoded_ped <- recodeSex(ped, + code_male = "M", + code_female = "F", + recode_male = "Male", + recode_female = "Female", + recode_unknown = "Unknown" + ) + + # Check that values not in code_male/code_female are recoded to unknown + expect_equal(recoded_ped$sex[5], "Unknown") + expect_equal(recoded_ped$sex[6], "Unknown") + expect_equal(recoded_ped$sex[1], "Male") + expect_equal(recoded_ped$sex[2], "Female") +}) + + +# Test Case 8: Test recode_unknown parameter variations +test_that("recodeSex respects recode_unknown parameter", { + ped <- data.frame( + ID = c(1, 2, 3, 4, 5), + sex = c("M", "F", "M", "F", "U"), + dadID = c(NA, NA, 1, 1, NA), + momID = c(NA, NA, 2, 2, NA) + ) + + # Test with custom recode_unknown value + recoded_ped <- recodeSex(ped, + code_male = "M", + code_female = "F", + code_unknown = "U", + recode_male = "1", + recode_female = "0", + recode_unknown = "9" + ) + + expect_equal(recoded_ped$sex[5], "9") + expect_equal(recoded_ped$sex[1], "1") + expect_equal(recoded_ped$sex[2], "0") +}) + + +# Test Case 9: Test code_unknown with only code_male provided +test_that("recodeSex handles code_unknown 
with only code_male", { + ped <- data.frame( + ID = c(1, 2, 3, 4), + sex = c("M", "F", "M", "U"), + dadID = c(NA, NA, 1, NA), + momID = c(NA, NA, 2, NA) + ) + + # Test with only code_male and code_unknown + recoded_ped <- recodeSex(ped, + code_male = "M", + code_unknown = "U", + recode_male = "Male", + recode_female = "Female", + recode_unknown = "Unknown" + ) + + # Check recoding: M->Male, F->Female (inferred), U->Unknown + expect_equal(recoded_ped$sex[1], "Male") + expect_equal(recoded_ped$sex[2], "Female") + expect_equal(recoded_ped$sex[4], "Unknown") +}) + + +# Test Case 10: Test code_unknown with only code_female provided +test_that("recodeSex handles code_unknown with only code_female", { + ped <- data.frame( + ID = c(1, 2, 3, 4), + sex = c("M", "F", "F", "U"), + dadID = c(NA, NA, NA, NA), + momID = c(NA, NA, NA, NA) + ) + + # Test with only code_female and code_unknown + recoded_ped <- recodeSex(ped, + code_female = "F", + code_unknown = "U", + recode_male = "Male", + recode_female = "Female", + recode_unknown = "Unknown" + ) + + # Check recoding: F->Female, M->Male (inferred), U->Unknown + expect_equal(recoded_ped$sex[1], "Male") + expect_equal(recoded_ped$sex[2], "Female") + expect_equal(recoded_ped$sex[4], "Unknown") +}) + + +# Test Case 11: Test numeric codes with code_unknown +test_that("recodeSex handles numeric code_unknown values", { + ped <- data.frame( + ID = c(1, 2, 3, 4, 5), + sex = c(1, 0, 1, 0, 9), + dadID = c(NA, NA, 1, 1, NA), + momID = c(NA, NA, 2, 2, NA) + ) + + # Test with numeric codes + recoded_ped <- recodeSex(ped, + code_male = 1, + code_female = 0, + code_unknown = 9, + recode_male = "M", + recode_female = "F", + recode_unknown = "U" + ) + + expect_equal(recoded_ped$sex[5], "U") + expect_equal(recoded_ped$sex[1], "M") + expect_equal(recoded_ped$sex[2], "F") +}) From fb30e5b85ad6e92411a71d131e20a699ea0312f3 Mon Sep 17 00:00:00 2001 From: Mason Garrison <6001608+smasongarrison@users.noreply.github.com> Date: Fri, 2 Jan 2026 15:47:05 
-0500 Subject: [PATCH 7/7] docs --- man/checkSex.Rd | 2 +- man/recodeSex.Rd | 2 +- man/repairSex.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/checkSex.Rd b/man/checkSex.Rd index ac575410..fb1cfe24 100644 --- a/man/checkSex.Rd +++ b/man/checkSex.Rd @@ -22,7 +22,7 @@ checkSex( \item{code_female}{The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed.} -\item{code_unknown}{The current code used to represent unknown sex values in the 'sex' column.} +\item{code_unknown}{The current code used to represent unknown or ambiguous sex in the 'sex' column. Can be NA to indicate that missing values should be treated as unknown. If NULL and both code_male and code_female are provided, values not matching either will be inferred as unknown.} \item{verbose}{A logical flag indicating whether to print progress and validation messages to the console.} diff --git a/man/recodeSex.Rd b/man/recodeSex.Rd index 6c2b50d0..3da597bf 100644 --- a/man/recodeSex.Rd +++ b/man/recodeSex.Rd @@ -28,7 +28,7 @@ recodeSex( \item{code_female}{The current code used to represent females in the 'sex' column. If both are NULL, no recoding is performed.} -\item{code_unknown}{The current code used to represent unknown sex values in the 'sex' column.} +\item{code_unknown}{The current code used to represent unknown or ambiguous sex in the 'sex' column. Can be NA to indicate that missing values should be treated as unknown. If NULL and both code_male and code_female are provided, values not matching either will be inferred as unknown.} \item{recode_male}{The value to use for males. Default is "M"} diff --git a/man/repairSex.Rd b/man/repairSex.Rd index 553c2312..2f79b2b6 100644 --- a/man/repairSex.Rd +++ b/man/repairSex.Rd @@ -21,7 +21,7 @@ repairSex( \item{code_female}{The current code used to represent females in the 'sex' column. 
If both are NULL, no recoding is performed.} -\item{code_unknown}{The current code used to represent unknown sex values in the 'sex' column.} +\item{code_unknown}{The current code used to represent unknown or ambiguous sex in the 'sex' column. Can be NA to indicate that missing values should be treated as unknown. If NULL and both code_male and code_female are provided, values not matching either will be inferred as unknown.} } \value{ A modified version of the input data.frame \code{ped}, containing an additional or modified 'sex_recode' column where the 'sex' values are recoded according to \code{code_male}. NA values in the 'sex' column are preserved.