Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions R/databasesFromAndToCSV.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ omopVocabularyCSVsToDuckDB <- function(
"CONCEPT_RELATIONSHIP",
"CONCEPT_SYNONYM",
"DOMAIN",
"DRUG_STRENGTH",
"DRUG_STRENGTH",
"RELATIONSHIP",
"VOCABULARY"
)
Expand Down Expand Up @@ -55,7 +55,7 @@ omopVocabularyCSVsToDuckDB <- function(
sql = sql,
targetDialect = "duckdb"
)

# Fix DuckDB data type issues: replace NUMERIC with DOUBLE for float columns
# This prevents precision errors when importing large numeric values
sql <- gsub("NUMERIC NULL", "DOUBLE NULL", sql)
Expand Down Expand Up @@ -130,14 +130,14 @@ duckdbToOMOPVocabularyCSVs <- function(
for (table_name in OMOPVocabularyTableNames) {
message("Exporting table: ", table_name)
out_path <- file.path(pathToOMOPVocabularyCSVsFolder, paste0(table_name, ".csv"))

col_info <- DBI::dbGetQuery(
connection,
paste0("PRAGMA table_info(", table_name, ");")
)
cols <- col_info$name
date_cols <- col_info$name[grepl("^date$", tolower(col_info$type))]

select_cols <- sapply(cols, function(col) {
if (col %in% date_cols) {
paste0("STRFTIME('%Y%m%d', ", col, ") AS ", col)
Expand All @@ -147,7 +147,7 @@ duckdbToOMOPVocabularyCSVs <- function(
})

select_sql <- paste(select_cols, collapse = ", ")
sql <- paste0("COPY (SELECT ", select_sql, " FROM ", table_name, ") TO '", out_path, "' (HEADER, DELIM '\t');")
sql <- paste0("COPY (SELECT ", select_sql, " FROM ", table_name, ") TO '", out_path, "' (HEADER, DELIM '\t', QUOTE '');")
DatabaseConnector::dbExecute(connection, sql)
}

Expand Down
2 changes: 2 additions & 0 deletions R/validateUsagiFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' - Check if all default Usagi columns are present:
#' - Check if sourceCode and conceptId are unique
#' - Check if sourceCode is not empty
#' - Check if sourceCode is at most 50 characters
#' - Check if sourceName is not empty
#' - Check if sourceName is at most 255 characters
#' If usagi file has C&CR columns:
Expand Down Expand Up @@ -96,6 +97,7 @@ validateUsagiFile <- function(
validationRules <- validate::validator(
SourceCode.is.empty = is_complete(sourceCode),
SourceCode.and.conceptId.are.not.unique = is_unique(sourceCode, conceptId),
SourceCode.is.more.than.50.characters = field_length(sourceCode, min = 0, max = 50),
SourceName.is.empty = is_complete(sourceName),
SourceName.is.more.than.255.characters = field_length(sourceName, min = 0, max = 255),
SourceFrequency.is.not.empty = is_complete(sourceFrequency),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ A01.0+G01,[SourceCode and conceptId are not unique]Meningitis (in) typhoid fever
A01.0+G01,[SourceCode and conceptId are not unique]Meningitis (in) typhoid fever,-1,,2000500101,Lavantautiin liittyvä aivokalvotulehdus,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A01|A01.0|G01,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1666794379045,4100102,Meningitis due to typhoid fever,Condition,MAPS_TO,,TAYS,1623974400000,,,
A01.0+J17.0,,-1,,2000500103,Lavantautiin liittyvä keuhkokuume,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A01|A01.0|J17.0,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1666794388143,4166072,Pneumonia in typhoid fever,Condition,MAPS_TO,,TAYS,1623974400000,,,
A01.4+M01.3,[SourceName is more than 255 characters]Arthritis in typhoid or paratyphoid fever ad [SourceName is more than 255 characters]Arthritis in typhoid or paratyphoid fever ad [SourceName is more than 255 characters]Arthritis in typhoid or paratyphoid fever ad ddd,-1,,2000500104,Lavantautiin tai pikkulavantautiin liittyvä nivelinfektio,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A01|A01.4|M01.3,ICD10|ICD10|ICD10,0.78,APPROVED,EQUIVALENT,PKo,1666806100347,80316,Salmonella arthritis,Condition,MAPS_TO,,PKo,1666806094598,,,
A01234567890123456789012345678901234567890123456789X,[SourceCode is more than 50 characters]Test entry for sourceCode length validation,-1,,2000500999,Test entry,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A01,ICD10,0,APPROVED,EQUAL,PKo,1666805697461,4100102,Meningitis due to typhoid fever,Condition,MAPS_TO,,TAYS,1623974400000,,,
A02.2+G01,[APPROVED mappingStatus conceptId is 0]Salmonella meningitis,-1,,2000500105,Salmonellan aiheuttama aivokalvotulehdus,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A02|A02.2|G01,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1666794600409,0,Salmonella meningitis,Condition,MAPS_TO,,TAYS,1623974400000,,,
A17.0+G01,[APPROVED mappingStatus with concepts outdated]Tuberculous meningitis,-1,,2000500115,Tuberkuloottinen meningiitti,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A17|A17.0|G01,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1669304049249,1234,Tuberculosis of meninges,Condition,MAPS_TO,,PKo,1666804429398,,,
A17.1+G07,[APPROVED mappingStatus with concepts outdated]Meningeal tuberculoma,-1,,2000500116,Aivokalvojen tuberkulooma,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A17|A17.1|G07,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1669304054597,1234,Tuberculoma of meninges,Condition,MAPS_TO,,TAYS,1623974400000,,,
Expand Down
8 changes: 8 additions & 0 deletions tests/testthat/test-validateUsagiFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ test_that("test validateUsagiFile returns errors with the errored usagi file", {
validatedUsagiFile |> dplyr::filter(is.na(sourceName)) |> dplyr::pull(mappingStatus) |>
expect_equal("FLAGGED")

# SourceCode is more than 50 characters
validationsSummary |> dplyr::filter(step == "SourceCode is more than 50 characters") |> nrow() |> expect_equal(1)
validatedUsagiFile |> dplyr::filter(stringr::str_detect(sourceName, "SourceCode is more than 50 characters")) |> nrow() |> expect_equal(1)
validatedUsagiFile |> dplyr::filter(stringr::str_detect(sourceName, "SourceCode is more than 50 characters")) |> dplyr::pull(`ADD_INFO:validationMessages`) |>
expect_equal("ERROR: SourceCode is more than 50 characters")
validatedUsagiFile |> dplyr::filter(stringr::str_detect(sourceName, "SourceCode is more than 50 characters")) |> dplyr::pull(mappingStatus) |>
expect_equal("FLAGGED")

# SourceName is more than 255 characters
validationsSummary |> dplyr::filter(step == "SourceName is more than 255 characters") |> nrow() |> expect_equal(1)
validatedUsagiFile |> dplyr::filter(stringr::str_detect(sourceName, "SourceName is more than 255 characters")) |> nrow() |> expect_equal(1)
Expand Down
Loading