diff --git a/DESCRIPTION b/DESCRIPTION
index 8f4a851..6e14690 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -39,7 +39,8 @@ Imports:
     readr,
     ggh4x,
     data.table,
-    rlang
+    rlang,
+    duckdb
 Depends: 
     R (>= 3.4.1), 
     SummarizedExperiment,
diff --git a/NAMESPACE b/NAMESPACE
index ac7b6ec..3d95b18 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -186,6 +186,7 @@ import(methods)
 importClassesFrom(RSQLite,SQLiteConnection)
 importClassesFrom(S4Vectors,DFrame)
 importClassesFrom(S4Vectors,DataFrame)
+importClassesFrom(duckdb,duckdb_connection)
 importFrom(S4Vectors,setValidity2)
 importFrom(magrittr,"%$%")
 importFrom(magrittr,"%>%")
diff --git a/R/aggDb.R b/R/aggDb.R
index d3f6cb7..5eabd9f 100644
--- a/R/aggDb.R
+++ b/R/aggDb.R
@@ -14,7 +14,7 @@ aggdb <- function(path) {
   }
   tryCatch(
     {
-      con <- DBI::dbConnect(RSQLite::SQLite(), path)
+      con <- DBI::dbConnect(DBI::dbDriver("SQLite"), path)
     },
     error = function(e) {
       stop(sprintf("Invalid aggdb path '%s'", path), call. = FALSE)
diff --git a/R/allClasses.R b/R/allClasses.R
index 4fa1deb..d5a9b68 100644
--- a/R/allClasses.R
+++ b/R/allClasses.R
@@ -92,7 +92,7 @@ setClass("genoMatrix", contains = "SummarizedExperiment")
 #' @description
 #' Compressed SQLite (".gdb") representation of genotype data and associated variant annotation and sample info tables.
 #' These allow for rapid and memory-efficient loading of sample genotype data and associated metadata within R.
-#' The slots of the gdb class are inherited entirely from the [`RSQLite::SQLiteConnection-class`].
+#' The slots of the gdb class are inherited entirely from the duckdb::duckdb() connection.
 #' A host of RVAT methods described here allow for convenient querying and manipulation of a gdb, for complex queries
 #' users can also directly perform SQL queries on the gdb as exemplified in the examples and tutorials.
 #'
@@ -154,9 +154,9 @@ NULL
 #'
 #' @rdname gdb
 #' @import DBI
-#' @importClassesFrom RSQLite SQLiteConnection
+#' @importClassesFrom duckdb duckdb_connection
 #' @export
-setClass("gdb", contains = "SQLiteConnection")
+setClass("gdb", contains = "duckdb_connection")
 
 
 #' Class to manage multiple varSets
@@ -706,6 +706,7 @@ NULL
 
 #' @rdname aggdb
 #' @usage NULL
+#' @importClassesFrom RSQLite SQLiteConnection
 #' @export
 setClass("aggdb", contains = "SQLiteConnection")
 
diff --git a/R/allGenerics.R b/R/allGenerics.R
index c7c804c..95c97cd 100644
--- a/R/allGenerics.R
+++ b/R/allGenerics.R
@@ -417,7 +417,7 @@ setGeneric("insertVarRecord", function(object, record) {
   standardGeneric("insertVarRecord")
 })
 
-setGeneric("insertDosageRecord", function(object, record) {
+setGeneric("insertDosageRecord", function(object, record, var_id) {
   standardGeneric("insertDosageRecord")
 })
 
diff --git a/R/gdb-anno-cohort.R b/R/gdb-anno-cohort.R
index 6c70660..4992f56 100644
--- a/R/gdb-anno-cohort.R
+++ b/R/gdb-anno-cohort.R
@@ -54,10 +54,14 @@ setMethod(
 
     # build where statement
     if (!is.null(VAR_id)) {
-      VAR_id <- sprintf(
-        "VAR_id in (%s)",
-        paste(as.integer(VAR_id), collapse = ",")
-      )
+      if (length(VAR_id) > 0L) {
+        VAR_id <- sprintf(
+          "VAR_id in (%s)",
+          paste(as.integer(VAR_id), collapse = ",")
+        )
+      } else {
+        VAR_id <- "1=0"
+      }
       if (length(where) > 0L) {
         where <- sprintf("(%s) AND (%s)", VAR_id, where)
       } else {
@@ -221,12 +225,14 @@ setMethod(
     if (verbose) {
       message(sprintf("Loading table '%s' from '%s'", name, value))
     }
-    DBI::dbWriteTable(
-      con = gdb,
-      name = name,
-      value = value,
-      sep = sep,
-      overwrite = TRUE
+    DBI::dbExecute(
+      gdb,
+      sprintf(
+        "CREATE TABLE %s AS SELECT * FROM read_csv_auto('%s', delim='%s')",
+        name,
+        value,
+        sep
+      )
     )
     source_info <- value
   } else {
@@ -605,8 +611,8 @@ setMethod(
     if (meta_anno_exists) {
       DBI::dbExecute(
         object,
-        "DELETE FROM anno WHERE name = :table_to_remove",
-        params = list(table_to_remove = name)
+        "DELETE FROM anno WHERE name = ?",
+        params = list(name)
       )
     }
 
@@ -614,8 +620,8 @@ setMethod(
     if (meta_cohort_exists) {
       DBI::dbExecute(
         object,
-        "DELETE FROM cohort WHERE name = :table_to_remove",
-        params = list(table_to_remove = name)
+        "DELETE FROM cohort WHERE name = ?",
+        params = list(name)
       )
     }
 
diff --git a/R/gdb-buildGdb.R b/R/gdb-buildGdb.R
index 29d668d..7785850 100644
--- a/R/gdb-buildGdb.R
+++ b/R/gdb-buildGdb.R
@@ -25,7 +25,7 @@
 buildGdb <- function(
   vcf,
   output,
-  skipIndexes = FALSE,
+  skipIndexes = FALSE, ## DuckDB does indexing internally, so no need to do it manually as well ##TODO: check is this is true
   skipVarRanges = FALSE,
   overWrite = FALSE,
   genomeBuild = NULL,
@@ -190,7 +190,7 @@ setMethod(
           carrierN <- length(carrierIndex)
           for (ai in seq_along(alleles)) {
             # write variant info
-            insertVarRecord(
+            var_id <- insertVarRecord(
               object,
               record = c(records[i, 1:4], alleles[ai], records[i, 6:9])
             )
@@ -203,14 +203,15 @@ setMethod(
                 collapse = "/"
               )
             }
-            insertDosageRecord(object, record = gtia)
+            insertDosageRecord(object, record = gtia, var_id = var_id)
           }
           # bi-allelic site
         } else {
-          insertVarRecord(object, record = records[i, 1:9])
+          var_id <- insertVarRecord(object, record = records[i, 1:9])
           insertDosageRecord(
             object,
-            record = substr(records[i, -(1:9), drop = FALSE], 1, 3)
+            record = substr(records[i, -(1:9), drop = FALSE], 1, 3),
+            var_id = var_id
           )
         }
       }
@@ -233,32 +234,34 @@ setMethod(
   "insertVarRecord",
   signature = "gdb",
   definition = function(object, record) {
-    DBI::dbExecute(
+    var_id <- DBI::dbGetQuery(
       object,
-      paste0(
-        "insert into var(CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT) ",
-        "values (:chrom, :pos, :id, :ref, :alt, :qual, :flt, :info, :form)"
-      ),
+      "INSERT INTO var(CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+        RETURNING VAR_id;",
       params = list(
-        chrom = record[1],
-        pos = record[2],
-        id = record[3],
-        ref = record[4],
-        alt = record[5],
-        qual = record[6],
-        flt = record[7],
-        info = record[8],
-        form = record[9]
+        record[1], # CHROM
+        as.integer(record[2]), # POS
+        record[3], # ID
+        record[4], # REF
+        record[5], # ALT
+        record[6], # QUAL
+        record[7], # FILTER
+        record[8], # INFO
+        record[9] # FORMAT
       )
-    )
+    )$VAR_id
+
+    return(var_id)
   }
 )
 
 setMethod(
   "insertDosageRecord",
   signature = "gdb",
-  definition = function(object, record) {
-    record[record == "0/0"] <- "0"
+  definition = function(object, record, var_id) {
+    ## record contains the rows of the VCF file, where each row corresponds to a variant, the first 9 columns correspond to headers (CHROM, POS, etc.) and from column 10 the samples. var_id is added so that it can be passed from teh insertVarRecord function
+    record[record == "0/0"] <- "0" ## all different options, if a allele is unkown (due to for example sequence quality), it is set to 0 (the reference allele)
     record[record == "./0"] <- "0"
     record[record == "0/1"] <- "1"
     record[record == "1/0"] <- "1"
@@ -272,8 +275,9 @@ setMethod(
     record[record == "1|1"] <- "2"
     record[record == ".|."] <- "N"
     record[record == ".:0"] <- "N"
-    obs <- sort(unique(c(record)))
+    obs <- sort(unique(c(record))) ## stores all unique genotypes
     if (sum(!obs %in% c("0", "1", "2", "N")) > 0L) {
+      ## if the obs contains other characters than 0, 1, 2, N it leads to an error
       stop(
         sprintf(
           paste0(
@@ -285,12 +289,15 @@ setMethod(
         call. = FALSE
       )
     }
-
+    ## compress
     record <- I(list(memCompress(paste(record, collapse = ""), type = "gzip")))
+
     DBI::dbExecute(
+      ## opens the connection to the gdb
       object,
-      "insert into dosage(GT) values (:record)",
-      params = list(record = record)
+      "INSERT INTO dosage(VAR_id, GT) 
+        VALUES (?, ?)", ## inserts into the var_id, since this is not done automatically with duckDB
+      params = list(var_id, record)
     )
   }
 )
@@ -340,10 +347,10 @@ setMethod(
       # insert GRanges into gdb as blobs per chromosome
       DBI::dbExecute(
         object,
-        statement = "insert into var_ranges(CHROM,ranges) values (:CHROM, :ranges)",
+        statement = "insert into var_ranges(CHROM,ranges) values (?, ?)",
         params = list(
-          CHROM = chrom,
-          ranges = list(serialize(gr, connection = NULL))
+          chrom,
+          list(serialize(gr, connection = NULL))
         )
       )
     }
@@ -373,23 +380,70 @@ setMethod(
   invisible(NULL)
 }
 
+.buildgdb_create_schema <- function(gdb, verbose = TRUE) {
+  ## drop tables if they already exist. This is needed in DuckDB.
+  .drop_duckdb_objects(gdb, verbose = verbose)
 
-.buildgdb_create_schema <- function(gdb) {
-  DBI::dbExecute(
-    gdb,
-    paste0(
-      "create table var (VAR_id integer primary key, CHROM text, POS int, ",
-      "ID text, REF text, ALT text, QUAL text, FILTER text, INFO text, FORMAT text);"
-    )
-  )
-  DBI::dbExecute(gdb, "create table SM (IID text, sex int)")
+  # create sequence
+  DBI::dbExecute(gdb, "CREATE SEQUENCE var_seq START 1;")
+
+  # create table with default VAR_id from sequence
   DBI::dbExecute(
     gdb,
-    "create table dosage (VAR_id integer primary key, GT BLOB);"
+    "
+      CREATE TABLE var (
+        VAR_id INTEGER PRIMARY KEY DEFAULT nextval('var_seq'),
+        CHROM TEXT,
+        POS INT,
+        ID TEXT,
+        REF TEXT,
+        ALT TEXT,
+        QUAL TEXT,
+        FILTER TEXT,
+        INFO TEXT,
+        FORMAT TEXT
+      );"
   )
-  DBI::dbExecute(gdb, "create table anno (name text,value text,date text)")
-  DBI::dbExecute(gdb, "create table cohort (name text,value text,date text)")
-  DBI::dbExecute(gdb, "create table meta (name text,value text)")
+
+  DBI::dbExecute(gdb, "CREATE TABLE SM (IID TEXT, sex INT);")
+  DBI::dbExecute(gdb, "CREATE TABLE dosage (VAR_id INTEGER, GT BLOB);")
+  DBI::dbExecute(gdb, "CREATE TABLE anno (name TEXT,value TEXT,date TEXT);")
+  DBI::dbExecute(gdb, "CREATE TABLE cohort (name TEXT,value TEXT,date TEXT);")
+  DBI::dbExecute(gdb, "CREATE TABLE meta (name TEXT,value TEXT);")
 
   invisible(NULL)
 }
+
+
+.drop_duckdb_objects <- function(gdb, verbose = TRUE) {
+  # drop tables if they exist
+  tables <- DBI::dbListTables(gdb)
+  for (tbl in c(
+    "var",
+    "SM",
+    "dosage",
+    "anno",
+    "cohort",
+    "meta",
+    "var_ranges"
+  )) {
+    if (tbl %in% tables) {
+      if (verbose) {
+        message(sprintf("Dropping existing table '%s'", tbl))
+      }
+      DBI::dbExecute(gdb, sprintf("DROP TABLE IF EXISTS %s;", tbl))
+    }
+  }
+
+  # drop sequence
+  sequences <- DBI::dbGetQuery(
+    gdb,
+    "SELECT sequence_name FROM duckdb_sequences();"
+  )
+  if ("var_seq" %in% sequences$sequence_name) {
+    if (verbose) {
+      message("Dropping existing sequence 'var_seq'")
+    }
+    DBI::dbExecute(gdb, "DROP SEQUENCE var_seq;")
+  }
+}
diff --git a/R/gdb-getGT.R b/R/gdb-getGT.R
index 7f27da4..8755d41 100644
--- a/R/gdb-getGT.R
+++ b/R/gdb-getGT.R
@@ -342,7 +342,7 @@ setMethod(
         "select VAR_id, 'diploid' as ploidy, CHROM, POS, REF ",
         "from var where VAR_id in (%s);"
       ),
-      paste(as.integer(VAR_id), collapse = ",")
+      paste(as.integer(VAR_id), collapse = ",") ##TODO, will this work if VAR_id is very large? or better to split into chunks?
     )
   )
 
@@ -404,7 +404,7 @@ setMethod(
     "select VAR_id, GT from dosage where VAR_id in (%s);",
     paste(as.integer(VAR_id), collapse = ",")
   )
-  GT <- RSQLite::dbGetQuery(object, query)
+  GT <- DBI::dbGetQuery(object, query)
 
   # check if all variants are found
   success <- sum(VAR_id %in% GT[, 1])
diff --git a/R/gdb-shared-helpers.R b/R/gdb-shared-helpers.R
index bdb6756..5c74eed 100644
--- a/R/gdb-shared-helpers.R
+++ b/R/gdb-shared-helpers.R
@@ -61,25 +61,33 @@
   # add rvat version to meta table
   DBI::dbExecute(
     gdb,
-    "INSERT INTO meta VALUES (:name, :value)",
+    "INSERT INTO meta VALUES (?, ?)",
     params = list(
-      name = "rvatVersion",
-      value = as.character(packageVersion("rvat"))
+      "rvatVersion",
+      as.character(packageVersion("rvat"))
     )
   )
 
   # add random identifier
   DBI::dbExecute(
     gdb,
-    "INSERT INTO meta VALUES (:name, :value)",
+    "INSERT INTO meta VALUES (?, ?)",
     params = list(
-      name = "id",
-      value = paste(sample(c(letters, 0:9), 28L, replace = TRUE), collapse = "")
+      "id",
+      paste(sample(c(letters, 0:9), 28L, replace = TRUE), collapse = "")
     )
   )
 
   # add genome build
-  if (!is.null(genomeBuild) && !genomeBuild %in% names(nonPAR)) {
+  if (!is.null(genomeBuild)) {
+    ## added this to insert into genomeBuild into the meta table
+    DBI::dbExecute(
+      gdb,
+      "INSERT INTO meta VALUES (?, ?)",
+      params = list("genomeBuild", genomeBuild)
+    )
+  }
+  if (!genomeBuild %in% names(nonPAR)) {
     warning(
       sprintf(
         paste0(
@@ -95,26 +103,15 @@
     )
   }
 
-  DBI::dbExecute(
-    gdb,
-    "INSERT INTO meta VALUES (:name, :value)",
-    params = list(
-      name = "genomeBuild",
-      value = if (is.null(genomeBuild)) NA_character_ else genomeBuild
-    )
-  )
-
   # add creation date
   DBI::dbExecute(
     gdb,
-    "INSERT INTO meta VALUES (:name, :value)",
+    "INSERT INTO meta VALUES (?, ?)",
     params = list(
-      name = "creationDate",
-      value = as.character(round(Sys.time(), units = "secs"))
+      "creationDate",
+      as.character(round(Sys.time(), units = "secs"))
     )
   )
-
-  invisible(NULL)
 }
 
 
@@ -129,8 +126,11 @@
       as.character(round(Sys.time(), units = "secs"))
     ))
   }
-  DBI::dbExecute(gdb, "create index var_idx on var (VAR_id)")
-  DBI::dbExecute(gdb, "create index var_idx2 on var (CHROM,POS,REF,ALT)")
+  DBI::dbExecute(gdb, "create index IF NOT EXISTS var_idx on var (VAR_id)")
+  DBI::dbExecute(
+    gdb,
+    "create index IF NOT EXISTS var_idx2 on var (CHROM,POS,REF,ALT)"
+  )
 
   # SM indexes
   if (verbose) {
@@ -139,7 +139,7 @@
       as.character(round(Sys.time(), units = "secs"))
     ))
   }
-  DBI::dbExecute(gdb, "create index SM_idx on SM (IID)")
+  DBI::dbExecute(gdb, "create index IF NOT EXISTS SM_idx on SM (IID)")
 
   # dosage indexes
   if (verbose) {
@@ -148,7 +148,12 @@
       as.character(round(Sys.time(), units = "secs"))
     ))
   }
-  DBI::dbExecute(gdb, "create index dosage_idx on dosage (VAR_id)")
+  DBI::dbExecute(
+    gdb,
+    "create index IF NOT EXISTS dosage_idx on dosage (VAR_id)"
+  )
+
+  DBI::dbExecute(gdb, "ANALYZE") ##TODO: SEE IF THIS IS INTERESTING TO USE
 
   # return nothing
   invisible(NULL)
@@ -340,28 +345,28 @@
 
   if (table_name %in% existing_tables) {
     delete_query <- sprintf(
-      "DELETE FROM %s WHERE name = :name",
+      "DELETE FROM %s WHERE name = ?",
       meta_table
     )
     DBI::dbExecute(
       gdb,
       delete_query,
-      params = list(name = table_name)
+      params = list(table_name)
     )
   }
 
   # using :name, :value, :date as placeholders
   insert_query <- sprintf(
-    "INSERT INTO %s (name, value, date) VALUES (:name, :value, :date)",
+    "INSERT INTO %s (name, value, date) VALUES (?, ?, ?)",
     meta_table
   )
   DBI::dbExecute(
     gdb,
     insert_query,
     params = list(
-      name = table_name,
-      value = source_info,
-      date = date()
+      table_name,
+      source_info,
+      date()
     )
   )
 
diff --git a/R/gdb-utils.R b/R/gdb-utils.R
index dc5bd03..6d3fa6d 100755
--- a/R/gdb-utils.R
+++ b/R/gdb-utils.R
@@ -68,7 +68,7 @@ concatGdb <- function(
     if (verbose) {
       message(sprintf("Merging '%s'", gdb_i))
     }
-    DBI::dbExecute(gdb, "attach :src as src", params = list(src = gdb_i))
+    DBI::dbExecute(gdb, sprintf("ATTACH '%s' AS src", gdb_i))
     DBI::dbExecute(gdb, "insert into var select * from src.var order by VAR_id")
     DBI::dbExecute(
       gdb,
@@ -110,8 +110,44 @@ concatGdb <- function(
         as.character(round(Sys.time(), units = "secs"))
       ))
     }
-    DBI::dbExecute(gdb, "update var set VAR_id=rowid")
-    DBI::dbExecute(gdb, "update dosage set VAR_id=rowid")
+    # generate VAR_id map
+    DBI::dbExecute(
+      gdb,
+      "
+  CREATE TEMP TABLE VAR_id_map AS
+  SELECT
+    VAR_id AS old_id,
+    ROW_NUMBER() OVER (
+      ORDER BY VAR_id 
+    ) AS new_id
+  FROM var
+  "
+    )
+
+    # apply the mapping to the var table
+    DBI::dbExecute(
+      gdb,
+      "
+  UPDATE var
+  SET VAR_id = m.new_id
+  FROM VAR_id_map AS m
+  WHERE var.VAR_id = m.old_id
+  "
+    )
+
+    # apply the same mapping to the dosage table
+    DBI::dbExecute(
+      gdb,
+      "
+  UPDATE dosage
+  SET VAR_id = m.new_id
+  FROM VAR_id_map AS m
+  WHERE dosage.VAR_id = m.old_id
+  "
+    )
+
+    # clean up
+    DBI::dbExecute(gdb, "DROP TABLE VAR_id_map")
   }
 
   # generate indexes
@@ -239,7 +275,7 @@ concatGdb <- function(
   if (verbose) {
     message("Creating SM, anno and cohort tables")
   }
-  DBI::dbExecute(gdb, "attach :src as src", params = list(src = gdb_list[1]))
+  DBI::dbExecute(gdb, sprintf("attach '%s' as src", gdb_list[1]))
   DBI::dbExecute(gdb, "create table SM as select * from src.SM")
   DBI::dbExecute(gdb, "detach src")
   DBI::dbExecute(gdb, "create table anno (name text,value text,date text)")
@@ -278,7 +314,7 @@ setMethod(
     )
 
     # list tables
-    master <- DBI::dbGetQuery(object, "select * from sqlite_master")
+    master <- DBI::dbGetQuery(object, "select * from sqlite_master") ##TODO: works with duckdb, but is this the best option?
     tables.base <- gdb_protected_tables[
       !gdb_protected_tables %in% c("var_ranges", "tmp")
     ]
@@ -365,6 +401,7 @@ setMethod(
     addRangedVarinfo(gdb, overwrite = TRUE, verbose = verbose)
 
     ## meta table
+    DBI::dbExecute(gdb, "DROP TABLE IF EXISTS meta")
     DBI::dbExecute(gdb, "create table meta (name text,value text)")
     .gdb_populate_meta_table(
       gdb = gdb,
@@ -542,14 +579,23 @@ setMethod(
   for (copied in tables) {
     indexes <- master[
       master$type == "index" &
-        master$tbl_name == copied,
+        master$tbl_name == copied &
+        !is.na(master$sql),
     ]$sql
 
+    ## skip if there are no indexes (is this necessary, or had the varInfo table always an index for VAR_id?)
+    if (length(indexes) == 0) {
+      next
+    }
+
+    ## duckdb uses schema.table syntax (it must use attach_name.tablename)
     for (index in indexes) {
-      DBI::dbExecute(
-        gdb,
-        gsub("CREATE INDEX ", sprintf("CREATE INDEX %s.", attach_name), index)
+      query <- sub(
+        paste0("ON\\s+", copied, "\\b"),
+        paste0("ON ", attach_name, ".", copied),
+        index
       )
+      DBI::dbExecute(gdb, query)
     }
   }
 
diff --git a/R/gdb.R b/R/gdb.R
index 3611bf8..75ce754 100644
--- a/R/gdb.R
+++ b/R/gdb.R
@@ -4,29 +4,33 @@
 #' @rdname gdb
 #' @usage NULL
 #' @export
-setMethod("show", signature = "gdb", definition = function(object) {
+#'
+setMethod("show", "gdb", function(object) {
   cat("rvat gdb object\n", "Path:", getGdbPath(object), "\n")
 })
 
 #' @rdname gdb
 #' @usage NULL
 #' @export
+#'
 setMethod("close", signature = "gdb", definition = function(con) {
   DBI::dbDisconnect(con)
 })
 
+
 # constructors --------------------------------------------
 
 #' @rdname gdb
 #' @usage NULL
 #' @export
+
 gdb <- function(path) {
   if (!file.exists(path)) {
     stop(sprintf("'%s' doesn't exist.", path), call. = FALSE)
   }
   tryCatch(
     {
-      con <- DBI::dbConnect(DBI::dbDriver("SQLite"), path)
+      con <- DBI::dbConnect(duckdb::duckdb(), path)
     },
     error = function(e) {
       stop(sprintf("Invalid gdb path '%s'", path), call. = FALSE)
@@ -35,10 +39,11 @@ gdb <- function(path) {
   new("gdb", con)
 }
 
+
 gdb_init <- function(path) {
   tryCatch(
     {
-      con <- DBI::dbConnect(DBI::dbDriver("SQLite"), path)
+      con <- DBI::dbConnect(duckdb::duckdb(), path)
     },
     error = function(e) {
       stop(sprintf("Invalid gdb path '%s'", path), call. = FALSE)
@@ -92,7 +97,7 @@ setMethod("getGdbId", signature = "gdb", definition = function(object) {
 #' @usage NULL
 #' @export
 setMethod("getGdbPath", signature = "gdb", definition = function(object) {
-  object@dbname
+  DBI::dbGetInfo(object)$dbname
 })
 
 #' @rdname gdb
@@ -198,7 +203,7 @@ setMethod(
         ranges[x, ][["end"]] + padding
       )
       query <- paste("select * from var where", paste(query, collapse = " or "))
-      query <- RSQLite::dbGetQuery(gdb, query)
+      query <- DBI::dbGetQuery(gdb, query)
       query
     }
     queries <- do.call(
diff --git a/R/mapToCDS.R b/R/mapToCDS.R
index d189114..caf67cd 100755
--- a/R/mapToCDS.R
+++ b/R/mapToCDS.R
@@ -176,7 +176,7 @@ setMethod(
 
 .mapToCDS_get_overlapping_chroms <- function(object, exons) {
   chrom <- unique(as.character(seqnames(exons)))
-  chrom_gdb <- RSQLite::dbGetQuery(object, "select CHROM from var_ranges")$CHROM
+  chrom_gdb <- DBI::dbGetQuery(object, "select CHROM from var_ranges")$CHROM
   if (any(grepl("chr", chrom_gdb, fixed = TRUE))) {
     chrom_gdb <- gsub("chr", "", chrom_gdb, fixed = TRUE)
   }
diff --git a/R/spatialClust.R b/R/spatialClust.R
index 1884a32..0bdfb77 100755
--- a/R/spatialClust.R
+++ b/R/spatialClust.R
@@ -59,7 +59,7 @@ setMethod(
 
     # add grouped concat
     query <- sprintf(
-      "select unit, group_concat(VAR_id) as VAR_id, group_concat(weight) as weight, '%s' as varSetName, group_concat(POS) as POS from (%s) x group by unit",
+      "select unit, group_concat(VAR_id order by VAR_id) as VAR_id, group_concat(weight order by VAR_id) as weight, '%s' as varSetName, group_concat(POS order by VAR_id) as POS from (%s) x group by unit order by unit",
       varSetName,
       query
     )
@@ -77,10 +77,10 @@ setMethod(
       con = output
     )
 
-    handle <- RSQLite::dbSendQuery(object, query)
+    handle <- DBI::dbSendQuery(object, query)
     on.exit(DBI::dbClearResult(handle), add = TRUE)
-    while (!RSQLite::dbHasCompleted(handle)) {
-      chunk <- RSQLite::dbFetch(handle, n = memlimit)
+    while (!DBI::dbHasCompleted(handle)) {
+      chunk <- DBI::dbFetch(handle, n = memlimit)
 
       if (nrow(chunk) == 0L) {
         break
diff --git a/R/varSet.R b/R/varSet.R
index 9a81d92..9054568 100644
--- a/R/varSet.R
+++ b/R/varSet.R
@@ -854,7 +854,7 @@ setMethod(
 
   # add grouped concat
   query <- sprintf(
-    "select unit, group_concat(VAR_id) as VAR_id, group_concat(w) as w, '%s' as varSetName from (%s) x group by unit",
+    "select unit, group_concat(VAR_id order by VAR_id) as VAR_id, group_concat(w order by VAR_id) as w, '%s' as varSetName from (%s) x group by unit order by unit",
     varSetName,
     query
   )
diff --git a/man/ACAT.Rd b/man/ACAT.Rd
index c8e8032..83cde6f 100644
--- a/man/ACAT.Rd
+++ b/man/ACAT.Rd
@@ -65,7 +65,7 @@ library(rvatData)
 # this will first ACAT P-values across statistical tests
 # and then ACAT these P-values across varSets
 data(rvbresults)
-rvbresults <- rvbresults[1:1000,]
+rvbresults <- rvbresults[1:1000, ]
 ACAT(
   rvbresults,
   aggregate = list("test", "varSetName")
@@ -78,8 +78,8 @@ ACAT(
 )
 
 # Input P-value shouldn't be exactly 0 or 1 (see: https://github.com/yaowuliu/ACAT).
-# By default, P-values that are exactly 0 or 1 are reset (`fixpval = TRUE`) to the 
-# minimum P-value (>0) and maximum P-value (<1) in the results. 
+# By default, P-values that are exactly 0 or 1 are reset (`fixpval = TRUE`) to the
+# minimum P-value (>0) and maximum P-value (<1) in the results.
 # Alternatives include:
 # manual minmax values:
 ACAT(
diff --git a/man/aggdb.Rd b/man/aggdb.Rd
index 2488779..dfa09e4 100644
--- a/man/aggdb.Rd
+++ b/man/aggdb.Rd
@@ -66,22 +66,28 @@ gdb <- create_example_gdb()
 
 # generate the aggregates based on a varSetFile
 varsetfile <- varSetFile(rvat_example("rvatData_varsetfile.txt.gz"))
-varset <- getVarSet(varsetfile, unit = c("NEK1", "SOD1", "ABCA4"), varSetName = "High")
+varset <- getVarSet(
+  varsetfile,
+  unit = c("NEK1", "SOD1", "ABCA4"),
+  varSetName = "High"
+)
 aggfile <- tempfile()
-aggregate(x = gdb,
-          varSet = varset,
-          maxMAF = 0.001,
-          output = aggfile,
-          verbose = FALSE)
+aggregate(
+  x = gdb,
+  varSet = varset,
+  maxMAF = 0.001,
+  output = aggfile,
+  verbose = FALSE
+)
 
 # connect to aggdb, see ?aggdb for more details
 aggdb <- aggdb(aggfile)
 
-# list units and samples in aggdb 
+# list units and samples in aggdb
 head(listUnits(aggdb))
 head(listSamples(aggdb))
 
-# retrieve aggregates 
+# retrieve aggregates
 aggregates <- getUnit(aggdb, unit = "SOD1")
 
 # see ?`assocTest-aggdb` for details on running association tests on an aggdb
diff --git a/man/assocTest-aggdb.Rd b/man/assocTest-aggdb.Rd
index 7d69252..a6ad020 100644
--- a/man/assocTest-aggdb.Rd
+++ b/man/assocTest-aggdb.Rd
@@ -75,7 +75,11 @@ gdb <- create_example_gdb()
 # assocTest-aggdb allows for running association tests on pre-constructed aggregates.
 # below we first generate the aggregates based on a varSetFile
 varsetfile <- varSetFile(rvat_example("rvatData_varsetfile.txt.gz"))
-varset <- getVarSet(varsetfile, unit = c("NEK1", "SOD1", "ABCA4"), varSetName = "High")
+varset <- getVarSet(
+  varsetfile,
+  unit = c("NEK1", "SOD1", "ABCA4"),
+  varSetName = "High"
+)
 aggfile <- tempfile()
 aggregate(
   x = gdb,
diff --git a/man/assocTest-gdb.Rd b/man/assocTest-gdb.Rd
index 852a925..1301e24 100644
--- a/man/assocTest-gdb.Rd
+++ b/man/assocTest-gdb.Rd
@@ -178,110 +178,137 @@ varsetfile <- varSetFile(rvat_example("rvatData_varsetfile.txt.gz"))
 
 # for example purposes, upload a small cohort
 cohort <- getCohort(gdb, "pheno")
-cohort <- cohort[cohort$IID \%in\% colnames(GTsmall),]
+cohort <- cohort[cohort$IID \%in\% colnames(GTsmall), ]
 uploadCohort(gdb, name = "phenosmall", value = cohort)
 
 # run a firth burden test on a binary phenotype
-varsetlist <- getVarSet(varsetfile, unit = c("SOD1", "NEK1"), varSetName = "High")
-rvb <- assocTest(gdb,
-                 varSet = varsetlist,
-                 cohort = "phenosmall",
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = "firth",
-                 name = "example")
+varsetlist <- getVarSet(
+  varsetfile,
+  unit = c("SOD1", "NEK1"),
+  varSetName = "High"
+)
+rvb <- assocTest(
+  gdb,
+  varSet = varsetlist,
+  cohort = "phenosmall",
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = "firth",
+  name = "example"
+)
 
 
 # run ACAT-v and SKAT tests
-rvb <- assocTest(gdb,
-                 varSet = varsetlist,
-                 cohort = "phenosmall",
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("acatvfirth", "skat_burden_robust", "skato_robust"),
-                 name = "example")
+rvb <- assocTest(
+  gdb,
+  varSet = varsetlist,
+  cohort = "phenosmall",
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("acatvfirth", "skat_burden_robust", "skato_robust"),
+  name = "example"
+)
 
 # run a burden test on a continuous phenotype
-rvb <- assocTest(gdb,
-                 varSet = varsetlist,
-                 cohort = "phenosmall",
-                 pheno = "age",
-                 continuous = TRUE,
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("lm", "skat", "acatv"),
-                 name = "example")
-
-# run single variant tests on a binary phenotype 
-sv <- assocTest(gdb,
-                varSet = varsetlist,
-                cohort = "phenosmall",
-                pheno = "pheno",
-                singlevar = TRUE,
-                covar = c("PC1", "PC2", "PC3", "PC4"),
-                test = c("firth", "glm", "scoreSPA"),
-                name = "example",
-                minCarriers = 1)
+rvb <- assocTest(
+  gdb,
+  varSet = varsetlist,
+  cohort = "phenosmall",
+  pheno = "age",
+  continuous = TRUE,
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("lm", "skat", "acatv"),
+  name = "example"
+)
+
+# run single variant tests on a binary phenotype
+sv <- assocTest(
+  gdb,
+  varSet = varsetlist,
+  cohort = "phenosmall",
+  pheno = "pheno",
+  singlevar = TRUE,
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "glm", "scoreSPA"),
+  name = "example",
+  minCarriers = 1
+)
 
 # similarly a list of VAR_ids can be specified instead of a varSetList/varSetFile
-sv <- assocTest(gdb,
-                VAR_id = 1:20,
-                cohort = "phenosmall",
-                pheno = "pheno",
-                singlevar = TRUE,
-                covar = c("PC1", "PC2", "PC3", "PC4"),
-                test = c("firth", "glm", "scoreSPA"),
-                name = "example",
-                minCarriers = 1)
+sv <- assocTest(
+  gdb,
+  VAR_id = 1:20,
+  cohort = "phenosmall",
+  pheno = "pheno",
+  singlevar = TRUE,
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "glm", "scoreSPA"),
+  name = "example",
+  minCarriers = 1
+)
 
 # apply variant filters
-rvb <- assocTest(gdb,
-                 varSet = varsetlist,
-                 cohort = "phenosmall",
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("firth", "skat_robust", "acatv"),
-                 name = "example", 
-                 maxMAF = 0.05,
-                 minCarriers = 1,
-                 minCallrateVar = 0.9,
-                 minCallrateSM = 0.95)
+rvb <- assocTest(
+  gdb,
+  varSet = varsetlist,
+  cohort = "phenosmall",
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "skat_robust", "acatv"),
+  name = "example",
+  maxMAF = 0.05,
+  minCarriers = 1,
+  minCallrateVar = 0.9,
+  minCallrateSM = 0.95
+)
 
 # Perform MAF-weighted burden tests (madsen-browning)
-rvb <- assocTest(gdb,
-                 varSet = varsetlist,
-                 cohort = "phenosmall",
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("firth", "skat_robust", "acatv"),
-                 MAFweights = "mb")
+rvb <- assocTest(
+  gdb,
+  varSet = varsetlist,
+  cohort = "phenosmall",
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "skat_robust", "acatv"),
+  MAFweights = "mb"
+)
 
 # CADD-weighted burden test
-varsetlist_cadd <- getVarSet(varsetfile, unit = c("SOD1", "NEK1"), varSetName = "CADD")
-rvb <- assocTest(gdb,
-                 varSet = varsetlist_cadd,
-                 cohort = "phenosmall",
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("firth", "skat_robust", "acatv"))
+varsetlist_cadd <- getVarSet(
+  varsetfile,
+  unit = c("SOD1", "NEK1"),
+  varSetName = "CADD"
+)
+rvb <- assocTest(
+  gdb,
+  varSet = varsetlist_cadd,
+  cohort = "phenosmall",
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "skat_robust", "acatv")
+)
 
 # Perform recessive burden test
-rvb <- assocTest(gdb,
-                 varSet = varsetlist,
-                 cohort = "phenosmall",
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("firth", "skat_robust", "acatv"),
-                 geneticModel = "recessive")
-
-# Resampled burden test 
-rvb <- assocTest(gdb,
-                 varSet = varsetlist[1],
-                 cohort = "phenosmall",
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("skat", "skat_burden", "acatv"),
-                 name = "example",
-                 methodResampling = "permutation",
-                 nResampling = 100)
+rvb <- assocTest(
+  gdb,
+  varSet = varsetlist,
+  cohort = "phenosmall",
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "skat_robust", "acatv"),
+  geneticModel = "recessive"
+)
 
+# Resampled burden test
+rvb <- assocTest(
+  gdb,
+  varSet = varsetlist[1],
+  cohort = "phenosmall",
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("skat", "skat_burden", "acatv"),
+  name = "example",
+  methodResampling = "permutation",
+  nResampling = 100
+)
 }
diff --git a/man/assocTest-genoMatrix.Rd b/man/assocTest-genoMatrix.Rd
index 0660e45..b7ddd6e 100644
--- a/man/assocTest-genoMatrix.Rd
+++ b/man/assocTest-genoMatrix.Rd
@@ -146,79 +146,96 @@ library(rvatData)
 data(GTsmall)
 
 # run a firth burden test on a binary phenotype
-rvb <- assocTest(GTsmall,
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = "firth",
-                 name = "example")
+rvb <- assocTest(
+  GTsmall,
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = "firth",
+  name = "example"
+)
 
 # run ACAT-v and SKAT tests
-rvb <- assocTest(GTsmall,
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("acatvfirth", "skat_burden_robust", "skato_robust"),
-                 name = "example")
+rvb <- assocTest(
+  GTsmall,
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("acatvfirth", "skat_burden_robust", "skato_robust"),
+  name = "example"
+)
 
 # run a burden test on a continuous phenotype
-rvb <- assocTest(GTsmall,
-                 pheno = "age",
-                 continuous = TRUE,
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("lm", "skat", "acatv"),
-                 name = "example")
+rvb <- assocTest(
+  GTsmall,
+  pheno = "age",
+  continuous = TRUE,
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("lm", "skat", "acatv"),
+  name = "example"
+)
 
 # run single variant tests on a binary phenotype
-sv <- assocTest(GTsmall,
-                pheno = "pheno",
-                singlevar = TRUE,
-                covar = c("PC1", "PC2", "PC3", "PC4"),
-                test = c("firth", "glm", "scoreSPA"),
-                name = "example",
-                minCarriers = 1
-                )
+sv <- assocTest(
+  GTsmall,
+  pheno = "pheno",
+  singlevar = TRUE,
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "glm", "scoreSPA"),
+  name = "example",
+  minCarriers = 1
+)
 
 # apply variant filters
-rvb <- assocTest(GTsmall,
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("firth", "skat_robust", "acatv"),
-                 name = "example",
-                 maxMAF = 0.001,
-                 minCarriers = 2,
-                 minCallrateVar = 0.9,
-                 minCallrateSM = 0.95)
+rvb <- assocTest(
+  GTsmall,
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "skat_robust", "acatv"),
+  name = "example",
+  maxMAF = 0.001,
+  minCarriers = 2,
+  minCallrateVar = 0.9,
+  minCallrateSM = 0.95
+)
 
 # Perform MAF-weighted burden tests (madsen-browning)
-rvb <- assocTest(GTsmall,
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("firth", "skat_robust", "acatv"),
-                 MAFweights = "mb")
+rvb <- assocTest(
+  GTsmall,
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "skat_robust", "acatv"),
+  MAFweights = "mb"
+)
 
 # Perform weighted burden tests with custom weights
 gdb <- create_example_gdb()
 # add cadd scores
 caddscores <- getAnno(gdb, "varInfo", VAR_id = rownames(GTsmall))
-caddscores <- caddscores[match(rownames(GTsmall), caddscores$VAR_id),]
+caddscores <- caddscores[match(rownames(GTsmall), caddscores$VAR_id), ]
 # CADD-weighted burden test
-rvb <- assocTest(recode(GTsmall, weights = caddscores$CADDphred),
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("firth", "skat_robust", "acatv"))
+rvb <- assocTest(
+  recode(GTsmall, weights = caddscores$CADDphred),
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "skat_robust", "acatv")
+)
 
 # Perform recessive burden test
-rvb <- assocTest(GTsmall,
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("firth", "skat_robust", "acatv"),
-                 geneticModel = "recessive")
+rvb <- assocTest(
+  GTsmall,
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("firth", "skat_robust", "acatv"),
+  geneticModel = "recessive"
+)
 
 # Resampled burden test
-rvb <- assocTest(GTsmall,
-                 pheno = "pheno",
-                 covar = c("PC1", "PC2", "PC3", "PC4"),
-                 test = c("skat", "skat_burden", "acatv"),
-                 name = "example",
-                 methodResampling = "permutation",
-                 nResampling = 100)
+rvb <- assocTest(
+  GTsmall,
+  pheno = "pheno",
+  covar = c("PC1", "PC2", "PC3", "PC4"),
+  test = c("skat", "skat_burden", "acatv"),
+  name = "example",
+  methodResampling = "permutation",
+  nResampling = 100
+)
 }
diff --git a/man/buildCorMatrix.Rd b/man/buildCorMatrix.Rd
index 9cc9f75..e1afbf6 100644
--- a/man/buildCorMatrix.Rd
+++ b/man/buildCorMatrix.Rd
@@ -47,15 +47,19 @@ gdb <- create_example_gdb()
 
 # generate the aggregates based on a varSetFile
 varsetfile <- varSetFile(rvat_example("rvatData_varsetfile.txt.gz"))
-varset <- getVarSet(varsetfile, 
-                    unit = c("NEK1", "SOD1", "ABCA4"), 
-                    varSetName = "High")
+varset <- getVarSet(
+  varsetfile,
+  unit = c("NEK1", "SOD1", "ABCA4"),
+  varSetName = "High"
+)
 aggfile <- tempfile()
-aggregate(x = gdb,
-          varSet = varset,
-          maxMAF = 0.001,
-          output = aggfile,
-          verbose = FALSE)
+aggregate(
+  x = gdb,
+  varSet = varset,
+  maxMAF = 0.001,
+  output = aggfile,
+  verbose = FALSE
+)
 
 # build a block-wise correlation matrix
 cormatrix <- buildCorMatrix(
diff --git a/man/buildGeneSet.Rd b/man/buildGeneSet.Rd
index 2ac4821..327466d 100644
--- a/man/buildGeneSet.Rd
+++ b/man/buildGeneSet.Rd
@@ -39,16 +39,18 @@ Currently these can be built directly from GMT-files, data.frames and lists.
 }
 \examples{
 genesetlist <- buildGeneSet(
-  list("geneset1" = c("SOD1", "NEK1"),
-       "geneset2" = c("ABCA4", "SOD1", "NEK1"),
-       "geneset3" = c("FUS", "NEK1")
-       ))
+  list(
+    "geneset1" = c("SOD1", "NEK1"),
+    "geneset2" = c("ABCA4", "SOD1", "NEK1"),
+    "geneset3" = c("FUS", "NEK1")
+  )
+)
 geneset <- genesetlist[[1]]
 
 # list units and weights included in the geneSet
 listUnits(geneset)
 listWeights(geneset)
 
-# note that usually you'll work with geneSets in either a geneSetList or 
+# note that usually you'll work with geneSets in either a geneSetList or
 # a geneSetFile (see ?geneSetList and ?geneSetFile)
 }
diff --git a/man/buildVarSet-gdb.Rd b/man/buildVarSet-gdb.Rd
index 663a55d..57e2b26 100644
--- a/man/buildVarSet-gdb.Rd
+++ b/man/buildVarSet-gdb.Rd
@@ -57,21 +57,25 @@ library(rvatData)
 # Build a varset including variants with a moderate predicted impact
 gdb <- create_example_gdb()
 varsetfile_moderate <- tempfile()
-buildVarSet(object = gdb,
-            output = varsetfile_moderate,
-            varSetName = "Moderate",
-            unitTable = "varInfo",
-            unitName = "gene_name",
-            where = "ModerateImpact = 1")
+buildVarSet(
+  object = gdb,
+  output = varsetfile_moderate,
+  varSetName = "Moderate",
+  unitTable = "varInfo",
+  unitName = "gene_name",
+  where = "ModerateImpact = 1"
+)
 
 # Build a varset that contains CADD scores
 varsetfile_cadd <- tempfile()
-buildVarSet(object = gdb,
-            output = varsetfile_cadd,
-            varSetName = "CADD",
-            unitTable = "varInfo",
-            unitName = "gene_name",
-            weightName = "CADDphred")
+buildVarSet(
+  object = gdb,
+  output = varsetfile_cadd,
+  varSetName = "CADD",
+  unitTable = "varInfo",
+  unitName = "gene_name",
+  weightName = "CADDphred"
+)
 
 # connect to varsetfile and retrieve variant sets
 varsetfile <- varSetFile(varsetfile_moderate)
diff --git a/man/buildVarSet.Rd b/man/buildVarSet.Rd
index 871e0dc..ffa004e 100644
--- a/man/buildVarSet.Rd
+++ b/man/buildVarSet.Rd
@@ -15,21 +15,25 @@ library(rvatData)
 # Build a varSetFile including variants with a moderate predicted impact
 gdb <- create_example_gdb()
 varsetfile_moderate <- tempfile()
-buildVarSet(object = gdb, 
-            output = varsetfile_moderate,
-            varSetName = "Moderate", 
-            unitTable = "varInfo", 
-            unitName = "gene_name",
-            where = "ModerateImpact = 1")
+buildVarSet(
+  object = gdb,
+  output = varsetfile_moderate,
+  varSetName = "Moderate",
+  unitTable = "varInfo",
+  unitName = "gene_name",
+  where = "ModerateImpact = 1"
+)
 
 # Build a varSetFile that contains CADD scores
 varsetfile_cadd <- tempfile()
-buildVarSet(object = gdb, 
-            output = varsetfile_cadd,
-            varSetName = "CADD", 
-            unitTable = "varInfo", 
-            unitName = "gene_name",
-            weightName = "CADDphred")
+buildVarSet(
+  object = gdb,
+  output = varsetfile_cadd,
+  varSetName = "CADD",
+  unitTable = "varInfo",
+  unitName = "gene_name",
+  weightName = "CADDphred"
+)
 
 # in addition to building a varSetFile from a gdb,
 # it can also be build directly from a data.frame
diff --git a/man/collapseAggDbs.Rd b/man/collapseAggDbs.Rd
index c76a23e..5980a4b 100644
--- a/man/collapseAggDbs.Rd
+++ b/man/collapseAggDbs.Rd
@@ -29,18 +29,22 @@ gdb <- create_example_gdb()
 # generate two aggregate files
 varsetfile <- varSetFile(rvat_example("rvatData_varsetfile.txt.gz"))
 aggdb1 <- tempfile()
-aggregate(x = gdb,
-          varSet = getVarSet(varsetfile, unit = c("SOD1", "FUS"), varSetName = "High"),
-          maxMAF = 0.001,
-          output = aggdb1,
-          verbose = FALSE)
+aggregate(
+  x = gdb,
+  varSet = getVarSet(varsetfile, unit = c("SOD1", "FUS"), varSetName = "High"),
+  maxMAF = 0.001,
+  output = aggdb1,
+  verbose = FALSE
+)
 
 aggdb2 <- tempfile()
-aggregate(x = gdb,
-          varSet = getVarSet(varsetfile, unit = c("NEK1"), varSetName = "High"),
-          maxMAF = 0.001,
-          output = aggdb2,
-          verbose = FALSE)
+aggregate(
+  x = gdb,
+  varSet = getVarSet(varsetfile, unit = c("NEK1"), varSetName = "High"),
+  maxMAF = 0.001,
+  output = aggdb2,
+  verbose = FALSE
+)
 
 # collapse aggdbs
 aggdb <- tempfile()
diff --git a/man/gdb.Rd b/man/gdb.Rd
index 9b52327..4173166 100644
--- a/man/gdb.Rd
+++ b/man/gdb.Rd
@@ -26,7 +26,7 @@
 \description{
 Compressed SQLite (".gdb") representation of genotype data and associated variant annotation and sample info tables.
 These allow for rapid and memory-efficient loading of sample genotype data and associated metadata within R.
-The slots of the gdb class are inherited entirely from the \code{\link[RSQLite:SQLiteConnection-class]{RSQLite::SQLiteConnection}}.
+The slots of the gdb class are inherited entirely from the duckdb::duckdb() connection.
 A host of RVAT methods described here allow for convenient querying and manipulation of a gdb, for complex queries
 users can also directly perform SQL queries on the gdb as exemplified in the examples and tutorials.
 }
@@ -91,13 +91,11 @@ gdb and clear from annotation / cohort metadata tables.
 
 \examples{
 library(rvatData)
- 
+
 # build a gdb directly from a vcf file
 vcfpath <- rvat_example("rvatData.vcf.gz") # example vcf from rvatData package
 gdbpath <- tempfile() # write gdb to temporary file
-buildGdb(vcf = vcfpath,
-         output = gdbpath, 
-         genomeBuild = "GRCh38")
+buildGdb(vcf = vcfpath, output = gdbpath, genomeBuild = "GRCh38")
 gdb <- gdb(gdbpath)
 
 # upload variant info
@@ -113,9 +111,11 @@ listAnno(gdb)
 listCohort(gdb)
 
 # retrieve annotations for a genomic interval
-varinfo <- getAnno(gdb, 
-                   table = "varinfo", 
-                   ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874))
+varinfo <- getAnno(
+  gdb,
+  table = "varinfo",
+  ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874)
+)
 head(varinfo)
 # see ?getAnno for more details
 
@@ -126,7 +126,7 @@ head(pheno)
 
 # delete table
 uploadAnno(gdb, name = "varInfo2", value = varinfo)
-dropTable(gdb,  "varInfo2")
+dropTable(gdb, "varInfo2")
 
 # retrieve gdb metadata
 getGdbPath(gdb)
@@ -135,26 +135,35 @@ getGenomeBuild(gdb)
 # retrieve genotypes
 # first extract variant ids that we want to retrieve the genotypes for
 # note that `where` here is an SQL compliant
-varinfo <- getAnno(gdb, 
-                   table = "varinfo", 
-                   where = "gene_name = 'SOD1' and ModerateImpact = 1")
+varinfo <- getAnno(
+  gdb,
+  table = "varinfo",
+  where = "gene_name = 'SOD1' and ModerateImpact = 1"
+)
 
-# retrieve genotypes for variants extracted above 
-GT <- getGT(gdb, 
-            VAR_id = varinfo$VAR_id,
-            cohort = "pheno")
+# retrieve genotypes for variants extracted above
+GT <- getGT(gdb, VAR_id = varinfo$VAR_id, cohort = "pheno")
 
 # retrieve genotypes for a given genomic interval
 GT_fromranges <- getGT(
- gdb,
- ranges = data.frame(CHROM = "chr21", start = 31659666, end = 31668931),
- cohort = "pheno",
- anno = "varInfo",
- annoFields = c("VAR_id", "CHROM", "POS", "REF", "ALT", "HighImpact", "ModerateImpact", "Synonymous")
+  gdb,
+  ranges = data.frame(CHROM = "chr21", start = 31659666, end = 31668931),
+  cohort = "pheno",
+  anno = "varInfo",
+  annoFields = c(
+    "VAR_id",
+    "CHROM",
+    "POS",
+    "REF",
+    "ALT",
+    "HighImpact",
+    "ModerateImpact",
+    "Synonymous"
+  )
 )
 # Learn about the getGT method in ?getGT
 # Learn about the genoMatrix format in ?genoMatrix
- 
+
 # Learn more about the downstream methods available for the gdb class in the relevant help pages
 # e.g. ?mapVariants, ?subsetGdb, ?assocTest, ?writeVcf
 close(gdb)
diff --git a/man/geneSet.Rd b/man/geneSet.Rd
index 0d1ea93..406d463 100644
--- a/man/geneSet.Rd
+++ b/man/geneSet.Rd
@@ -40,17 +40,19 @@ In the following code snippets, x is a geneSet object.
 
 \examples{
 genesetlist <- buildGeneSet(
-  list("geneset1" = c("SOD1", "NEK1"),
-       "geneset2" = c("ABCA4", "SOD1", "NEK1"),
-       "geneset3" = c("FUS", "NEK1")
-       ))
+  list(
+    "geneset1" = c("SOD1", "NEK1"),
+    "geneset2" = c("ABCA4", "SOD1", "NEK1"),
+    "geneset3" = c("FUS", "NEK1")
+  )
+)
 geneset <- genesetlist[[1]]
 
 # list units and weights included in the geneSet
 listUnits(geneset)
 listWeights(geneset)
 
-# note that usually you'll work with geneSets in either a geneSetList or 
+# note that usually you'll work with geneSets in either a geneSetList or
 # a geneSetFile (see ?geneSetList and ?geneSetFile)
 }
 \seealso{
diff --git a/man/geneSetAssoc.Rd b/man/geneSetAssoc.Rd
index 5b037d4..ff94cb3 100644
--- a/man/geneSetAssoc.Rd
+++ b/man/geneSetAssoc.Rd
@@ -103,15 +103,19 @@ Perform self-contained or competitive gene set analyses.
 \examples{
 library(rvatData)
 data(rvbresults)
-res <- rvbresults[rvbresults$test == "firth" & 
-                    rvbresults$varSetName == "ModerateImpact", ]
+res <- rvbresults[
+  rvbresults$test == "firth" &
+    rvbresults$varSetName == "ModerateImpact",
+]
 
 # example genesetlist used in examples below (see ?buildGeneSet on build geneSetLists/geneSetFiles)
 genesetlist <- buildGeneSet(
-  list("geneset1" = c("SOD1", "NEK1"),
-       "geneset2" = c("ABCA4", "SOD1", "NEK1"),
-       "geneset3" = c("FUS", "NEK1")
-       ))
+  list(
+    "geneset1" = c("SOD1", "NEK1"),
+    "geneset2" = c("ABCA4", "SOD1", "NEK1"),
+    "geneset3" = c("FUS", "NEK1")
+  )
+)
 
 # Perform competitive gene set analysis using a linear model
 GSAresults <- geneSetAssoc(
@@ -121,13 +125,13 @@ GSAresults <- geneSetAssoc(
   test = c("lm")
 )
 
-# Outlying gene association scores can be remedied by either setting Z-score cutoffs (i.e. all Z-scores exceeding these values will be set to the respective cutoff), 
+# Outlying gene association scores can be remedied by either setting Z-score cutoffs (i.e. all Z-scores exceeding these values will be set to the respective cutoff),
 # or inverse normal transforming the Z-scores:
 GSAresults <- geneSetAssoc(
   res,
   genesetlist,
   covar = c("nvar"),
-  test = c("lm"), 
+  test = c("lm"),
   maxSetSize = 500,
   Zcutoffs = c(-4, 4) # lower and upper bounds
 )
@@ -136,20 +140,20 @@ GSAresults <- geneSetAssoc(
   res,
   genesetlist,
   covar = c("nvar"),
-  test = c("lm"), 
+  test = c("lm"),
   maxSetSize = 500,
   INT = TRUE # perform inverse normal transformation
 )
 
 
-# Conditional gene set analyses can be performed to test whether gene sets are associated independently with the phenotype of interest. 
+# Conditional gene set analyses can be performed to test whether gene sets are associated independently with the phenotype of interest.
 # In the example below we test whether gene sets are independent of geneset1
 GSAresults <- geneSetAssoc(
   res,
   condition = getGeneSet(genesetlist, "geneset1"),
   geneSet = genesetlist,
   covar = c("nvar"),
-  test = c("lm"), 
+  test = c("lm"),
   maxSetSize = 500
 )
 
@@ -158,7 +162,7 @@ GSAresults <- geneSetAssoc(
   res,
   genesetlist,
   covar = c("nvar"),
-  test = c("lm"), 
+  test = c("lm"),
   maxSetSize = 500,
   oneSided = TRUE
 )
@@ -169,7 +173,7 @@ GSAresults <- geneSetAssoc(
 GSAresults <- geneSetAssoc(
   res,
   genesetlist,
-  test = c("fisher"), 
+  test = c("fisher"),
   threshold = 1e-4,
   maxSetSize = 500
 )
diff --git a/man/geneSetFile.Rd b/man/geneSetFile.Rd
index 947b0de..6ce6d39 100644
--- a/man/geneSetFile.Rd
+++ b/man/geneSetFile.Rd
@@ -80,12 +80,13 @@ library(rvatData)
 # can also be build based on a GMT-file (see ?buildGeneSet)
 file <- tempfile()
 genesetfile <- buildGeneSet(
-  list("geneset1" = c("SOD1", "NEK1"),
-       "geneset2" = c("ABCA4", "SOD1", "NEK1"),
-       "geneset3" = c("FUS", "NEK1")
-       ),
+  list(
+    "geneset1" = c("SOD1", "NEK1"),
+    "geneset2" = c("ABCA4", "SOD1", "NEK1"),
+    "geneset3" = c("FUS", "NEK1")
+  ),
   output = file
-  )
+)
 
 # connect to a geneSetFile
 genesetfile <- geneSetFile(file)
@@ -93,7 +94,7 @@ genesetfile <- geneSetFile(file)
 # extract a couple of gene sets from the geneSetFile, which will return a geneSetList (see ?geneSetList)
 getGeneSet(genesetfile, c("geneset1", "geneset2"))
 
-# list included gene sets 
+# list included gene sets
 genesets <- listGeneSets(genesetfile)
 head(genesets)
 
diff --git a/man/geneSetList.Rd b/man/geneSetList.Rd
index cdd53e9..550e0c0 100644
--- a/man/geneSetList.Rd
+++ b/man/geneSetList.Rd
@@ -99,11 +99,12 @@ library(rvatData)
 # build a geneSetList
 # can also be build based on a GMT-file (see ?buildGeneSet)
 genesetlist <- buildGeneSet(
-  list("geneset1" = c("SOD1", "NEK1"),
-       "geneset2" = c("ABCA4", "SOD1", "NEK1"),
-       "geneset3" = c("FUS", "NEK1")
-       )
+  list(
+    "geneset1" = c("SOD1", "NEK1"),
+    "geneset2" = c("ABCA4", "SOD1", "NEK1"),
+    "geneset3" = c("FUS", "NEK1")
   )
+)
 
 # extract a couple of gene sets from the geneSetList, which will return a new geneSetList
 getGeneSet(genesetlist, c("geneset1", "geneset2"))
@@ -130,7 +131,12 @@ dropUnits(genesetlist, unit = "NEK1")
 # remap IDs
 linker <- data.frame(
   gene_name = c("SOD1", "NEK1", "FUS", "ABCA4"),
-  gene_id = c("ENSG00000142168","ENSG00000137601", "ENSG00000089280", "ENSG00000198691")
+  gene_id = c(
+    "ENSG00000142168",
+    "ENSG00000137601",
+    "ENSG00000089280",
+    "ENSG00000198691"
+  )
 )
 genesetlist_remapped <- remapIDs(genesetlist, linker)
 listUnits(genesetlist_remapped)
diff --git a/man/genoMatrix.Rd b/man/genoMatrix.Rd
index 842d503..02c0f11 100644
--- a/man/genoMatrix.Rd
+++ b/man/genoMatrix.Rd
@@ -139,10 +139,10 @@ dim(GT)
 GT[1:5, 1:5]
 
 # Subset samples based on sample info
-GT[ ,GT$pheno == 1]
+GT[, GT$pheno == 1]
 
 # Subset first two variants
-GT[1:2,]
+GT[1:2, ]
 
 # Extract variant/sample summaries --------------------------------------------------
 
@@ -170,7 +170,7 @@ recode(GT, geneticModel = "dominant")
 recode(GT, geneticModel = "recessive")
 # see ?recode for details
 
-# recode missing genotypes 
+# recode missing genotypes
 recode(GT, imputeMethod = "meanImpute")
 recode(GT, imputeMethod = "missingToRef")
 # see ?recode for details
@@ -181,19 +181,22 @@ recode(GT, imputeMethod = "missingToRef")
 aggregate(recode(GT, imputeMethod = "meanImpute"))
 
 # set `returnGT = FALSE` to return aggregates as a vector instead
-aggregate <- aggregate(recode(GT, imputeMethod = "meanImpute"), returnGT = FALSE)
+aggregate <- aggregate(
+  recode(GT, imputeMethod = "meanImpute"),
+  returnGT = FALSE
+)
 head(aggregate)
 
 # Update cohort and variant info in genoMatrix --------------------------
 gdb <- create_example_gdb()
 anno <- getAnno(gdb, "varInfo", fields = c("VAR_id", "CADDphred", "PolyPhen"))
-updateGT(GT, anno = anno )
+updateGT(GT, anno = anno)
 pheno <- colData(GT)
-updateGT(GT, SM = colData(GT)[,1:4])
+updateGT(GT, SM = colData(GT)[, 1:4])
 
 # Get variant carriers
 
-# retrieve a data.frame that lists the samples carrying each respective 
+# retrieve a data.frame that lists the samples carrying each respective
 # variant in the genoMatrix. Additional variant and sample info can be included
 # using the `rowDataFields` and `colDataFields` respectively.
 carriers <- getCarriers(
diff --git a/man/getAnno.Rd b/man/getAnno.Rd
index f6f1690..1fedf4f 100644
--- a/man/getAnno.Rd
+++ b/man/getAnno.Rd
@@ -54,30 +54,40 @@ varinfo <- getAnno(gdb, table = "varInfo")
 head(varinfo)
 
 # extract a genomic range
-varinfo <- getAnno(gdb, 
-                   table = "varInfo", 
-                   ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874))
+varinfo <- getAnno(
+  gdb,
+  table = "varInfo",
+  ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874)
+)
 head(varinfo)
 
 # keep only specified fields
-varinfo <- getAnno(gdb, 
-                   table = "varInfo", 
-                   fields = c("VAR_id", "CHROM", "POS", "REF", "ALT", "ModerateImpact"),
-                   ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874))
+varinfo <- getAnno(
+  gdb,
+  table = "varInfo",
+  fields = c("VAR_id", "CHROM", "POS", "REF", "ALT", "ModerateImpact"),
+  ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874)
+)
 head(varinfo)
 
 # the `where` parameter can be used to to pass an SQL-compliant where clause t
-varinfo <- getAnno(gdb, 
-                   table = "varInfo", 
-                   where = "gene_name = 'SOD1' and ModerateImpact = 1")
+varinfo <- getAnno(
+  gdb,
+  table = "varInfo",
+  where = "gene_name = 'SOD1' and ModerateImpact = 1"
+)
 head(varinfo)
 
 
 # the `inner` and `left` parameters can be used to perform inner and left join operations respectively
 # e.g. we can use the `inner` parameter to filter e.g. based on a table containing QC-passing variants
 # for example:
-uploadAnno(gdb, name = "QCpass", value = data.frame(VAR_id = 1:100), skipRemap = TRUE, verbose = FALSE)
-varinfo <- getAnno(gdb, 
-                   inner = "QCpass",
-                   table = "varInfo")
+uploadAnno(
+  gdb,
+  name = "QCpass",
+  value = data.frame(VAR_id = 1:100),
+  skipRemap = TRUE,
+  verbose = FALSE
+)
+varinfo <- getAnno(gdb, inner = "QCpass", table = "varInfo")
 }
diff --git a/man/getGT.Rd b/man/getGT.Rd
index 57f27cd..a62e7de 100644
--- a/man/getGT.Rd
+++ b/man/getGT.Rd
@@ -90,11 +90,16 @@ library(rvatData)
 gdb <- create_example_gdb()
 
 # retrieve genotypes of a set of variants based on their VAR_ids
-varinfo <- getAnno(gdb, table = "varinfo", where = "gene_name = 'SOD1' and ModerateImpact = 1")
+varinfo <- getAnno(
+  gdb,
+  table = "varinfo",
+  where = "gene_name = 'SOD1' and ModerateImpact = 1"
+)
 GT <- getGT(
   gdb,
   VAR_id = varinfo$VAR_id,
-  cohort = "pheno")
+  cohort = "pheno"
+)
 
 # retrieve genotypes of a set of variants in a varSet
 varsetfile <- varSetFile(rvat_example("rvatData_varsetfile.txt.gz"))
@@ -102,14 +107,16 @@ varset <- getVarSet(varsetfile, unit = "NEK1", varSetName = "High")
 GT <- getGT(
   gdb,
   varSet = varset,
-  cohort = "pheno")
+  cohort = "pheno"
+)
 # see ?varSetFile and ?getVarSet for more details
 
 # retrieve genotypes for a genomic interval
 GT <- getGT(
   gdb,
   ranges = data.frame(CHROM = "chr21", start = 31659666, end = 31668931),
-  cohort = "pheno")
+  cohort = "pheno"
+)
 
 # the `anno` parameter can be specified to include variant annotations in the rowData of the genoMatrix
 GT <- getGT(
@@ -117,7 +124,16 @@ GT <- getGT(
   ranges = data.frame(CHROM = "chr21", start = 31659666, end = 31668931),
   cohort = "pheno",
   anno = "varInfo",
-  annoFields = c("VAR_id", "CHROM", "POS", "REF", "ALT", "HighImpact", "ModerateImpact", "Synonymous")
+  annoFields = c(
+    "VAR_id",
+    "CHROM",
+    "POS",
+    "REF",
+    "ALT",
+    "HighImpact",
+    "ModerateImpact",
+    "Synonymous"
+  )
 )
 head(rowData(GT))
 
@@ -134,7 +150,11 @@ head(rowData(GT))
 # to assign variant ploidy. (diploid, XnonPAR, YnonPAR). Accepted inputs are GRCh37, hg19, GRCh38, hg38.
 # We recommend, however, to set the genome build when building the gdb: the genome build will theb
 # be included in the gdb metadata and used automatically. see ?buildGdb for details
-varinfo <- getAnno(gdb, table = "varinfo", where = "gene_name = 'UBQLN2' and ModerateImpact = 1")
+varinfo <- getAnno(
+  gdb,
+  table = "varinfo",
+  where = "gene_name = 'UBQLN2' and ModerateImpact = 1"
+)
 GT <- getGT(
   gdb,
   VAR_id = varinfo$VAR_id,
diff --git a/man/getVarSet.Rd b/man/getVarSet.Rd
index 46acdba..9b35ec2 100644
--- a/man/getVarSet.Rd
+++ b/man/getVarSet.Rd
@@ -34,10 +34,7 @@ head(varsets)
 unique(listVarSets(varsetfile))
 
 # specific varSets can be selected using the `varSetName` parameter
-varsets <- getVarSet(varsetfile, 
-                     unit = c("SOD1", "FUS"),
-                     varSetName = "CADD"
-                     )
+varsets <- getVarSet(varsetfile, unit = c("SOD1", "FUS"), varSetName = "CADD")
 head(varsets)
 
 # see ?varSetFile and ?varSetList for more details on connecting and handling varsetfiles.
diff --git a/man/manhattan.Rd b/man/manhattan.Rd
index 704f7b2..77d9136 100644
--- a/man/manhattan.Rd
+++ b/man/manhattan.Rd
@@ -51,27 +51,43 @@ library(rvatData)
 data(rvbresults)
 
 # generate manhatan plot
-man <- manhattan(rvbresults[rvbresults$varSetName == "ModerateImpact" & rvbresults$test == "firth",],
-                 label = "unit", 
-                 contigs = "GRCh38")
+man <- manhattan(
+  rvbresults[
+    rvbresults$varSetName == "ModerateImpact" & rvbresults$test == "firth",
+  ],
+  label = "unit",
+  contigs = "GRCh38"
+)
 
 # if many overlapping gene label, try setting `labelRepel = TRUE`
-man <- manhattan(rvbresults[rvbresults$varSetName == "ModerateImpact" & rvbresults$test == "firth",],
-                 label = "unit", 
-                 labelRepel = TRUE,
-                 contigs = "GRCh38")
+man <- manhattan(
+  rvbresults[
+    rvbresults$varSetName == "ModerateImpact" & rvbresults$test == "firth",
+  ],
+  label = "unit",
+  labelRepel = TRUE,
+  contigs = "GRCh38"
+)
 
 # alter the significane threshold using the `threshold` parameter
-man <- manhattan(rvbresults[rvbresults$varSetName == "ModerateImpact" & rvbresults$test == "firth",],
-                 label = "unit", 
-                 labelRepel = TRUE,
-                 threshold = 1e-5,
-                 contigs = "GRCh38")
+man <- manhattan(
+  rvbresults[
+    rvbresults$varSetName == "ModerateImpact" & rvbresults$test == "firth",
+  ],
+  label = "unit",
+  labelRepel = TRUE,
+  threshold = 1e-5,
+  contigs = "GRCh38"
+)
 
 # the threshold for displaying labels can be set using the `labelTrheshold` parameter
-man <- manhattan(rvbresults[rvbresults$varSetName == "ModerateImpact" & rvbresults$test == "firth",],
-                 label = "unit", 
-                 labelRepel = TRUE,
-                 labelThreshold = 1e-12,
-                 contigs = "GRCh38")
+man <- manhattan(
+  rvbresults[
+    rvbresults$varSetName == "ModerateImpact" & rvbresults$test == "firth",
+  ],
+  label = "unit",
+  labelRepel = TRUE,
+  labelThreshold = 1e-12,
+  contigs = "GRCh38"
+)
 }
diff --git a/man/mapVariants.Rd b/man/mapVariants.Rd
index 855bb4d..f1e87e2 100644
--- a/man/mapVariants.Rd
+++ b/man/mapVariants.Rd
@@ -82,10 +82,7 @@ ranges <- GRanges(
   gene_name = c("SOD1", "NEK1")
 )
 
-mapVariants(gdb,
-            ranges = ranges,
-            uploadName = "gene",
-            verbose = FALSE)
+mapVariants(gdb, ranges = ranges, uploadName = "gene", verbose = FALSE)
 
 # similarly, ranges can be a data.frame
 ranges <- data.frame(
@@ -95,38 +92,48 @@ ranges <- data.frame(
   gene_name = c("SOD1", "NEK1")
 )
 
-mapVariants(gdb,
-            ranges = ranges,
-            uploadName = "gene",
-            verbose = FALSE,
-            overWrite = TRUE)
+mapVariants(
+  gdb,
+  ranges = ranges,
+  uploadName = "gene",
+  verbose = FALSE,
+  overWrite = TRUE
+)
 
 # often you'd want to map variants to a large set of ranges, such as ensembl models
 # mapVariants supports several file formats, including gff/gtf, bed and ranges
 
 # map variants using a gtf file
 gtffile <- tempfile(fileext = ".gtf")
-rtracklayer::export(makeGRangesFromDataFrame(ranges),
-                    con = gtffile, 
-                    format = "gtf")
+rtracklayer::export(
+  makeGRangesFromDataFrame(ranges),
+  con = gtffile,
+  format = "gtf"
+)
 
-mapVariants(gdb,
-            gff = gtffile,
-            uploadName = "gene",
-            verbose = FALSE,
-            overWrite = TRUE)
+mapVariants(
+  gdb,
+  gff = gtffile,
+  uploadName = "gene",
+  verbose = FALSE,
+  overWrite = TRUE
+)
 
 # map variants using a bed file
 
 bedfile <- tempfile(fileext = ".bed")
-rtracklayer::export(makeGRangesFromDataFrame(ranges),
-                    con = bedfile, 
-                    format = "bed")
-mapVariants(gdb,
-            bed = bedfile,
-            uploadName = "gene",
-            verbose = FALSE,
-            overWrite = TRUE)
+rtracklayer::export(
+  makeGRangesFromDataFrame(ranges),
+  con = bedfile,
+  format = "bed"
+)
+mapVariants(
+  gdb,
+  bed = bedfile,
+  uploadName = "gene",
+  verbose = FALSE,
+  overWrite = TRUE
+)
 
 # see the variant annotation tutorial on the rvat website for more details
 }
diff --git a/tests/testthat/data/rvatData.gdb b/tests/testthat/data/rvatData.gdb
new file mode 100644
index 0000000..9b83b52
Binary files /dev/null and b/tests/testthat/data/rvatData.gdb differ
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
index 9702fc6..7987feb 100644
--- a/tests/testthat/setup.R
+++ b/tests/testthat/setup.R
@@ -4,3 +4,20 @@ library(rvatData)
 
 gdbpath <- rvat_example("rvatData.gdb")
 testgdb <- withr::local_tempfile()
+
+create_example_gdb <- function (gdbpath = NULL, filepath = NULL) 
+{
+    if (is.null(filepath)) {
+        filepath <- tempfile()
+    } else {
+        if (file.exists(filepath)) {
+            stop(sprintf("%s already exists. Please delete or use a different filepath.", 
+                filepath))
+        }
+    }
+    gdb <- test_path("data/rvatData.gdb")
+    if (!file.copy(gdb, filepath)) {
+        stop("Failed to copy example gdb")
+    }
+    gdb(filepath)
+}
diff --git a/tests/testthat/test-assocTest.R b/tests/testthat/test-assocTest.R
index 8c5f357..4783365 100644
--- a/tests/testthat/test-assocTest.R
+++ b/tests/testthat/test-assocTest.R
@@ -360,9 +360,10 @@ test_that("dominant/recessive model filtering works", {
 
 test_that("assocTest-gdb and assocTest-GT result in identical output", {
   params <- generate_param()
+  gdb <- create_example_gdb()
   warnings <- capture_warnings(
     run_test_gdb <- run_tests_gdb(
-      gdb(rvat_example("rvatData.gdb")),
+      gdb,
       params,
       VAR_id = rownames(GT),
       var_sv = rownames(GT)[1:25]
diff --git a/tests/testthat/test-cli.R b/tests/testthat/test-cli.R
index 3d5414d..f703e99 100644
--- a/tests/testthat/test-cli.R
+++ b/tests/testthat/test-cli.R
@@ -3,6 +3,9 @@
 # - directly checks if arguments are passed correctly, rather than indirectly by checking outputs etc.
 # - tests will directly fail if parameters or defaults ihave not been properly updated in CLI
 
+gdb <- create_example_gdb()
+gdb_path <- getGdbPath(gdb)
+
 # buildGdb
 test_that("--buildGdb works", {
   # generate mocked function/method that returns the environment
@@ -189,7 +192,7 @@ test_that("--subsetGdb works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--subsetGdb",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--output=example_subset.gdb"
       )
     },
@@ -205,7 +208,7 @@ test_that("--subsetGdb works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     output = "example_subset.gdb"
   )
   expected_args <- c(
@@ -225,7 +228,7 @@ test_that("--subsetGdb works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--subsetGdb",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--output=example.gdb",
         "--intersection=varInfo",
         "--where='ModerateImpact = 1'",
@@ -248,7 +251,7 @@ test_that("--subsetGdb works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     output = "example.gdb",
     intersection = "varInfo",
     where = "'ModerateImpact = 1'",
@@ -289,7 +292,7 @@ test_that("--uploadAnno works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--uploadAnno",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--name=data",
         "--value=data.txt"
       )
@@ -306,7 +309,7 @@ test_that("--uploadAnno works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     name = "data",
     value = "data.txt"
   )
@@ -325,7 +328,7 @@ test_that("--uploadAnno works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--uploadAnno",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--name=data",
         "--value=data.txt",
         "--sep='|'",
@@ -350,7 +353,7 @@ test_that("--uploadAnno works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     name = "data",
     value = "data.txt",
     sep = "'|'",
@@ -393,7 +396,7 @@ test_that("--uploadCohort works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--uploadCohort",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--name=data",
         "--value=data.txt"
       )
@@ -410,7 +413,7 @@ test_that("--uploadCohort works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     name = "data",
     value = "data.txt"
   )
@@ -431,7 +434,7 @@ test_that("--uploadCohort works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--uploadCohort",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--name=data",
         "--value=data.txt",
         "--sep='|'",
@@ -451,7 +454,7 @@ test_that("--uploadCohort works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     name = "data",
     value = "data.txt",
     sep = "'|'",
@@ -492,7 +495,7 @@ test_that("--listAnno works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--listAnno",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb"))
+        sprintf("--gdb=%s", gdb_path)
       )
     },
     .package = "base"
@@ -507,7 +510,7 @@ test_that("--listAnno works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb"))
+    object = gdb
   )
   expected_args <- c(
     expected_args,
@@ -537,7 +540,7 @@ test_that("--listCohort works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--listCohort",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb"))
+        sprintf("--gdb=%s", gdb_path)
       )
     },
     .package = "base"
@@ -552,7 +555,7 @@ test_that("--listCohort works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb"))
+    object = gdb
   )
   expected_args <- c(
     expected_args,
@@ -581,7 +584,7 @@ test_that("--mapVariants works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--mapVariants",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb"))
+        sprintf("--gdb=%s", gdb_path)
       )
     },
     .package = "base"
@@ -596,7 +599,7 @@ test_that("--mapVariants works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb"))
+    object = gdb
   )
   expected_args <- c(
     expected_args,
@@ -615,7 +618,7 @@ test_that("--mapVariants works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--mapVariants",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--ranges=ranges.txt",
         "--gff=ensembl.gtf",
         "--bed=bedfile.bed",
@@ -641,7 +644,7 @@ test_that("--mapVariants works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     ranges = "ranges.txt",
     gff = "ensembl.gtf",
     bed = "bedfile.bed",
@@ -686,7 +689,7 @@ test_that("--buildVarSet works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--buildVarSet",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--varSetName=LOF",
         "--unitTable=gene",
         "--unitName=gene_id",
@@ -705,7 +708,7 @@ test_that("--buildVarSet works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     varSetName = "LOF",
     unitTable = "gene",
     unitName = "gene_id",
@@ -728,7 +731,7 @@ test_that("--buildVarSet works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--buildVarSet",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--varSetName=LOF",
         "--unitTable=gene",
         "--unitName=gene_id",
@@ -752,7 +755,7 @@ test_that("--buildVarSet works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     varSetName = "LOF",
     unitTable = "gene",
     unitName = "gene_id",
@@ -795,7 +798,7 @@ test_that("--spatialClust works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--spatialClust",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--output=varsetfile.txt.gz",
         "--varSetName=LOF",
         "--unitTable=gene",
@@ -816,7 +819,7 @@ test_that("--spatialClust works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     varSetName = "LOF",
     unitTable = "gene",
     unitName = "gene_id",
@@ -843,7 +846,7 @@ test_that("--spatialClust works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--spatialClust",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--varSetName=LOF",
         "--unitTable=gene",
         "--unitName=gene_id",
@@ -868,7 +871,7 @@ test_that("--spatialClust works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     varSetName = "LOF",
     unitTable = "gene",
     unitName = "gene_id",
@@ -915,7 +918,7 @@ test_that("--summariseGeno works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--summariseGeno",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--output=sumgeno.txt.gz"
       )
     },
@@ -931,7 +934,7 @@ test_that("--summariseGeno works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     output = "sumgeno.txt.gz"
   )
   expected_args <- c(
@@ -957,7 +960,7 @@ test_that("--summariseGeno works", {
   ## save varsetfile
   varsetfile <- withr::local_tempfile()
   buildVarSet(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     varSetName = "Moderate",
     unitTable = "varInfo",
     unitName = "gene_name",
@@ -974,7 +977,7 @@ test_that("--summariseGeno works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--summariseGeno",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--output=sumgeno.txt.gz",
         "--cohort=pheno",
         sprintf("--varSet=%s", varsetfile),
@@ -1013,7 +1016,7 @@ test_that("--summariseGeno works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     cohort = "pheno",
     VAR_id = as.character(1:10),
     pheno = "pheno",
@@ -1078,7 +1081,7 @@ test_that("--aggregate works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--aggregate",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         sprintf("--VAR_id=%s", varids),
         "--output=aggregate.txt.gz"
       )
@@ -1095,7 +1098,7 @@ test_that("--aggregate works", {
 
   # check against expected args
   expected_args <- list(
-    x = gdb(rvatData::rvat_example("rvatData.gdb")),
+    x = gdb,
     VAR_id = as.character(1:10),
     output = "aggregate.txt.gz"
   )
@@ -1119,7 +1122,7 @@ test_that("--aggregate works", {
   ## save varsetfile
   varsetfile <- withr::local_tempfile()
   buildVarSet(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     varSetName = "Moderate",
     unitTable = "varInfo",
     unitName = "gene_name",
@@ -1136,7 +1139,7 @@ test_that("--aggregate works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--aggregate",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--cohort=pheno",
         sprintf("--varSet=%s", varsetfile),
         sprintf("--VAR_id=%s", varids),
@@ -1177,7 +1180,7 @@ test_that("--aggregate works", {
 
   # check against expected args
   expected_args <- list(
-    x = gdb(rvatData::rvat_example("rvatData.gdb")),
+    x = gdb,
     cohort = "pheno",
     VAR_id = as.character(1:10),
     pheno = "pheno",
@@ -1236,7 +1239,7 @@ test_that("--mergeAggDbs works", {
   # test defaults
   aggdb1 <- withr::local_tempfile()
   aggdb2 <- withr::local_tempfile()
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- gdb
   agg1 <- aggregate(
     gdb,
     VAR_id = 1:10,
@@ -1345,7 +1348,7 @@ test_that("--collapseAggDbs works", {
   # test defaults
   aggdb1 <- withr::local_tempfile()
   aggdb2 <- withr::local_tempfile()
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- gdb
   agg1 <- aggregate(
     gdb,
     VAR_id = 1:10,
@@ -1461,7 +1464,7 @@ test_that("--assocTest works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--assocTest",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--pheno=pheno",
         "--test=firth",
         "--output=assocTest.txt.gz"
@@ -1479,7 +1482,7 @@ test_that("--assocTest works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     pheno = "pheno",
     test = "firth",
     output = "assocTest.txt.gz"
@@ -1504,7 +1507,7 @@ test_that("--assocTest works", {
   ## save varsetfile
   varsetfile <- withr::local_tempfile()
   null <- buildVarSet(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     varSetName = "Moderate",
     unitTable = "varInfo",
     unitName = "gene_name",
@@ -1529,7 +1532,7 @@ test_that("--assocTest works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--assocTest",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--pheno=pheno,sex",
         "--test=firth,glm,scoreSPA",
         "--cohort=pheno",
@@ -1581,7 +1584,7 @@ test_that("--assocTest works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     pheno = c("pheno", "sex"),
     test = c("firth", "glm", "scoreSPA"),
     cohort = "pheno",
@@ -1657,7 +1660,7 @@ test_that("--assocTest works", {
   genesetfile <- withr::local_tempfile()
 
   null <- buildVarSet(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     varSetName = "Moderate",
     unitTable = "varInfo",
     unitName = "gene_name",
@@ -1667,7 +1670,7 @@ test_that("--assocTest works", {
   )
 
   null <- aggregate(
-    x = gdb(rvatData::rvat_example("rvatData.gdb")),
+    x = gdb,
     varSet = getVarSet(varSetFile(varsetfile), unit = c("SOD1", "ABCA4")),
     output = aggdb,
     verbose = FALSE
@@ -1690,7 +1693,7 @@ test_that("--assocTest works", {
         "--test=firth",
         sprintf("--aggdb=%s", aggdb),
         sprintf("--geneSet=%s", genesetfile),
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--output=assocTest.txt.gz"
       )
     },
@@ -1742,7 +1745,7 @@ test_that("--assocTest works", {
         "--pheno=pheno",
         "--test=firth,glm",
         sprintf("--geneSet=%s", genesetfile),
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--cohort=pheno",
         "--name=test",
         "--continuous",
@@ -1993,7 +1996,7 @@ test_that("--buildCorMatrix works", {
 
   # test defaults
   aggdb <- withr::local_tempfile()
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- gdb
   varsetfile <- varSetFile(rvat_example("rvatData_varsetfile.txt.gz"))
   varsetlist <- getVarSet(
     varsetfile,
@@ -2199,7 +2202,7 @@ test_that("--geneSetAssoc works", {
   ## cormatrix
   aggdb <- withr::local_tempfile()
   cormatrix <- withr::local_tempfile()
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- gdb
   varsetfile <- varSetFile(rvat_example("rvatData_varsetfile.txt.gz"))
   varsetlist <- getVarSet(
     varsetfile,
@@ -2410,7 +2413,7 @@ test_that("--writeVcf works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--writeVcf",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--output=writeVcf.txt.gz"
       )
     },
@@ -2426,7 +2429,7 @@ test_that("--writeVcf works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     output = "writeVcf.txt.gz"
   )
 
@@ -2452,7 +2455,7 @@ test_that("--writeVcf works", {
     commandArgs = function(trailingOnly = TRUE) {
       c(
         "--writeVcf",
-        sprintf("--gdb=%s", rvatData::rvat_example("rvatData.gdb")),
+        sprintf("--gdb=%s", gdb_path),
         "--output=writeVcf.txt.gz",
         sprintf("--VAR_id=%s", varids),
         sprintf("--IID=%s", iids),
@@ -2475,7 +2478,7 @@ test_that("--writeVcf works", {
 
   # check against expected args
   expected_args <- list(
-    object = gdb(rvatData::rvat_example("rvatData.gdb")),
+    object = gdb,
     output = "writeVcf.txt.gz",
     VAR_id = as.character(1:10),
     IID = as.character(1:10),
diff --git a/tests/testthat/test-gdb-extractRanges.R b/tests/testthat/test-gdb-extractRanges.R
index 5a5a503..a211956 100644
--- a/tests/testthat/test-gdb-extractRanges.R
+++ b/tests/testthat/test-gdb-extractRanges.R
@@ -20,7 +20,7 @@ create_test_ranges <- function() {
 }
 
 test_that("extractRanges basic functionality works", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
   ranges <- create_test_ranges()
 
   # check basic extraction
@@ -48,7 +48,7 @@ test_that("extractRanges basic functionality works", {
 })
 
 test_that("extractRanges handles different input formats", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
 
   # ensembl gene info
   gtf <- readr::read_tsv(
@@ -84,7 +84,7 @@ test_that("extractRanges handles different input formats", {
 })
 
 test_that("extractRanges padding functionality works", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
 
   # check whether INDELs are extracted correctly
   # includes three deletions that should overlap the start coordinates of ranges
@@ -104,11 +104,11 @@ test_that("extractRanges padding functionality works", {
     ranges = ranges,
     padding = 10L
   )
-  expect_identical(check_with_padding, c("1362", "1206", "1765"))
+  expect_identical(sort(check_with_padding), sort(c("1362", "1206", "1765")))
 })
 
 test_that("extractRanges handles edge cases correctly", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
   ranges <- create_test_ranges()
 
   # check no overlap message and empty result
@@ -148,7 +148,7 @@ test_that("extractRanges handles edge cases correctly", {
 })
 
 test_that("extractRanges input validation works correctly", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
   ranges <- create_test_ranges()$basic
 
   # expect an error when invalid padding values are provided
diff --git a/tests/testthat/test-gdb-getGT.R b/tests/testthat/test-gdb-getGT.R
index ee81077..546c376 100644
--- a/tests/testthat/test-gdb-getGT.R
+++ b/tests/testthat/test-gdb-getGT.R
@@ -1,6 +1,6 @@
 test_that("getGT basic functionality works", {
   # GT with 100 variants for testing
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
   GT <- expect_no_error(getGT(
     gdb,
     VAR_id = 1:100,
@@ -27,7 +27,7 @@ test_that("getGT basic functionality works", {
 
 
 test_that("getGT handles invalid VAR_ids correctly", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
 
   # loading VAR_ids that are not present in gdb should yield a warning
   GT_reference <- getGT(gdb, VAR_id = 1:100, cohort = "pheno", verbose = FALSE)
@@ -58,7 +58,7 @@ test_that("getGT handles invalid VAR_ids correctly", {
 })
 
 test_that("getGT works with varSet and weights", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
 
   # build a varset containing CADD weights
   CADD_file <- withr::local_tempfile(fileext = ".txt.gz")
@@ -105,7 +105,7 @@ test_that("getGT works with varSet and weights", {
 })
 
 test_that("getGT handles different cohort specifications", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
 
   # test specifying gdb cohort name
   GT1 <- getGT(gdb, VAR_id = 1:100, cohort = "pheno", verbose = FALSE)
@@ -125,7 +125,7 @@ test_that("getGT handles different cohort specifications", {
 })
 
 test_that("getGT annotation functionality works", {
-  gdb <- gdb(rvatData::rvat_example("rvatData.gdb"))
+  gdb <- create_example_gdb()
 
   # GT without annotations
   GT1 <- getGT(gdb, VAR_id = 1:100, cohort = "pheno", verbose = FALSE)