From 22cf4ffdd304d617ae0ba980cc2b32dc04d9bd2c Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Mon, 2 Feb 2026 00:22:38 +0000
Subject: [PATCH 1/8] multi_connection_table gets prefer.foreign arg

* default value retains current behaviour
* but makes it easier to switch behaviour
---
 R/cosine.R            | 74 +++++++++++++++++++++++++++++--------------
 man/cf_cosine_plot.Rd | 43 +++++++++++++++++++------
 2 files changed, 85 insertions(+), 32 deletions(-)

diff --git a/R/cosine.R b/R/cosine.R
index 7d0c1df..6008ec2 100644
--- a/R/cosine.R
+++ b/R/cosine.R
@@ -296,29 +296,53 @@ cf_cosine_plot <- function(ids=NULL, ..., threshold=5,
 }
 
 
-#' @description \code{multi_connection_table} fetches partner connectivity data
-#'   (the first step in \code{cf_cosine_plot} but then gives you the option e.g.
-#'   to select specific classes of partner neurons. See examples.
+#'@description \code{multi_connection_table} fetches partner connectivity data
+#'  (the first step in \code{cf_cosine_plot} but then gives you the option e.g.
+#'  to select specific classes of partner neurons. See examples.
 #'
-#' @importFrom dplyr distinct all_of
-#' @param check_missing Whether to report if any query neurons are dropped (due
-#'   to insufficient partner neurons) (default:\code{TRUE}).
-#' @param min_datasets How many datasets a type must be in to be included in the
-#'   output. The default of \code{Inf} => all datasets must contain the cell
-#'   type. A negative number defines the number of datasets from which a type
-#'   can be missing. For example \code{-1} would mean that types would still be
-#'   included even if they are missing from one dataset.
+#'@details At present the malecns dataset is the best integrated of all with
+#'  "foreign type" columns referencing the prior flywire female brain and MANC
+#'  male nerve cord datasets. These in turn have been the target of ongoing FANC
+#'  and BANC annotation efforts. Therefore right now the simplest way to ensure
+#'  that types can be matched across datasets is to use \code{prefer.foreign=T}
+#'  when requesting multiple datasets. However when using just the malecns, the
+#'  standard typing for that dataset has some improvements, so
+#'  \code{prefer.foreign=FALSE} would be better. The default setting of
+#'  \code{prefer.foreign=NA} therefore chooses \code{prefer.foreign=TRUE} when
+#'  malecns and at least one other dataset are being requested and \code{FALSE}
+#'  otherwise.
 #'
-#' @rdname cf_cosine_plot
-#' @export
-#' @return \code{multi_connection_table} returns a connectivity dataframe as
-#'   returned by \code{cf_partners} but with an additional column
-#'   \code{partners} which indicates (for each row) whether the partner neurons
-#'   are the input or output neurons.
+#'  Nevertheless, if you want really tight control of the type to type mapping
+#'  it is recommended to fetch with \code{prefer.foreign=F, min_datasets=1} and
+#'  then manually review and fix up any types that you know should match.
+#'
+#'@importFrom dplyr distinct all_of
+#'@param check_missing Whether to report if any query neurons are dropped (due
+#'  to insufficient partner neurons) (default:\code{TRUE}).
+#'@param min_datasets How many datasets a type must be in to be included in the
+#'  output. The default of \code{Inf} => all datasets must contain the cell
+#'  type. A negative number defines the number of datasets from which a type can
+#'  be missing. For example \code{-1} would mean that types would still be
+#'  included even if they are missing from one dataset.
+#'@param prefer.foreign Whether to use foreign types for male CNS data. The
+#'  default value of \code{NA} prefers foreign types when multiple datasets
+#'  including malecns are requested. See details.
+#'@param MoreArgs Passed to \code{\link{cf_partners}} For expert use only.
+#'@param ... additional arguments passed to \code{\link{cf_partners}}
+#'
+#'@rdname cf_cosine_plot
+#'@export
+#'@return \code{multi_connection_table} returns a connectivity dataframe as
+#'  returned by \code{cf_partners} but with an additional column \code{partners}
+#'  which indicates (for each row) whether the partner neurons are the input or
+#'  output neurons.
 multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
                                    threshold=1L, group='type',
                                    check_missing=TRUE,
-                                   min_datasets=Inf
+                                   min_datasets=Inf,
+                                   prefer.foreign=NA,
+                                   MoreArgs=NULL,
+                                   ...
                                    ) {
   if(isTRUE(group))
     group='type'
@@ -327,7 +351,8 @@ multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
   if(length(partners)>1) {
     l=sapply(partners, simplify = F, function(p)
       multi_connection_table(kk, partners=p, threshold = threshold, group=group,
-                             check_missing=F, min_datasets = min_datasets))
+                             check_missing=F, min_datasets = min_datasets,
+                             prefer.foreign=prefer.foreign, MoreArgs=MoreArgs, ...))
     l=dplyr::bind_rows(l)
     if(check_missing) {
       query_keys <- l %>% group_by(partners) %>%
@@ -346,10 +371,13 @@ multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
   }
   kdf=keys2df(kk)
   datasets=unique(kdf$dataset)
-  MoreArgs=list()
-  if(length(datasets)>1 && "malecns" %in% datasets)
-    MoreArgs=list(malecns=list(prefer.foreign=TRUE))
-  x <- cf_partners(kk, threshold = threshold, partners = partners, MoreArgs = MoreArgs)
+  if(is.null(MoreArgs)){
+    MoreArgs=list()
+    if(isTRUE(prefer.foreign) ||
+       ((length(datasets)>1 && "malecns" %in% datasets) && is.na(prefer.foreign)))
+      MoreArgs=list(malecns=list(prefer.foreign=TRUE))
+  }
+  x <- cf_partners(kk, threshold = threshold, partners = partners, MoreArgs = MoreArgs, ...)
   if(is.character(group))
     x <- match_types(x, group, partners=partners, min_datasets = min_datasets)
   # mark which column was used for the query
diff --git a/man/cf_cosine_plot.Rd b/man/cf_cosine_plot.Rd
index 4010580..e19a743 100644
--- a/man/cf_cosine_plot.Rd
+++ b/man/cf_cosine_plot.Rd
@@ -29,7 +29,10 @@ multi_connection_table(
   threshold = 1L,
   group = "type",
   check_missing = TRUE,
-  min_datasets = Inf
+  min_datasets = Inf,
+  prefer.foreign = NA,
+  MoreArgs = NULL,
+  ...
 )
 }
 \arguments{
@@ -37,7 +40,7 @@ multi_connection_table(
 wrapped by \code{\link{cf_ids}} \emph{or} a dataframe compatible with the
 \code{\link{keys}} function.}
 
-\item{...}{Additional arguments passed to \code{\link{heatmap}}}
+\item{...}{additional arguments passed to \code{\link{cf_partners}}}
 
 \item{threshold}{return only edges with at least this many matches. 0 is an
 option since neuprint sometimes returns 0 weight edges.}
@@ -77,8 +80,8 @@ for details.}
 
 \item{min_datasets}{How many datasets a type must be in to be included in the
 output. The default of \code{Inf} => all datasets must contain the cell
-type. A negative number defines the number of datasets from which a type
-can be missing. For example \code{-1} would mean that types would still be
+type. A negative number defines the number of datasets from which a type can
+be missing. For example \code{-1} would mean that types would still be
 included even if they are missing from one dataset.}
 
 \item{nas}{What to do with entries that have NAs. Default is to set them to 0
@@ -88,6 +91,12 @@ similarity.}
 
 \item{check_missing}{Whether to report if any query neurons are dropped (due
 to insufficient partner neurons) (default:\code{TRUE}).}
+
+\item{prefer.foreign}{Whether to use foreign types for male CNS data. The
+default value of \code{NA} prefers foreign types when multiple datasets
+including malecns are requested. See details.}
+
+\item{MoreArgs}{Passed to \code{\link{cf_partners}} For expert use only.}
 }
 \value{
 The result of \code{\link{heatmap}} invisibly including the row and
@@ -96,9 +105,9 @@ The result of \code{\link{heatmap}} invisibly including the row and
   matrix.
 
 \code{multi_connection_table} returns a connectivity dataframe as
-  returned by \code{cf_partners} but with an additional column
-  \code{partners} which indicates (for each row) whether the partner neurons
-  are the input or output neurons.
+ returned by \code{cf_partners} but with an additional column \code{partners}
+ which indicates (for each row) whether the partner neurons are the input or
+ output neurons.
 }
 \description{
 \code{cf_cosine_plot} is the workhorse function for within and
@@ -107,8 +116,8 @@ output of \code{multi_connection_table} if you need more control. See
 examples.
 
 \code{multi_connection_table} fetches partner connectivity data
-  (the first step in \code{cf_cosine_plot} but then gives you the option e.g.
-  to select specific classes of partner neurons. See examples.
+ (the first step in \code{cf_cosine_plot} but then gives you the option e.g.
+ to select specific classes of partner neurons. See examples.
 }
 \details{
 \code{group=FALSE} only makes sense for single dataset clustering -
@@ -141,6 +150,22 @@ examples.
   argument then you will get an error. This is because \code{cf_cosine_plot}
   has no way of knowing which label corresponds to which neuron, almost
   certainly resulting in incorrect row labels on your dendrogram.
+
+At present the malecns dataset is the best integrated of all with
+ "foreign type" columns referencing the prior flywire female brain and MANC
+ male nerve cord datasets. These in turn have been the target of ongoing FANC
+ and BANC annotation efforts. Therefore right now the simplest way to ensure
+ that types can be matched across datasets is to use \code{prefer.foreign=T}
+ when requesting multiple datasets. However when using just the malecns, the
+ standard typing for that dataset has some improvements, so
+ \code{prefer.foreign=FALSE} would be better. The default setting of
+ \code{prefer.foreign=NA} therefore chooses \code{prefer.foreign=TRUE} when
+ malecns and at least one other dataset are being requested and \code{FALSE}
+ otherwise.
+
+ Nevertheless, if you want really tight control of the type to type mapping
+ it is recommended to fetch with \code{prefer.foreign=F, min_datasets=1} and
+ then manually review and fix up any types that you know should match.
 }
 \examples{
 \donttest{

From c11633d5c88ade5a0000cd3264309ee7d5344627 Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Mon, 2 Feb 2026 09:54:17 +0000
Subject: [PATCH 2/8] give cf_partners a keep.all argument

* but help bind_rows2 to bind columns with different data types
---
 R/partners.R       |  8 +++++---
 R/utils.R          | 30 +++++++++++++++++++++++++++++-
 man/cf_meta.Rd     |  2 +-
 man/cf_partners.Rd |  8 ++++++--
 4 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/R/partners.R b/R/partners.R
index e3dbb39..c658e5d 100644
--- a/R/partners.R
+++ b/R/partners.R
@@ -18,7 +18,9 @@
 #' @param partners Whether to return inputs or outputs
 #' @param bind.rows Whether to bind data.frames for each dataset together,
 #'   keeping only the common columns (default \code{TRUE} for convenience but
-#'   note that some columns will be dropped).
+#'   note that some columns will be dropped by default).
+#' @param keep.all Whether to keep all columns when \code{bind.rows=TRUE}.
+#'   Default \code{FALSE} only keeps shared columns.
 #' @param MoreArgs Additional arguments in the form of a hierarchical list
 #'   (expert use; see details and examples).
 #'
@@ -42,7 +44,7 @@
 #'   MoreArgs = list(malecns=list(prefer.foreign=TRUE))
 #' }
 cf_partners <- function(ids, threshold=1L, partners=c("inputs", "outputs"),
-                        bind.rows=TRUE, MoreArgs=list()) {
+                        bind.rows=TRUE, MoreArgs=list(), keep.all=FALSE) {
   partners=match.arg(partners)
   threshold <- checkmate::assert_integerish(
     threshold, lower=0L,len = 1, null.ok = F, all.missing = F)
@@ -127,7 +129,7 @@ cf_partners <- function(ids, threshold=1L, partners=c("inputs", "outputs"),
     res[[n]]=tres
   }
   if(isTRUE(bind.rows)) {
-    res=bind_rows2(res)
+    res=bind_rows2(res, keep.all = keep.all)
     # record the datasets we tried to find
     attr(res, 'datasets')=names(ids)
     res
diff --git a/R/utils.R b/R/utils.R
index 4f0439c..99061ec 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -11,11 +11,39 @@ bind_rows2 <- function(l, keep.all=FALSE) {
     l=lapply(l, "[", commoncols)
     l <- do.call(function(...) rbind(..., make.row.names=FALSE), l)
   } else {
-    l <- dplyr::bind_rows(l)
+    l2=fix_mixed_col_types(l)
+    l <- dplyr::bind_rows(l2)
   }
   l
 }
 
+fix_mixed_col_types <- function(l) { # l: list of data frames about to be row-bound
+  dd=dplyr::bind_rows( # long table: one row per (data frame, column) pair
+    lapply(l, function(x) tibble::tibble(
+      name=names(x),
+      mode=sapply(x, mode))), # mode() of every column in this data frame
+    .id = 'dfname') # dfname records which element of l the row came from
+  tofix <- dd %>%
+    group_by(name) %>%
+    summarise(nmodes=n_distinct(mode),
+              ndfs=n_distinct(dfname), # computed but not used below
+              some_character=any(mode=='character')) %>%
+    filter(some_character & nmodes>1) # character in some inputs, another mode elsewhere
+  if(nrow(tofix)<1)
+    return(l) # no mixed-mode columns: hand the list back unchanged
+
+  lapply(l, function(d) { # returns a new list with the flagged columns as character
+    for (nm in intersect(tofix$name, names(d))) {
+      x <- d[[nm]]
+      if (!is.character(x)) {
+        # IMPORTANT: use id2char rather than as.character
+        d[[nm]] <- coconat::id2char(x)
+      }
+    }
+    d
+  })
+}
+
 cf_connections <- function() {
   dslist=list()
   npds=c("hemibrain", "manc", "malecns", 'opticlobe')
diff --git a/man/cf_meta.Rd b/man/cf_meta.Rd
index cb6eed4..a794e2f 100644
--- a/man/cf_meta.Rd
+++ b/man/cf_meta.Rd
@@ -19,7 +19,7 @@ other input that can be processed by the \code{\link{keys}} function
 
 \item{bind.rows}{Whether to bind data.frames for each dataset together,
 keeping only the common columns (default \code{TRUE} for convenience but
-note that some columns will be dropped).}
+note that some columns will be dropped by default).}
 
 \item{integer64}{Whether ids should be character vectors (default) or 64 bit
 ints (more compact but a little fragile as they rely on the \code{bit64}
diff --git a/man/cf_partners.Rd b/man/cf_partners.Rd
index 5661bef..4a347ec 100644
--- a/man/cf_partners.Rd
+++ b/man/cf_partners.Rd
@@ -9,7 +9,8 @@ cf_partners(
   threshold = 1L,
   partners = c("inputs", "outputs"),
   bind.rows = TRUE,
-  MoreArgs = list()
+  MoreArgs = list(),
+  keep.all = FALSE
 )
 }
 \arguments{
@@ -24,10 +25,13 @@ option since neuprint sometimes returns 0 weight edges.}
 
 \item{bind.rows}{Whether to bind data.frames for each dataset together,
 keeping only the common columns (default \code{TRUE} for convenience but
-note that some columns will be dropped).}
+note that some columns will be dropped by default).}
 
 \item{MoreArgs}{Additional arguments in the form of a hierarchical list
 (expert use; see details and examples).}
+
+\item{keep.all}{Whether to keep all columns when \code{bind.rows=TRUE}.
+Default \code{FALSE} only keeps shared columns.}
 }
 \value{
 A data.frame or a named list (when \code{bind.rows=FALSE})

From 8c5c628f9a747e5f0b8add89e5fe8d8d38f26b76 Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Mon, 2 Feb 2026 10:04:33 +0000
Subject: [PATCH 3/8] bind_rows2: also handle mixed type non id columns

* for example a logical column and a character vector wouldn't have worked.
---
 R/utils.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 99061ec..97ed3eb 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -36,9 +36,12 @@ fix_mixed_col_types <- function(l) {
     for (nm in intersect(tofix$name, names(d))) {
       x <- d[[nm]]
       if (!is.character(x)) {
-        # IMPORTANT: use id2char rather than as.character
-        d[[nm]] <- coconat::id2char(x)
+        # use id2char rather than as.character to ensure eg 100000 processed ok
+        # but fall back to as.character if id2char can't handle it
+        ix <- tryCatch(coconat::id2char(x), error = function(e) as.character(x))
+        # assign inside the guard: ix only exists for non-character columns
+        d[[nm]] <- ix
       }
     }
     d
   })

From 747cb1d37e1f4066f5b80b1a9fe4a74aea833340 Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Mon, 2 Feb 2026 10:15:11 +0000
Subject: [PATCH 4/8] Give multi_connection_table a keep.all arg (and polish docs)

---
 R/cosine.R            | 25 +++++++++++++++----------
 R/meta.R              |  4 ----
 R/partners.R          |  7 ++++---
 man/cf_cosine_plot.Rd | 28 ++++++++++++++++++----------
 man/cf_meta.Rd        |  8 ++++----
 man/cf_partners.Rd    |  7 ++++---
 6 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/R/cosine.R b/R/cosine.R
index 6008ec2..47dd73b 100644
--- a/R/cosine.R
+++ b/R/cosine.R
@@ -297,24 +297,27 @@ cf_cosine_plot <- function(ids=NULL, ..., threshold=5,
 
 
 #'@description \code{multi_connection_table} fetches partner connectivity data
-#'  (the first step in \code{cf_cosine_plot} but then gives you the option e.g.
-#'  to select specific classes of partner neurons. See examples.
+#'  (the first step in \code{cf_cosine_plot}) but then gives you the option e.g.
+#'  to select specific classes of partner neurons, fix type names etc. See
+#'  examples.
 #'
 #'@details At present the malecns dataset is the best integrated of all with
 #'  "foreign type" columns referencing the prior flywire female brain and MANC
 #'  male nerve cord datasets. These in turn have been the target of ongoing FANC
 #'  and BANC annotation efforts. Therefore right now the simplest way to ensure
-#'  that types can be matched across datasets is to use \code{prefer.foreign=T}
-#'  when requesting multiple datasets. However when using just the malecns, the
-#'  standard typing for that dataset has some improvements, so
-#'  \code{prefer.foreign=FALSE} would be better. The default setting of
-#'  \code{prefer.foreign=NA} therefore chooses \code{prefer.foreign=TRUE} when
-#'  malecns and at least one other dataset are being requested and \code{FALSE}
-#'  otherwise.
+#'  that types can be matched across datasets is to use
+#'  \code{prefer.foreign=TRUE} when requesting multiple datasets. However when
+#'  using just the malecns, the standard typing for that dataset has some
+#'  improvements, so \code{prefer.foreign=FALSE} would be better. The default
+#'  setting of \code{prefer.foreign=NA} therefore chooses
+#'  \code{prefer.foreign=TRUE} when malecns and at least one other dataset are
+#'  being requested and \code{FALSE} otherwise.
 #'
 #'  Nevertheless, if you want really tight control of the type to type mapping
 #'  it is recommended to fetch with \code{prefer.foreign=F, min_datasets=1} and
-#'  then manually review and fix up any types that you know should match.
+#'  then manually review and fix up any types that you know should match. If you
+#'  also set \code{keep.all=T} then you can access the foreign types columns
+#'  as part of your logic for doing this.
 #'
 #'@importFrom dplyr distinct all_of
 #'@param check_missing Whether to report if any query neurons are dropped (due
@@ -329,6 +332,7 @@ cf_cosine_plot <- function(ids=NULL, ..., threshold=5,
 #'  including malecns are requested. See details.
 #'@param MoreArgs Passed to \code{\link{cf_partners}} For expert use only.
 #'@param ... additional arguments passed to \code{\link{cf_partners}}
+#'@inheritParams cf_partners
 #'
 #'@rdname cf_cosine_plot
 #'@export
@@ -341,6 +345,7 @@ multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
                                    check_missing=TRUE,
                                    min_datasets=Inf,
                                    prefer.foreign=NA,
+                                   keep.all=FALSE,
                                    MoreArgs=NULL,
                                    ...
                                    ) {
diff --git a/R/meta.R b/R/meta.R
index 3cd7b85..8eb0ca3 100644
--- a/R/meta.R
+++ b/R/meta.R
@@ -50,10 +50,6 @@ get_meta_fun <- function(dataset) {
 #'   extension package.)
 #' @param MoreArgs A named list of arguments to be passed when fetching metadata
 #'   for a given function. See details.
-#' @param keep.all When fetching metadata from different datasets, whether to
-#'   keep all metadata columns rather than just those in common
-#'   (default=\code{FALSE})
-#'
 #' @inheritParams cf_partners
 #'
 #' @importFrom dplyr mutate rename rename_with select case_when any_of
diff --git a/R/partners.R b/R/partners.R
index c658e5d..55baa43 100644
--- a/R/partners.R
+++ b/R/partners.R
@@ -18,9 +18,10 @@
 #' @param partners Whether to return inputs or outputs
 #' @param bind.rows Whether to bind data.frames for each dataset together,
 #'   keeping only the common columns (default \code{TRUE} for convenience but
-#'   note that some columns will be dropped by default).
-#' @param keep.all Whether to keep all columns when \code{bind.rows=TRUE}.
-#'   Default \code{FALSE} only keeps shared columns.
+#'   note that some columns will be dropped unless \code{keep.all=TRUE}).
+#' @param keep.all Whether to keep all columns when processing multiple datasets
+#'   rather than just those in common (default=\code{FALSE} only keeps shared
+#'   columns).
 #' @param MoreArgs Additional arguments in the form of a hierarchical list
 #'   (expert use; see details and examples).
 #'
diff --git a/man/cf_cosine_plot.Rd b/man/cf_cosine_plot.Rd
index e19a743..d386b90 100644
--- a/man/cf_cosine_plot.Rd
+++ b/man/cf_cosine_plot.Rd
@@ -31,6 +31,7 @@ multi_connection_table(
   check_missing = TRUE,
   min_datasets = Inf,
   prefer.foreign = NA,
+  keep.all = FALSE,
   MoreArgs = NULL,
   ...
 )
@@ -96,6 +97,10 @@ to insufficient partner neurons) (default:\code{TRUE}).}
 default value of \code{NA} prefers foreign types when multiple datasets
 including malecns are requested. See details.}
 
+\item{keep.all}{Whether to keep all columns when processing multiple datasets
+rather than just those in common (default=\code{FALSE} only keeps shared
+columns).}
+
 \item{MoreArgs}{Passed to \code{\link{cf_partners}} For expert use only.}
 }
 \value{
@@ -116,8 +121,9 @@ output of \code{multi_connection_table} if you need more control. See
 examples.
 
 \code{multi_connection_table} fetches partner connectivity data
- (the first step in \code{cf_cosine_plot} but then gives you the option e.g.
- to select specific classes of partner neurons. See examples.
+ (the first step in \code{cf_cosine_plot}) but then gives you the option e.g.
+ to select specific classes of partner neurons, fix type names etc. See
+ examples.
 }
 \details{
 \code{group=FALSE} only makes sense for single dataset clustering -
@@ -155,17 +161,19 @@ At present the malecns dataset is the best integrated of all with
  "foreign type" columns referencing the prior flywire female brain and MANC
  male nerve cord datasets. These in turn have been the target of ongoing FANC
  and BANC annotation efforts. Therefore right now the simplest way to ensure
- that types can be matched across datasets is to use \code{prefer.foreign=T}
- when requesting multiple datasets. However when using just the malecns, the
- standard typing for that dataset has some improvements, so
- \code{prefer.foreign=FALSE} would be better. The default setting of
- \code{prefer.foreign=NA} therefore chooses \code{prefer.foreign=TRUE} when
- malecns and at least one other dataset are being requested and \code{FALSE}
- otherwise.
+ that types can be matched across datasets is to use
+ \code{prefer.foreign=TRUE} when requesting multiple datasets. However when
+ using just the malecns, the standard typing for that dataset has some
+ improvements, so \code{prefer.foreign=FALSE} would be better. The default
+ setting of \code{prefer.foreign=NA} therefore chooses
+ \code{prefer.foreign=TRUE} when malecns and at least one other dataset are
+ being requested and \code{FALSE} otherwise.
 
  Nevertheless, if you want really tight control of the type to type mapping
  it is recommended to fetch with \code{prefer.foreign=F, min_datasets=1} and
- then manually review and fix up any types that you know should match.
+ then manually review and fix up any types that you know should match. If you
+ also set \code{keep.all=T} then you can access the foreign types columns
+ as part of your logic for doing this.
 }
 \examples{
 \donttest{
diff --git a/man/cf_meta.Rd b/man/cf_meta.Rd
index a794e2f..b4ef47c 100644
--- a/man/cf_meta.Rd
+++ b/man/cf_meta.Rd
@@ -19,15 +19,15 @@ other input that can be processed by the \code{\link{keys}} function
 
 \item{bind.rows}{Whether to bind data.frames for each dataset together,
 keeping only the common columns (default \code{TRUE} for convenience but
-note that some columns will be dropped by default).}
+note that some columns will be dropped unless \code{keep.all=TRUE}).}
 
 \item{integer64}{Whether ids should be character vectors (default) or 64 bit
 ints (more compact but a little fragile as they rely on the \code{bit64}
 extension package.)}
 
-\item{keep.all}{When fetching metadata from different datasets, whether to
-keep all metadata columns rather than just those in common
-(default=\code{FALSE})}
+\item{keep.all}{Whether to keep all columns when processing multiple datasets
+rather than just those in common (default=\code{FALSE} only keeps shared
+columns).}
 
 \item{MoreArgs}{A named list of arguments to be passed when fetching metadata
 for a given function. See details.}
diff --git a/man/cf_partners.Rd b/man/cf_partners.Rd
index 4a347ec..f3d250c 100644
--- a/man/cf_partners.Rd
+++ b/man/cf_partners.Rd
@@ -25,13 +25,14 @@ option since neuprint sometimes returns 0 weight edges.}
 
 \item{bind.rows}{Whether to bind data.frames for each dataset together,
 keeping only the common columns (default \code{TRUE} for convenience but
-note that some columns will be dropped by default).}
+note that some columns will be dropped unless \code{keep.all=TRUE}).}
 
 \item{MoreArgs}{Additional arguments in the form of a hierarchical list
 (expert use; see details and examples).}
 
-\item{keep.all}{Whether to keep all columns when \code{bind.rows=TRUE}.
-Default \code{FALSE} only keeps shared columns.}
+\item{keep.all}{Whether to keep all columns when processing multiple datasets
+rather than just those in common (default=\code{FALSE} only keeps shared
+columns).}
 }
 \value{
 A data.frame or a named list (when \code{bind.rows=FALSE})

From d54b73cc2ee928a81454e7074c6f39bc717b7d4d Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Mon, 2 Feb 2026 10:16:34 +0000
Subject: [PATCH 5/8] Pass on keep.all in multi_connection_table

---
 R/cosine.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/cosine.R b/R/cosine.R
index 47dd73b..3acab23 100644
--- a/R/cosine.R
+++ b/R/cosine.R
@@ -357,7 +357,8 @@ multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
     l=sapply(partners, simplify = F, function(p)
       multi_connection_table(kk, partners=p, threshold = threshold, group=group,
                              check_missing=F, min_datasets = min_datasets,
-                             prefer.foreign=prefer.foreign, MoreArgs=MoreArgs, ...))
+                             prefer.foreign=prefer.foreign, MoreArgs=MoreArgs,
+                             keep.all=keep.all, ...))
     l=dplyr::bind_rows(l)
     if(check_missing) {
       query_keys <- l %>% group_by(partners) %>%
@@ -382,7 +383,8 @@ multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
        ((length(datasets)>1 && "malecns" %in% datasets) && is.na(prefer.foreign)))
       MoreArgs=list(malecns=list(prefer.foreign=TRUE))
   }
-  x <- cf_partners(kk, threshold = threshold, partners = partners, MoreArgs = MoreArgs, ...)
+  x <- cf_partners(kk, threshold = threshold, partners = partners,
+                   MoreArgs = MoreArgs, keep.all=keep.all, ...)
   if(is.character(group))
     x <- match_types(x, group, partners=partners, min_datasets = min_datasets)
   # mark which column was used for the query

From 916d05941ccf62d336d06838fb4729ebbf09bf0c Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Tue, 3 Feb 2026 13:35:00 +0000
Subject: [PATCH 6/8] try downloading flywire data in vignette

* I don't know why I've been having trouble with this.
---
 vignettes/AOTU063.Rmd | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vignettes/AOTU063.Rmd b/vignettes/AOTU063.Rmd
index c008766..d468590 100644
--- a/vignettes/AOTU063.Rmd
+++ b/vignettes/AOTU063.Rmd
@@ -33,17 +33,17 @@ library(dplyr)
 For this analysis we will use the version 630 connectivity / annotation data
 released in June 2023. We will set an option use the lower level fafbseg package
 to ensure this.
+You may need to download the relevant data dumps if you have not done so previously.
 
 ```{r}
-fafbseg::flywire_connectome_data_version(set = 630)
+fafbseg::download_flywire_release_data(version = 630)
 ```
 
-You may need to download the relevant data dumps if you have not done so previously.
-
-```{r, eval=FALSE}
-fafbseg::download_flywire_release_data(version = 630)
+```{r}
+fafbseg::flywire_connectome_data_version(set = 630)
 ```
 
+
 ```{r}
 aotu63=cf_meta(cf_ids(query = '/type:AOTU063.*', datasets = c("flywire","hemibrain")))
 aotu63

From b70ff4f210413c25bb87d3d5d296c3b9ea955381 Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Tue, 3 Feb 2026 13:47:20 +0000
Subject: [PATCH 7/8] add tibble dependency

---
 DESCRIPTION | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0c5c748..e523d5d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,7 +31,8 @@ Imports:
     usethis,
     methods,
     withr,
-    utils
+    utils,
+    tibble
 Suggests: 
     malevnc (> 0.3.1),
     fancr (>= 0.5.0),

From ba43814fc3336d47ce5fe5f4bb25fcb8f67243db Mon Sep 17 00:00:00 2001
From: Gregory Jefferis <jefferis@gmail.com>
Date: Tue, 3 Feb 2026 13:59:23 +0000
Subject: [PATCH 8/8] add entry for new vignette

---
 _pkgdown.yml                       | 1 +
 vignettes/extending-coconatfly.Rmd | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index ab0e2a9..6f7a917 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -8,3 +8,4 @@ articles:
   - getting-started
   - TuTu
   - AOTU063
+  - extending-coconatfly
diff --git a/vignettes/extending-coconatfly.Rmd b/vignettes/extending-coconatfly.Rmd
index 089b2f6..8f6c74b 100644
--- a/vignettes/extending-coconatfly.Rmd
+++ b/vignettes/extending-coconatfly.Rmd
@@ -1,8 +1,8 @@
 ---
-title: "Extending coconatfly with external data sources"
+title: "4. Extending coconatfly with external data sources"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{Extending coconatfly with external data sources}
+  %\VignetteIndexEntry{4. Extending coconatfly with external data sources}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---