diff --git a/DESCRIPTION b/DESCRIPTION
index 0c5c748..e523d5d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,7 +31,8 @@ Imports:
     usethis,
     methods,
     withr,
-    utils
+    utils,
+    tibble
 Suggests: 
     malevnc (> 0.3.1),
     fancr (>= 0.5.0),
diff --git a/R/cosine.R b/R/cosine.R
index 7d0c1df..3acab23 100644
--- a/R/cosine.R
+++ b/R/cosine.R
@@ -296,29 +296,58 @@ cf_cosine_plot <- function(ids=NULL, ..., threshold=5,
 }
 
 
-#' @description \code{multi_connection_table} fetches partner connectivity data
-#'   (the first step in \code{cf_cosine_plot} but then gives you the option e.g.
-#'   to select specific classes of partner neurons. See examples.
+#'@description \code{multi_connection_table} fetches partner connectivity data
+#'  (the first step in \code{cf_cosine_plot}) but then gives you the option e.g.
+#'  to select specific classes of partner neurons, fix type names etc. See
+#'  examples.
 #'
-#' @importFrom dplyr distinct all_of
-#' @param check_missing Whether to report if any query neurons are dropped (due
-#'   to insufficient partner neurons) (default:\code{TRUE}).
-#' @param min_datasets How many datasets a type must be in to be included in the
-#'   output. The default of \code{Inf} => all datasets must contain the cell
-#'   type. A negative number defines the number of datasets from which a type
-#'   can be missing. For example \code{-1} would mean that types would still be
-#'   included even if they are missing from one dataset.
+#'@details At present the malecns dataset is the best integrated of all with
+#'  "foreign type" columns referencing the prior flywire female brain and MANC
+#'  male nerve cord datasets. These in turn have been the target of ongoing FANC
+#'  and BANC annotation efforts. Therefore right now the simplest way to ensure
+#'  that types can be matched across datasets is to use
+#'  \code{prefer.foreign=TRUE} when requesting multiple datasets. However when
+#'  using just the malecns, the standard typing for that dataset has some
+#'  improvements, so \code{prefer.foreign=FALSE} would be better. The default
+#'  setting of \code{prefer.foreign=NA} therefore chooses
+#'  \code{prefer.foreign=TRUE} when malecns and at least one other dataset are
+#'  being requested and \code{FALSE} otherwise.
 #'
-#' @rdname cf_cosine_plot
-#' @export
-#' @return \code{multi_connection_table} returns a connectivity dataframe as
-#'   returned by \code{cf_partners} but with an additional column
-#'   \code{partners} which indicates (for each row) whether the partner neurons
-#'   are the input or output neurons.
+#'  Nevertheless, if you want really tight control of the type to type mapping
+#'  it is recommended to fetch with \code{prefer.foreign=F, min_datasets=1} and
+#'  then manually review and fix up any types that you know should match. If you
+#'  also set \code{keep.all=T} then you can access the foreign types columns
+#'  as part of your logic for doing this.
+#'
+#'@importFrom dplyr distinct all_of
+#'@param check_missing Whether to report if any query neurons are dropped (due
+#'  to insufficient partner neurons) (default:\code{TRUE}).
+#'@param min_datasets How many datasets a type must be in to be included in the
+#'  output. The default of \code{Inf} => all datasets must contain the cell
+#'  type. A negative number defines the number of datasets from which a type can
+#'  be missing. For example \code{-1} would mean that types would still be
+#'  included even if they are missing from one dataset.
+#'@param prefer.foreign Whether to use foreign types for male CNS data. The
+#'  default value of \code{NA} prefers foreign types when multiple datasets
+#'  including malecns are requested. See details.
+#'@param MoreArgs Passed to \code{\link{cf_partners}}. For expert use only.
+#'@param ... additional arguments passed to \code{\link{heatmap}} (\code{cf_cosine_plot}) or \code{\link{cf_partners}} (\code{multi_connection_table})
+#'@inheritParams cf_partners
+#'
+#'@rdname cf_cosine_plot
+#'@export
+#'@return \code{multi_connection_table} returns a connectivity dataframe as
+#'  returned by \code{cf_partners} but with an additional column \code{partners}
+#'  which indicates (for each row) whether the partner neurons are the input or
+#'  output neurons.
 multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
                                    threshold=1L, group='type',
                                    check_missing=TRUE,
-                                   min_datasets=Inf
+                                   min_datasets=Inf,
+                                   prefer.foreign=NA,
+                                   keep.all=FALSE,
+                                   MoreArgs=NULL,
+                                   ...
                                    ) {
   if(isTRUE(group))
     group='type'
@@ -327,7 +356,9 @@ multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
   if(length(partners)>1) {
     l=sapply(partners, simplify = F, function(p)
       multi_connection_table(kk, partners=p, threshold = threshold, group=group,
-                             check_missing=F, min_datasets = min_datasets))
+                             check_missing=F, min_datasets = min_datasets,
+                             prefer.foreign=prefer.foreign, MoreArgs=MoreArgs,
+                             keep.all=keep.all, ...))
     l=dplyr::bind_rows(l)
     if(check_missing) {
       query_keys <- l %>% group_by(partners) %>%
@@ -346,10 +377,14 @@ multi_connection_table <- function(ids, partners=c("inputs", "outputs"),
   }
   kdf=keys2df(kk)
   datasets=unique(kdf$dataset)
-  MoreArgs=list()
-  if(length(datasets)>1 && "malecns" %in% datasets)
-    MoreArgs=list(malecns=list(prefer.foreign=TRUE))
-  x <- cf_partners(kk, threshold = threshold, partners = partners, MoreArgs = MoreArgs)
+  if(is.null(MoreArgs)){
+    MoreArgs=list()
+    if(isTRUE(prefer.foreign) ||
+       ((length(datasets)>1 && "malecns" %in% datasets) && is.na(prefer.foreign)))
+      MoreArgs=list(malecns=list(prefer.foreign=TRUE))
+  }
+  x <- cf_partners(kk, threshold = threshold, partners = partners,
+                   MoreArgs = MoreArgs, keep.all=keep.all, ...)
   if(is.character(group))
     x <- match_types(x, group, partners=partners, min_datasets = min_datasets)
   # mark which column was used for the query
diff --git a/R/meta.R b/R/meta.R
index 3cd7b85..8eb0ca3 100644
--- a/R/meta.R
+++ b/R/meta.R
@@ -50,10 +50,6 @@ get_meta_fun <- function(dataset) {
 #'   extension package.)
 #' @param MoreArgs A named list of arguments to be passed when fetching metadata
 #'   for a given function. See details.
-#' @param keep.all When fetching metadata from different datasets, whether to
-#'   keep all metadata columns rather than just those in common
-#'   (default=\code{FALSE})
-#'
 #' @inheritParams cf_partners
 #'
 #' @importFrom dplyr mutate rename rename_with select case_when any_of
diff --git a/R/partners.R b/R/partners.R
index e3dbb39..55baa43 100644
--- a/R/partners.R
+++ b/R/partners.R
@@ -18,7 +18,10 @@
 #' @param partners Whether to return inputs or outputs
 #' @param bind.rows Whether to bind data.frames for each dataset together,
 #'   keeping only the common columns (default \code{TRUE} for convenience but
-#'   note that some columns will be dropped).
+#'   note that some columns will be dropped unless \code{keep.all=TRUE}).
+#' @param keep.all Whether to keep all columns when processing multiple datasets
+#'   rather than just those in common (default=\code{FALSE} only keeps shared
+#'   columns).
 #' @param MoreArgs Additional arguments in the form of a hierarchical list
 #'   (expert use; see details and examples).
 #'
@@ -42,7 +45,7 @@
 #'   MoreArgs = list(malecns=list(prefer.foreign=TRUE))
 #' }
 cf_partners <- function(ids, threshold=1L, partners=c("inputs", "outputs"),
-                        bind.rows=TRUE, MoreArgs=list()) {
+                        bind.rows=TRUE, MoreArgs=list(), keep.all=FALSE) {
   partners=match.arg(partners)
   threshold <- checkmate::assert_integerish(
     threshold, lower=0L,len = 1, null.ok = F, all.missing = F)
@@ -127,7 +130,7 @@ cf_partners <- function(ids, threshold=1L, partners=c("inputs", "outputs"),
     res[[n]]=tres
   }
   if(isTRUE(bind.rows)) {
-    res=bind_rows2(res)
+    res=bind_rows2(res, keep.all = keep.all)
     # record the datasets we tried to find
     attr(res, 'datasets')=names(ids)
     res
diff --git a/R/utils.R b/R/utils.R
index 4f0439c..97ed3eb 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -11,11 +11,42 @@ bind_rows2 <- function(l, keep.all=FALSE) {
     l=lapply(l, "[", commoncols)
     l <- do.call(function(...) rbind(..., make.row.names=FALSE), l)
   } else {
-    l <- dplyr::bind_rows(l)
+    l2=fix_mixed_col_types(l)
+    l <- dplyr::bind_rows(l2)
   }
   l
 }
 
+# Make columns that are character in some data frames of list l, but of another
+# mode in others, character everywhere so dplyr::bind_rows can combine them.
+# Returns l (a list of data frames) with those columns converted.
+fix_mixed_col_types <- function(l) {
+  dd <- dplyr::bind_rows(
+    lapply(l, function(x) tibble::tibble(
+      name = names(x),
+      mode = vapply(x, mode, character(1), USE.NAMES = FALSE))),
+    .id = 'dfname')
+  tofix <- dd %>%
+    group_by(name) %>%
+    summarise(nmodes = n_distinct(mode),
+              some_character = any(mode == 'character')) %>%
+    filter(some_character & nmodes > 1)
+  if (nrow(tofix) < 1) return(l)
+
+  lapply(l, function(d) {
+    for (nm in intersect(tofix$name, names(d))) {
+      x <- d[[nm]]
+      # columns that are already character must be left untouched
+      if (is.character(x)) next
+      # use id2char rather than as.character to ensure eg 100000 processed ok
+      ix <- try(coconat::id2char(x), silent = TRUE)
+      # but fall back if id2char can't handle it
+      d[[nm]] <- if (inherits(ix, 'try-error')) as.character(x) else ix
+    }
+    d
+  })
+}
+
 cf_connections <- function() {
   dslist=list()
   npds=c("hemibrain", "manc", "malecns", 'opticlobe')
diff --git a/_pkgdown.yml b/_pkgdown.yml
index ab0e2a9..6f7a917 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -8,3 +8,4 @@ articles:
   - getting-started
   - TuTu
   - AOTU063
+  - extending-coconatfly
diff --git a/man/cf_cosine_plot.Rd b/man/cf_cosine_plot.Rd
index 4010580..d386b90 100644
--- a/man/cf_cosine_plot.Rd
+++ b/man/cf_cosine_plot.Rd
@@ -29,7 +29,11 @@ multi_connection_table(
   threshold = 1L,
   group = "type",
   check_missing = TRUE,
-  min_datasets = Inf
+  min_datasets = Inf,
+  prefer.foreign = NA,
+  keep.all = FALSE,
+  MoreArgs = NULL,
+  ...
 )
 }
 \arguments{
@@ -37,7 +41,7 @@ multi_connection_table(
 wrapped by \code{\link{cf_ids}} \emph{or} a dataframe compatible with the
 \code{\link{keys}} function.}
 
-\item{...}{Additional arguments passed to \code{\link{heatmap}}}
+\item{...}{additional arguments passed to \code{\link{heatmap}} (\code{cf_cosine_plot}) or \code{\link{cf_partners}} (\code{multi_connection_table})}
 
 \item{threshold}{return only edges with at least this many matches. 0 is an
 option since neuprint sometimes returns 0 weight edges.}
@@ -77,8 +81,8 @@ for details.}
 
 \item{min_datasets}{How many datasets a type must be in to be included in the
 output. The default of \code{Inf} => all datasets must contain the cell
-type. A negative number defines the number of datasets from which a type
-can be missing. For example \code{-1} would mean that types would still be
+type. A negative number defines the number of datasets from which a type can
+be missing. For example \code{-1} would mean that types would still be
 included even if they are missing from one dataset.}
 
 \item{nas}{What to do with entries that have NAs. Default is to set them to 0
@@ -88,6 +92,16 @@ similarity.}
 
 \item{check_missing}{Whether to report if any query neurons are dropped (due
 to insufficient partner neurons) (default:\code{TRUE}).}
+
+\item{prefer.foreign}{Whether to use foreign types for male CNS data. The
+default value of \code{NA} prefers foreign types when multiple datasets
+including malecns are requested. See details.}
+
+\item{keep.all}{Whether to keep all columns when processing multiple datasets
+rather than just those in common (default=\code{FALSE} only keeps shared
+columns).}
+
+\item{MoreArgs}{Passed to \code{\link{cf_partners}}. For expert use only.}
 }
 \value{
 The result of \code{\link{heatmap}} invisibly including the row and
@@ -96,9 +110,9 @@ The result of \code{\link{heatmap}} invisibly including the row and
   matrix.
 
 \code{multi_connection_table} returns a connectivity dataframe as
-  returned by \code{cf_partners} but with an additional column
-  \code{partners} which indicates (for each row) whether the partner neurons
-  are the input or output neurons.
+ returned by \code{cf_partners} but with an additional column \code{partners}
+ which indicates (for each row) whether the partner neurons are the input or
+ output neurons.
 }
 \description{
 \code{cf_cosine_plot} is the workhorse function for within and
@@ -107,8 +121,9 @@ output of \code{multi_connection_table} if you need more control. See
 examples.
 
 \code{multi_connection_table} fetches partner connectivity data
-  (the first step in \code{cf_cosine_plot} but then gives you the option e.g.
-  to select specific classes of partner neurons. See examples.
+ (the first step in \code{cf_cosine_plot}) but then gives you the option e.g.
+ to select specific classes of partner neurons, fix type names etc. See
+ examples.
 }
 \details{
 \code{group=FALSE} only makes sense for single dataset clustering -
@@ -141,6 +156,24 @@ examples.
   argument then you will get an error. This is because \code{cf_cosine_plot}
   has no way of knowing which label corresponds to which neuron, almost
   certainly resulting in incorrect row labels on your dendrogram.
+
+At present the malecns dataset is the best integrated of all with
+ "foreign type" columns referencing the prior flywire female brain and MANC
+ male nerve cord datasets. These in turn have been the target of ongoing FANC
+ and BANC annotation efforts. Therefore right now the simplest way to ensure
+ that types can be matched across datasets is to use
+ \code{prefer.foreign=TRUE} when requesting multiple datasets. However when
+ using just the malecns, the standard typing for that dataset has some
+ improvements, so \code{prefer.foreign=FALSE} would be better. The default
+ setting of \code{prefer.foreign=NA} therefore chooses
+ \code{prefer.foreign=TRUE} when malecns and at least one other dataset are
+ being requested and \code{FALSE} otherwise.
+
+ Nevertheless, if you want really tight control of the type to type mapping
+ it is recommended to fetch with \code{prefer.foreign=F, min_datasets=1} and
+ then manually review and fix up any types that you know should match. If you
+ also set \code{keep.all=T} then you can access the foreign types columns
+ as part of your logic for doing this.
 }
 \examples{
 \donttest{
diff --git a/man/cf_meta.Rd b/man/cf_meta.Rd
index cb6eed4..b4ef47c 100644
--- a/man/cf_meta.Rd
+++ b/man/cf_meta.Rd
@@ -19,15 +19,15 @@ other input that can be processed by the \code{\link{keys}} function
 
 \item{bind.rows}{Whether to bind data.frames for each dataset together,
 keeping only the common columns (default \code{TRUE} for convenience but
-note that some columns will be dropped).}
+note that some columns will be dropped unless \code{keep.all=TRUE}).}
 
 \item{integer64}{Whether ids should be character vectors (default) or 64 bit
 ints (more compact but a little fragile as they rely on the \code{bit64}
 extension package.)}
 
-\item{keep.all}{When fetching metadata from different datasets, whether to
-keep all metadata columns rather than just those in common
-(default=\code{FALSE})}
+\item{keep.all}{Whether to keep all columns when processing multiple datasets
+rather than just those in common (default=\code{FALSE} only keeps shared
+columns).}
 
 \item{MoreArgs}{A named list of arguments to be passed when fetching metadata
 for a given function. See details.}
diff --git a/man/cf_partners.Rd b/man/cf_partners.Rd
index 5661bef..f3d250c 100644
--- a/man/cf_partners.Rd
+++ b/man/cf_partners.Rd
@@ -9,7 +9,8 @@ cf_partners(
   threshold = 1L,
   partners = c("inputs", "outputs"),
   bind.rows = TRUE,
-  MoreArgs = list()
+  MoreArgs = list(),
+  keep.all = FALSE
 )
 }
 \arguments{
@@ -24,10 +25,14 @@ option since neuprint sometimes returns 0 weight edges.}
 
 \item{bind.rows}{Whether to bind data.frames for each dataset together,
 keeping only the common columns (default \code{TRUE} for convenience but
-note that some columns will be dropped).}
+note that some columns will be dropped unless \code{keep.all=TRUE}).}
 
 \item{MoreArgs}{Additional arguments in the form of a hierarchical list
 (expert use; see details and examples).}
+
+\item{keep.all}{Whether to keep all columns when processing multiple datasets
+rather than just those in common (default=\code{FALSE} only keeps shared
+columns).}
 }
 \value{
 A data.frame or a named list (when \code{bind.rows=FALSE})
diff --git a/vignettes/AOTU063.Rmd b/vignettes/AOTU063.Rmd
index c008766..d468590 100644
--- a/vignettes/AOTU063.Rmd
+++ b/vignettes/AOTU063.Rmd
@@ -33,17 +33,17 @@ library(dplyr)
 For this analysis we will use the version 630 connectivity / annotation data
 released in June 2023. We will set an option use the lower level fafbseg package
 to ensure this.
+You may need to download the relevant data dumps if you have not done so previously.
 
 ```{r}
-fafbseg::flywire_connectome_data_version(set = 630)
+fafbseg::download_flywire_release_data(version = 630)
 ```
 
-You may need to download the relevant data dumps if you have not done so previously.
-
-```{r, eval=FALSE}
-fafbseg::download_flywire_release_data(version = 630)
+```{r}
+fafbseg::flywire_connectome_data_version(set = 630)
 ```
 
+
 ```{r}
 aotu63=cf_meta(cf_ids(query = '/type:AOTU063.*', datasets = c("flywire","hemibrain")))
 aotu63
diff --git a/vignettes/extending-coconatfly.Rmd b/vignettes/extending-coconatfly.Rmd
index 089b2f6..8f6c74b 100644
--- a/vignettes/extending-coconatfly.Rmd
+++ b/vignettes/extending-coconatfly.Rmd
@@ -1,8 +1,8 @@
 ---
-title: "Extending coconatfly with external data sources"
+title: "4. Extending coconatfly with external data sources"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{Extending coconatfly with external data sources}
+  %\VignetteIndexEntry{4. Extending coconatfly with external data sources}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---