From 4f8323c460824721892d5e805bc8bbf382121162 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Mon, 10 Feb 2025 15:01:26 +0000
Subject: [PATCH 01/25] Error checking

---
 R/Concordance.R                   | 10 +++++++++-
 tests/testthat/test-Concordance.R |  9 +++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/R/Concordance.R b/R/Concordance.R
index f47ed056c..6e896df89 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -84,7 +84,15 @@ QuartetConcordance <- function (tree, dataset = NULL, weight = TRUE) {
     warning("Cannot calculate concordance without `dataset`.")
     return(NULL)
   }
-  dataset <- dataset[TipLabels(tree)]
+  if (!inherits(dataset, "phyDat")) {
+    stop("`dataset` must be a phyDat object.")
+  }
+  tipLabels <- intersect(TipLabels(tree), names(dataset))
+  if (!length(tipLabels)) {
+    warning("No overlap between tree labels and dataset.")
+    return(NULL)
+  }
+  dataset <- dataset[tipLabels, drop = FALSE]
   splits <- as.Splits(tree, dataset)
   logiSplits <- vapply(seq_along(splits), function (i) as.logical(splits[[i]]),
                        logical(NTip(dataset)))
diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index c3aecd83a..b4ffdc786 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -8,6 +8,13 @@ test_that("_Concordance() handles null input", {
   expect_warning(expect_null(SharedPhylogeneticConcordance(BalancedTree(8), NULL)))
 })
 
+test_that("_Concordance() handles tip mismatch", {
+  char <- MatrixToPhyDat(cbind(c(a = 0, b = 0, c = 0, d = 1, e = 1)))
+  tree <- BalancedTree(5)
+  expect_warning(expect_null(QuartetConcordance(tree, char)),
+                 "No overlap between tree labels and dataset.")
+})
+
 test_that("QuartetConcordance() works", {
   tree <- BalancedTree(8)
   splits <- as.Splits(tree)
@@ -18,6 +25,8 @@ test_that("QuartetConcordance() works", {
                       0, 0, 1, 1, 2, 2, 3, 3,  0,
                       0, 1, 2, 3, 0, 1, 2, 3,  0), 9,
                     dimnames = list(paste0("t", 1:9), NULL))
+  expect_error(QuartetConcordance(tree, mataset),
+               "`dataset` must be a phyDat object")
   dat <- MatrixToPhyDat(mataset)
   expect_equal(unname(QuartetConcordance(tree, dat[, 1])), rep(1, 5))
   # plot(tree); nodelabels();

From 76f9d2a49896f07bb8c3f810f4b5b53366b73bb0 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Mon, 10 Feb 2025 15:01:35 +0000
Subject: [PATCH 02/25] Temp Test script

---
 inst/concordance.R | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 inst/concordance.R

diff --git a/inst/concordance.R b/inst/concordance.R
new file mode 100644
index 000000000..7521facbc
--- /dev/null
+++ b/inst/concordance.R
@@ -0,0 +1,22 @@
+devtools::load_all()
+use("ape")
+use("TreeTools")
+
+labels <- letters[1:5]
+tr <- PectinateTree(labels)
+plot(tr)
+nodelabels(adj = c(4, 0.5))
+ char <- MatrixToPhyDat(cbind(
+  c(a = 0, b = 0, c = 0, d = 1, e = 1), # 100% concordant with both
+  c(0, 0, 1, 0, 1), # discordant
+  c(0, 1, 0, 0, 1)  
+  ))
+QuartetConcordance(tr, char, weight = FALSE)
+
+Conc <- lapply(labels, function(lab) {
+  quartet <- DropTip(tr, lab)
+  for (i in 1:3) {
+    message(setdiff(labels, lab), " char ", i, ": ", QuartetConcordance(quartet, char[, i]))
+  }
+})
+  

From 586b9223abf5a2d2c65e058c12926368a7b7009f Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 11 Feb 2025 14:53:01 +0000
Subject: [PATCH 03/25] Draft of method = "minh"

---
 DESCRIPTION                       |   2 +-
 NEWS.md                           |   4 +
 R/Concordance.R                   | 247 ++++++++++++++++++++++--------
 man/SiteConcordance.Rd            |  40 +++--
 tests/testthat/test-Concordance.R |  41 +++++
 5 files changed, 253 insertions(+), 81 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index bf99b9165..929fe0c13 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: TreeSearch
 Title: Phylogenetic Analysis with Discrete Character Data
-Version: 1.5.1.9005
+Version: 1.5.1.9006
 Authors@R: c(
     person(
       "Martin R.", 'Smith',
diff --git a/NEWS.md b/NEWS.md
index 4b8fa9af3..41ba848ba 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# TreeSearch 1.5.1.9006 (2025-02)
+
+- `QuartetConcordance(method = "minh")` uses the approach of Minh et al. (2020)
+
 # TreeSearch 1.5.1.9005 (2025-02)
 
 - Support for ordered (additive) characters
diff --git a/R/Concordance.R b/R/Concordance.R
index 6e896df89..1e93593a8 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -1,6 +1,6 @@
 #' Calculate site concordance factor
 #' 
-#' The site concordance factor \insertCite{Minh2020}{TreeSearch} is a measure 
+#' The site concordance factor is a measure 
 #' of the strength of support that the dataset presents for a given split in a
 #' tree.
 #' 
@@ -12,21 +12,33 @@
 #' with any tree that groups the first two leaves together to the exclusion
 #' of the second.
 #' 
-#' By default, the reported value weights each site by the number of quartets
-#' it is decisive for.  This value can be interpreted as the proportion of
-#' all decisive quartets that are concordant with a split.
-#' If `weight = FALSE`, the reported value is the mean of the concordance
-#' value for each site.  
+#' The `methods` argument selects from different methods for quantifying the
+#' concordance associated with a given edge of a tree.
+#' 
+#' If `method = "split"` (the default), the concordance is calculated by
+#' counting all quartets that are decisive for a split, i.e. for the split
+#' $A|B$, any quartet that contains two leaves from set $A$ and two from set $B$.
+#' This value can be interpreted as the proportion of all decisive quartets
+#' that are concordant with a split.
+#' If `method = "sitemean"`, the reported value is the mean of the concordance
+#' value for each site.
 #' Consider a split associated with two sites:
 #' one that is concordant with 25% of 96 decisive quartets, and
 #' a second that is concordant with 75% of 4 decisive quartets.
-#' If `weight = TRUE`, the split concordance will be 24 + 3 / 96 + 4 = 27%.
-#' If `weight = FALSE`, the split concordance will be mean(75%, 25%) = 50%.
+#' `method = "split"` returns a value of 24 + 3 / 96 + 4 = 27%.
+#' `method = "sitemean"` returns mean(75%, 25%) = 50%.
+#' 
+#' `method = "minh"` uses the approach of
+#' \insertCite{Minh2020;textual}{TreeSearch}.
+#' Briefly, this interprets each edge as defining *four* clades, and counts
+#' the status of quartets that contain exactly one leaf from each of these
+#' clades.  This is a subset of the quartets considered by other methods.
 #' 
-#' `QuartetConcordance()` is computed exactly, using all quartets, where as
-#' other implementations (e.g. IQ-TREE) follow
+#' 
+#' `QuartetConcordance()` is computed exactly, using all quartets.
+#' Other implementations (e.g. IQ-TREE) follow
 #' \insertCite{@Minh2020;textual}{TreeSearch} in using a random subsample
-#'  of quartets for a faster, if potentially less accurate, computation.
+#' of quartets for a faster computation, potentially at the expense of accuracy.
 #' 
 # `ClusteringConcordance()` and `PhylogeneticConcordance()` respectively report
 # the proportion of clustering information and phylogenetic information 
@@ -44,8 +56,9 @@
 #' 
 #' @template treeParam
 #' @template datasetParam
-#' @param weight Logical specifying whether to weight sites according to the
-#' number of quartets they are decisive for.
+#' @param method Character vector specifying which concordance measures to
+#' calculate.  See details section for available options.
+#' 
 #' 
 #' 
 #' 
@@ -79,7 +92,8 @@
 #' @name SiteConcordance
 #' @family split support functions
 #' @export
-QuartetConcordance <- function (tree, dataset = NULL, weight = TRUE) {
+# TODO support `method = c("split", "sitemean", "minh")`
+QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
   if (is.null(dataset)) {
     warning("Cannot calculate concordance without `dataset`.")
     return(NULL)
@@ -93,62 +107,163 @@ QuartetConcordance <- function (tree, dataset = NULL, weight = TRUE) {
     return(NULL)
   }
   dataset <- dataset[tipLabels, drop = FALSE]
-  splits <- as.Splits(tree, dataset)
-  logiSplits <- vapply(seq_along(splits), function (i) as.logical(splits[[i]]),
-                       logical(NTip(dataset)))
-  
   characters <- PhyDatToMatrix(dataset, ambigNA = TRUE)
   
-  cli_progress_bar(name = "Quartet concordance", total = dim(logiSplits)[[2]])
-  setNames(apply(logiSplits, 2, function (split) {
-    cli_progress_update(1, .envir = parent.frame(2))
-    quarts <- apply(characters, 2, function (char) {
-      tab <- table(split, char)
-      nCol <- dim(tab)[[2]]
-      if (nCol > 1L) {
-        # Consider the case
-        # split  0   1   2
-        # FALSE  2   3   0
-        #  TRUE  0   4   2
-        # 
-        # Concordant quartets with bin i = 1 (character 0) and bin j = 2
-        # (character 1) include any combination of the two taxa from 
-        # [FALSE, 0], and the two taxa chosen from the four in [TRUE, 1],
-        # i.e. choose(2, 2) * choose(4, 2)
-        concordant <- sum(vapply(seq_len(nCol), function (i) {
-          inBinI <- tab[1, i]
-          iChoices <- choose(inBinI, 2)
-          sum(vapply(seq_len(nCol)[-i], function (j) {
-            inBinJ <- tab[2, j]
-            iChoices * choose(inBinJ, 2)
-            }, 1))
-        }, 1))
-        
-        # To be discordant, we must select a pair of taxa from TT and from FF;
-        # and the character states must group each T with an F
-        # e.g. T0 T1  F0 F1
-        # T0 T1 F0 F2 would not be discordant - just uninformative
-        discordant <- sum(apply(combn(nCol, 2), 2, function (ij) prod(tab[, ij])))
-        
-        # Only quartets that include two T and two F can be decisive
-        # Quartets must also include two pairs of characters
-        decisive <- concordant + discordant
-        
-        # Return the numerator and denominatory of equation 2 in
-        # Minh et al. 2020
-        c(concordant, decisive)
+  if (method == "minh") {
+    if (attr(tree, "order") != "preorder") {
+      stop("Tree must be in preorder; try `tree <- Preorder(tree)`")
+    }
+    edge <- tree[["edge"]]
+    parent <- edge[, 1]
+    child <- edge[, 2]
+    nTip <- NTip(tree)
+    nodes <- nTip + seq_len(nTip - 1)
+    parentEdge <- match(nodes, child)
+    descended <- DescendantTips(parent, child)
+    childEdges <- vapply(nodes, function(x)
+      which(parent == x), integer(2))
+    daughters <- matrix(child[childEdges], 2)
+    rootNode <- nTip + 1
+    rootChildren <- child[parent == rootNode]
+    nonRootChildren <- setdiff(nodes, c(rootNode, rootChildren))
+    
+    # TODO pull out into a standalone function, perhaps in TreeTools
+    # nodes[-1] omits the root node, which defines no quartet
+    quarters <- cbind(
+      if (all(rootChildren > nTip)) {
+        ret <- integer(nTip)
+        groups <- as.numeric(childEdges[, rootChildren - nTip])
+        for (i in 0:3) {
+          ret[descended[groups[[i + 1]], ]] <- i
+        }
+        `dimnames<-`(cbind(ret), list(TipLabels(tree), rootChildren[[1]]))
+      },
+      `dimnames<-`(vapply(nonRootChildren, function(n) {
+        ret <- integer(nTip)
+        if (parent[[parentEdge[[n - nTip]]]] == rootNode) {
+          
+        }
+        ret[apply(descended[childEdges[, parent[parentEdge[n - nTip]] - nTip], ],
+                  2, any)] <- 1L
+        ce <- childEdges[, n - nTip]
+        # Overprint with own children
+        ret[descended[ce[[1]], ]] <- 2L
+        ret[descended[ce[[2]], ]] <- 3L
+        ret
+      }, integer(nTip)), list(TipLabels(tree), nonRootChildren))
+    )
+    
+    cli_progress_bar(name = "Quartet concordance", total = dim(quarters)[[2]])
+    on.exit(cli_progress_done(.envir = parent.frame(2)))
+    concDeci <- apply(quarters, 2, function (q) {
+      #cli_progress_update(1, .envir = parent.frame(2))
+      quarts <- apply(characters, 2, function (char) {
+        tab <- table(q, char)
+        nCol <- dim(tab)[[2]]
+        if (nCol > 1L) {
+          
+          # RULES:
+          # We must pick one leaf per row
+          # To be parsimony informative, we must pick two leaves from each of
+          #   two columns
+          # To be concordant, leaves 1 and 2 must come from the same column
+          
+          # Example table:
+          #   q  0   1   2
+          #   A  2   3   0
+          #   B  2   3   2
+          #   C  0   1   2
+          #   D  2   1   2
+          # 
+          
+          concordant <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
+            i <- ij[[1]]
+            j <- ij[[2]]
+            # Taxa from A and B from column i, C and D from j
+            prod(tab[1:2, i], tab[3:4, j]) +
+              # Taxa from A and B from column j, C and D from i
+              prod(tab[1:2, j], tab[3:4, i])
+          }))
+          
+          decisive <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
+            # With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
+            i <- ij[[1]]
+            j <- ij[[2]]
+            sum(apply(combn(4, 2), 2, function(kl) {
+              # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
+              # and the other pair from col j
+              pair1 <- kl
+              pair2 <- (1:4)[-kl]
+              prod(tab[pair1, i], tab[pair2, j])
+            }))
+          }))
+          
+          c(concordant, decisive)
+        } else {
+          c(0L, 0L)
+        }
+      })
+      rowSums(quarts)
+    })
+    concDeci[1, ] / concDeci[2, ]
+  } else {
+    splits <- as.Splits(tree, dataset)
+    logiSplits <- vapply(seq_along(splits), function (i) as.logical(splits[[i]]),
+                         logical(NTip(dataset)))
+    cli_progress_bar(name = "Quartet concordance", total = dim(logiSplits)[[2]])
+    on.exit(cli_progress_done(.envir = parent.frame(2)))
+    setNames(apply(logiSplits, 2, function (split) {
+      cli_progress_update(1, .envir = parent.frame(2))
+      quarts <- apply(characters, 2, function (char) {
+        tab <- table(split, char)
+        nCol <- dim(tab)[[2]]
+        if (nCol > 1L) {
+          # Consider the case
+          # split  0   1   2
+          # FALSE  2   3   0
+          #  TRUE  0   4   2
+          # 
+          # Concordant quartets with bin i = 1 (character 0) and bin j = 2
+          # (character 1) include any combination of the two taxa from 
+          # [FALSE, 0], and the two taxa chosen from the four in [TRUE, 1],
+          # i.e. choose(2, 2) * choose(4, 2)
+          concordant <- sum(vapply(seq_len(nCol), function (i) {
+            inBinI <- tab[1, i]
+            iChoices <- choose(inBinI, 2)
+            sum(vapply(seq_len(nCol)[-i], function (j) {
+              inBinJ <- tab[2, j]
+              iChoices * choose(inBinJ, 2)
+              }, 1))
+          }, 1))
+          
+          # To be discordant, we must select a pair of taxa from TT and from FF;
+          # and the character states must group each T with an F
+          # e.g. T0 T1  F0 F1
+          # T0 T1 F0 F2 would not be discordant - just uninformative
+          discordant <- sum(apply(combn(nCol, 2), 2, function (ij) prod(tab[, ij])))
+          
+          # Only quartets that include two T and two F can be decisive
+          # Quartets must also include two pairs of characters
+          decisive <- concordant + discordant
+          
+          # Return the numerator and denominatory of equation 2 in
+          # Minh et al. 2020
+          c(concordant, decisive)
+        } else {
+          c(0L, 0L)
+        }
+      })
+      if (method == "split") {
+        quartSums <- rowSums(quarts)
+        ifelse(is.nan(quartSums[[2]]), NA_real_, quartSums[[1]] / quartSums[[2]])
+      } else if (method == "sitemean") {
+        mean(ifelse(is.nan(quarts[2, ]), NA_real_, quarts[1, ] / quarts[2, ]),
+             na.rm = TRUE)
       } else {
-        c(0L, 0L)
+        stop("Unrecognized method: ", method)
       }
-    })
-    if (isTRUE(weight)) {
-      quartSums <- rowSums(quarts)
-      ifelse(is.nan(quartSums[[2]]), NA_real_, quartSums[[1]] / quartSums[[2]])
-    } else {
-      mean(ifelse(is.nan(quarts[2, ]), NA_real_, quarts[1, ] / quarts[2, ]),
-           na.rm = TRUE)
-    }
-  }), names(splits))
+    }), names(splits))
+  }
 }
 
 #' @importFrom TreeDist Entropy
diff --git a/man/SiteConcordance.Rd b/man/SiteConcordance.Rd
index 85e38d090..8f3ad1df3 100644
--- a/man/SiteConcordance.Rd
+++ b/man/SiteConcordance.Rd
@@ -9,7 +9,7 @@
 \alias{SharedPhylogeneticConcordance}
 \title{Calculate site concordance factor}
 \usage{
-QuartetConcordance(tree, dataset = NULL, weight = TRUE)
+QuartetConcordance(tree, dataset = NULL, method = "split")
 
 ClusteringConcordance(tree, dataset)
 
@@ -23,13 +23,14 @@ SharedPhylogeneticConcordance(tree, dataset)
 \item{tree}{A tree of class \code{\link{phylo}}.}
 
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
-\code{phyDat}, whose names correspond to the labels of any accompanying tree.}
+\code{phyDat}, whose names correspond to the labels of any accompanying tree.
+Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
 
-\item{weight}{Logical specifying whether to weight sites according to the
-number of quartets they are decisive for.}
+\item{method}{Character vector specifying which concordance measures to
+calculate.  See details section for available options.}
 }
 \description{
-The site concordance factor \insertCite{Minh2020}{TreeSearch} is a measure
+The site concordance factor is a measure
 of the strength of support that the dataset presents for a given split in a
 tree.
 }
@@ -42,21 +43,32 @@ But a quartet with characters \verb{0 0 1 1} is decisive, and is concordant
 with any tree that groups the first two leaves together to the exclusion
 of the second.
 
-By default, the reported value weights each site by the number of quartets
-it is decisive for.  This value can be interpreted as the proportion of
-all decisive quartets that are concordant with a split.
-If \code{weight = FALSE}, the reported value is the mean of the concordance
+The \code{methods} argument selects from different methods for quantifying the
+concordance associated with a given edge of a tree.
+
+If \code{method = "split"} (the default), the concordance is calculated by
+counting all quartets that are decisive for a split, i.e. for the split
+$A|B$, any quartet that contains two leaves from set $A$ and two from set $B$.
+This value can be interpreted as the proportion of all decisive quartets
+that are concordant with a split.
+If \code{method = "sitemean"}, the reported value is the mean of the concordance
 value for each site.
 Consider a split associated with two sites:
 one that is concordant with 25\% of 96 decisive quartets, and
 a second that is concordant with 75\% of 4 decisive quartets.
-If \code{weight = TRUE}, the split concordance will be 24 + 3 / 96 + 4 = 27\%.
-If \code{weight = FALSE}, the split concordance will be mean(75\%, 25\%) = 50\%.
+\code{method = "split"} returns a value of 24 + 3 / 96 + 4 = 27\%.
+\code{method = "sitemean"} returns mean(75\%, 25\%) = 50\%.
+
+\code{method = "minh"} uses the approach of
+\insertCite{Minh2020;textual}{TreeSearch}.
+Briefly, this interprets each edge as defining \emph{four} clades, and counts
+the status of quartets that contain exactly one leaf from each of these
+clades.  This is a subset of the quartets considered by other methods.
 
-\code{QuartetConcordance()} is computed exactly, using all quartets, where as
-other implementations (e.g. IQ-TREE) follow
+\code{QuartetConcordance()} is computed exactly, using all quartets.
+Other implementations (e.g. IQ-TREE) follow
 \insertCite{@Minh2020;textual}{TreeSearch} in using a random subsample
-of quartets for a faster, if potentially less accurate, computation.
+of quartets for a faster computation, potentially at the expense of accuracy.
 
 \strong{NOTE:} These functions are under development. They are incompletely
 tested, and may change without notice.
diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index b4ffdc786..d2b718e41 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -15,6 +15,46 @@ test_that("_Concordance() handles tip mismatch", {
                  "No overlap between tree labels and dataset.")
 })
 
+test_that("QuartetConcordance(method = minh)", {
+  tree <- ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));")
+  mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1,  0,
+                      0, 1, 1, 0, 0, 0, 2, 2,  0,
+                      0, 0, 0, 1, 0, 1, 1, 1,  0,
+                      0, 0, 0, 0, 1, 1, 2, 2,  0,
+                      0, 0, 1, 1, 2, 2, 3, 3,  0,
+                      0, 1, 2, 3, 0, 1, 2, 3,  0), 9,
+                    dimnames = list(paste0("t", 1:9), NULL))
+  dat <- MatrixToPhyDat(mataset)
+  
+  # plot(tree); nodelabels();
+  minh10 <- rep(1:4, each = 2)
+  minh11 <- c(1, 2, 3, 3, rep(4, 4))
+  minh15 <- c(rep(1, 4), rep(2, 2), 3, 4)
+  
+  # We must pick one leaf per row
+  # To be parsimony informative, we must pick two leaves from each of two columns
+  # To be concordant, leaves 1 and 2 must come from the same column
+  table(minh10, PhyDatToMatrix(dat[, 1])[1:8, 1])
+  concordant10.1 <- 2*2*2*2
+  informative10.1 <- 2*2*2*2
+  
+  table(minh10, PhyDatToMatrix(dat[, 2])[1:8, 1])
+  concordant10.2 <- 0
+  informative10.2 <- 0
+  
+  
+  table(minh15, PhyDatToMatrix(dat[, 2])[1:8, 1])
+  concordant15.2 <- 2*2*1*1
+  informative15.2 <- 2*2*1*1
+
+  table(minh11, PhyDatToMatrix(dat[, 2])[1:8, 1])
+  concordant11.2 <- 0
+  informative11.2 <- 2
+  QuartetConcordance(tree, dat[, 1], method = "minh")
+  expect_equal(unname(QuartetConcordance(tree, dat[, 1])), rep(1, 5))
+})
+  
+
 test_that("QuartetConcordance() works", {
   tree <- BalancedTree(8)
   splits <- as.Splits(tree)
@@ -29,6 +69,7 @@ test_that("QuartetConcordance() works", {
                "`dataset` must be a phyDat object")
   dat <- MatrixToPhyDat(mataset)
   expect_equal(unname(QuartetConcordance(tree, dat[, 1])), rep(1, 5))
+
   # plot(tree); nodelabels();
   expect_equal(QuartetConcordance(tree, dat[, 2]),
                c("10" = 1/9, "11" = 0, "12" = 0,

From 70aece81b8c56fe2f8eb338ff06fe306300f1bf3 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 11 Feb 2025 14:56:43 +0000
Subject: [PATCH 04/25] Emphatically failing

---
 tests/testthat/test-Concordance.R | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index d2b718e41..0c08c0c4f 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -18,15 +18,22 @@ test_that("_Concordance() handles tip mismatch", {
 test_that("QuartetConcordance(method = minh)", {
   tree <- ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));")
   mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1,  0,
-                      0, 1, 1, 0, 0, 0, 2, 2,  0,
-                      0, 0, 0, 1, 0, 1, 1, 1,  0,
+                      0, 0, 1, 1, 1, 1, 1, 1,  0,
+                      0, 0, 1, 1, 1, 1, 2, 2,  0,
+                      1, 0, 0, 0, 1, 1, 1, 1,  0,
+                      1, 0, 0, 0, 0, 1, 1, 1,  0,
                       0, 0, 0, 0, 1, 1, 2, 2,  0,
                       0, 0, 1, 1, 2, 2, 3, 3,  0,
                       0, 1, 2, 3, 0, 1, 2, 3,  0), 9,
-                    dimnames = list(paste0("t", 1:9), NULL))
+                    dimnames = list(letters[1:9], NULL))
   dat <- MatrixToPhyDat(mataset)
   
+  expect_error(QuartetConcordance(tree, dat, method = "minh"),
+               "must be in preorder")
+  tree <- Preorder(tree)
+  
   # plot(tree); nodelabels();
+  QuartetConcordance(tree, dat, method = "minh")
   minh10 <- rep(1:4, each = 2)
   minh11 <- c(1, 2, 3, 3, rep(4, 4))
   minh15 <- c(rep(1, 4), rep(2, 2), 3, 4)

From 3b2e8927af84211339fa65cb3af4021f010c2b39 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Mon, 10 Feb 2025 16:00:18 +0000
Subject: [PATCH 05/25] Integrate into unit test

---
 inst/concordance.R                | 22 -------------------
 tests/testthat/test-Concordance.R | 35 ++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 23 deletions(-)
 delete mode 100644 inst/concordance.R

diff --git a/inst/concordance.R b/inst/concordance.R
deleted file mode 100644
index 7521facbc..000000000
--- a/inst/concordance.R
+++ /dev/null
@@ -1,22 +0,0 @@
-devtools::load_all()
-use("ape")
-use("TreeTools")
-
-labels <- letters[1:5]
-tr <- PectinateTree(labels)
-plot(tr)
-nodelabels(adj = c(4, 0.5))
- char <- MatrixToPhyDat(cbind(
-  c(a = 0, b = 0, c = 0, d = 1, e = 1), # 100% concordant with both
-  c(0, 0, 1, 0, 1), # discordant
-  c(0, 1, 0, 0, 1)  
-  ))
-QuartetConcordance(tr, char, weight = FALSE)
-
-Conc <- lapply(labels, function(lab) {
-  quartet <- DropTip(tr, lab)
-  for (i in 1:3) {
-    message(setdiff(labels, lab), " char ", i, ": ", QuartetConcordance(quartet, char[, i]))
-  }
-})
-  
diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index 0c08c0c4f..60e0b7f0e 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -62,7 +62,40 @@ test_that("QuartetConcordance(method = minh)", {
 })
   
 
-test_that("QuartetConcordance() works", {
+test_that("QuartetConcordance() calculates correct values - weighting", {
+  labels <- letters[1:5]
+  tr <- PectinateTree(labels)
+  # plot(tr)
+  # nodelabels(adj = c(4, 0.5))
+  
+  char <- MatrixToPhyDat(cbind(
+    c(a = 0, b = 0, c = 0, d = 1, e = 1),
+    c(0, 0, 1, 0, 1),
+    c(0, 1, 0, 0, 1)
+  ))
+  
+  # Is quartet concordant with character?
+  # Quartets labelled by omitted leaf
+  C <- 1    # Concordant
+  D <- 0    # Discordant
+  I <- NaN  # Irrelevant
+  expected_concordance <- cbind(
+    ch1 = c(a = C, b = C, c = C, d = I, e = I), 
+    ch2 = c(D, D, I, C, I),
+    ch3 = c(D, I, D, D, I))
+  
+  conc <- vapply(labels, function(lab) {
+    quartet <- DropTip(tr, lab)
+    vapply(1:3, function(i) QuartetConcordance(quartet, char[, i]), double(1))
+  }, c(ch1 = NaN, ch2 = NaN, ch3 = NaN))
+  expect_equal(conc, t(expected_concordance))
+  
+  
+  QuartetConcordance(tr, char, weight = FALSE)
+  
+})
+
+test_that("QuartetConcordance() calculates correct values", {
   tree <- BalancedTree(8)
   splits <- as.Splits(tree)
   mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1,  0,

From 0fb6938b158683f8952e9c5b79aaa032a44a3ccb Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Tue, 11 Feb 2025 15:59:25 +0000
Subject: [PATCH 06/25] Multiple copies of characters

---
 tests/testthat/test-Concordance.R | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index 60e0b7f0e..717e756b8 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -65,14 +65,13 @@ test_that("QuartetConcordance(method = minh)", {
 test_that("QuartetConcordance() calculates correct values - weighting", {
   labels <- letters[1:5]
   tr <- PectinateTree(labels)
-  # plot(tr)
-  # nodelabels(adj = c(4, 0.5))
+  # plot(tr); nodelabels(adj = c(4, 0.5))
   
   char <- MatrixToPhyDat(cbind(
     c(a = 0, b = 0, c = 0, d = 1, e = 1),
     c(0, 0, 1, 0, 1),
     c(0, 1, 0, 0, 1)
-  ))
+  )[, c(1:3, 1)])
   
   # Is quartet concordant with character?
   # Quartets labelled by omitted leaf
@@ -80,19 +79,34 @@ test_that("QuartetConcordance() calculates correct values - weighting", {
   D <- 0    # Discordant
   I <- NaN  # Irrelevant
   expected_concordance <- cbind(
-    ch1 = c(a = C, b = C, c = C, d = I, e = I), 
+    ch1 = c(a = C, b = C, c = C, d = I, e = I),
     ch2 = c(D, D, I, C, I),
-    ch3 = c(D, I, D, D, I))
+    ch3 = c(D, I, D, D, I),
+    ch4 = c(a = C, b = C, c = C, d = I, e = I))
   
   conc <- vapply(labels, function(lab) {
     quartet <- DropTip(tr, lab)
-    vapply(1:3, function(i) QuartetConcordance(quartet, char[, i]), double(1))
-  }, c(ch1 = NaN, ch2 = NaN, ch3 = NaN))
+    vapply(1:4, function(i) QuartetConcordance(quartet, char[, i]), double(1))
+  }, c(ch1 = NaN, ch2 = NaN, ch3 = NaN, ch4 = NaN))
   expect_equal(conc, t(expected_concordance))
   
+  split_relevant <- list(
+    "8" = letters[3:5],
+    "9" = letters[1:3])
   
-  QuartetConcordance(tr, char, weight = FALSE)
+  expect_equal(
+    QuartetConcordance(tr, char, weight = FALSE), # 8 = 0.75; 9 = 0.5
+    vapply(split_relevant, function(splitQ) {
+      sum(expected_concordance[splitQ, ], na.rm = TRUE) /
+        sum(!is.nan(expected_concordance[splitQ, ]))
+    }, double(1))
+  )
   
+  # Same proportions, different number of chars -> weights
+  char10 <- char[, rep(1:4, each = 10)]
+  expect_equal(
+    QuartetConcordance(tr, char, weight = FALSE),
+    QuartetConcordance(tr, char10, weight = FALSE))
 })
 
 test_that("QuartetConcordance() calculates correct values", {

From a69db66e12d6cfe02219546ed1b6803faa3f4267 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Tue, 11 Feb 2025 16:42:08 +0000
Subject: [PATCH 07/25] import

---
 NAMESPACE       | 1 +
 R/Concordance.R | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 98ce38262..5cb839a65 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -142,6 +142,7 @@ importFrom(TreeTools,CladisticInfo)
 importFrom(TreeTools,CompatibleSplits)
 importFrom(TreeTools,ConstrainedNJ)
 importFrom(TreeTools,DescendantEdges)
+importFrom(TreeTools,DescendantTips)
 importFrom(TreeTools,DoubleFactorial)
 importFrom(TreeTools,DropTip)
 importFrom(TreeTools,EdgeAncestry)
diff --git a/R/Concordance.R b/R/Concordance.R
index 1e93593a8..a32f68a6c 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -88,7 +88,7 @@
 #' @importFrom ape keep.tip
 #' @importFrom cli cli_progress_bar cli_progress_update
 #' @importFrom utils combn
-#' @importFrom TreeTools as.Splits PhyDatToMatrix TipLabels
+#' @importFrom TreeTools as.Splits DescendantTips PhyDatToMatrix TipLabels
 #' @name SiteConcordance
 #' @family split support functions
 #' @export

From 4fc794f694e19521e1d2738302cdb523d3f28a12 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 08:21:05 +0000
Subject: [PATCH 08/25] =?UTF-8?q?weight=E2=86=92method?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/testthat/test-Concordance.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index 717e756b8..cce7d6a80 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -95,7 +95,7 @@ test_that("QuartetConcordance() calculates correct values - weighting", {
     "9" = letters[1:3])
   
   expect_equal(
-    QuartetConcordance(tr, char, weight = FALSE), # 8 = 0.75; 9 = 0.5
+    QuartetConcordance(tr, char, method = "split"), # 8 = 0.75; 9 = 0.5
     vapply(split_relevant, function(splitQ) {
       sum(expected_concordance[splitQ, ], na.rm = TRUE) /
         sum(!is.nan(expected_concordance[splitQ, ]))
@@ -105,8 +105,8 @@ test_that("QuartetConcordance() calculates correct values - weighting", {
   # Same proportions, different number of chars -> weights
   char10 <- char[, rep(1:4, each = 10)]
   expect_equal(
-    QuartetConcordance(tr, char, weight = FALSE),
-    QuartetConcordance(tr, char10, weight = FALSE))
+    QuartetConcordance(tr, char, method = "split"),
+    QuartetConcordance(tr, char10, method = "split"))
 })
 
 test_that("QuartetConcordance() calculates correct values", {

From 4c23582b9d6e9bfee9b9a76f56bf80431c33469d Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 08:24:23 +0000
Subject: [PATCH 09/25] Test Minh concordance

---
 tests/testthat/test-Concordance.R | 58 ++++++++++++++++---------------
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index cce7d6a80..6f6dc7fbe 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -24,7 +24,8 @@ test_that("QuartetConcordance(method = minh)", {
                       1, 0, 0, 0, 0, 1, 1, 1,  0,
                       0, 0, 0, 0, 1, 1, 2, 2,  0,
                       0, 0, 1, 1, 2, 2, 3, 3,  0,
-                      0, 1, 2, 3, 0, 1, 2, 3,  0), 9,
+                      0, 1, 2, 3, 0, 1, 2, 3,  0,
+                      0, 1, 2, 0, 0, 2, 2, 3,  0), 9,
                     dimnames = list(letters[1:9], NULL))
   dat <- MatrixToPhyDat(mataset)
   
@@ -33,34 +34,35 @@ test_that("QuartetConcordance(method = minh)", {
   tree <- Preorder(tree)
   
   # plot(tree); nodelabels();
-  QuartetConcordance(tree, dat, method = "minh")
-  minh10 <- rep(1:4, each = 2)
-  minh11 <- c(1, 2, 3, 3, rep(4, 4))
-  minh15 <- c(rep(1, 4), rep(2, 2), 3, 4)
-  
-  # We must pick one leaf per row
-  # To be parsimony informative, we must pick two leaves from each of two columns
-  # To be concordant, leaves 1 and 2 must come from the same column
-  table(minh10, PhyDatToMatrix(dat[, 1])[1:8, 1])
-  concordant10.1 <- 2*2*2*2
-  informative10.1 <- 2*2*2*2
-  
-  table(minh10, PhyDatToMatrix(dat[, 2])[1:8, 1])
-  concordant10.2 <- 0
-  informative10.2 <- 0
-  
-  
-  table(minh15, PhyDatToMatrix(dat[, 2])[1:8, 1])
-  concordant15.2 <- 2*2*1*1
-  informative15.2 <- 2*2*1*1
-
-  table(minh11, PhyDatToMatrix(dat[, 2])[1:8, 1])
-  concordant11.2 <- 0
-  informative11.2 <- 2
-  QuartetConcordance(tree, dat[, 1], method = "minh")
-  expect_equal(unname(QuartetConcordance(tree, dat[, 1])), rep(1, 5))
+  expect_concordance <- function(i, expectation) {
+    expect_equal(QuartetConcordance(tree, dat[, i], method = "minh"),
+                 setNames(expectation, 11:15))
+  }
+  # Expectations computed by working through tables manually
+  expect_concordance(1, c(NaN, NaN, 1, NaN, NaN))
+  expect_concordance(2, c(1, NaN, NaN, NaN, NaN))
+  expect_concordance(3, c(1, NaN, NaN, NaN, 1))
+  expect_concordance(4, c(0, 0, 1, NaN, NaN))
+  expect_concordance(5, c(0, 0, 4 / 6, 0, 1))
+  expect_concordance(6, rep(NaN, 5))
+  expect_concordance(7, c(1, NaN, NaN, NaN, NaN))
+  expect_concordance(8, c(NaN, NaN, 0, NaN, NaN))
+  expect_concordance(9, c(NaN, 0, 1 / 2, 0, NaN))
+  
+  # Values calculated from summing results above
+  expect_equal(unname(QuartetConcordance(tree, dat, method = "minh")), 
+               c(5 + 3 + 1,
+                 0,
+                 12 + 8 + 4 + 1,
+                 0,
+                 4 + 3) /
+                c(5 + 3 + 4 + 3 + 1,
+                  4 + 3 + 2,
+                  12 + 8 + 6 + 2 + 2,
+                  6 + 2,
+                  4 + 3),
+               tolerance = 0.1)
 })
-  
 
 test_that("QuartetConcordance() calculates correct values - weighting", {
   labels <- letters[1:5]

From 43dc12eb62bbcf130cf978607b1064f9fa3e4dff Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 08:59:30 +0000
Subject: [PATCH 10/25] test vs IQTree

---
 data-raw/quartet-concordance-tests.R | 52 ++++++++++++++++++++++++++++
 tests/testthat/test-Concordance.R    |  7 ++--
 2 files changed, 57 insertions(+), 2 deletions(-)
 create mode 100644 data-raw/quartet-concordance-tests.R

diff --git a/data-raw/quartet-concordance-tests.R b/data-raw/quartet-concordance-tests.R
new file mode 100644
index 000000000..c4386fb07
--- /dev/null
+++ b/data-raw/quartet-concordance-tests.R
@@ -0,0 +1,52 @@
+library("TreeTools")
+tree <- ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));")
+mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1,  0,
+                    0, 0, 1, 1, 1, 1, 1, 1,  0,
+                    0, 0, 1, 1, 1, 1, 2, 2,  0,
+                    1, 0, 0, 0, 1, 1, 1, 1,  0,
+                    1, 0, 0, 0, 0, 1, 1, 1,  0,
+                    0, 0, 0, 0, 1, 1, 2, 2,  0,
+                    0, 0, 1, 1, 2, 2, 3, 3,  0,
+                    0, 1, 2, 3, 0, 1, 2, 3,  0,
+                    0, 1, 2, 0, 0, 2, 2, 3,  0), 9,
+                  dimnames = list(letters[1:9], NULL))
+dat <- MatrixToPhyDat(mataset[1:8, ])
+iqtreeDir <- stop("Path to folder that contains iqtree2.exe")
+treeFile <- paste0(iqtreeDir, "tree.nwk")
+dataFile <- paste0(iqtreeDir, "data.nex")
+charFile <- paste0(iqtreeDir, "data%i.nex")
+
+write.tree(tree, file = treeFile)
+
+# Expected output per character
+vapply(1:9, function(i) {
+  charIFile <- sprintf(charFile, i)
+  write.nexus.data(dat[, i], file = charIFile, format = "STANDARD")
+  dataLines <- readLines(charIFile)
+  dataLines[[5]] <- sub("MISSING=? GAP=- INTERLEAVE=NO ", "", dataLines[[5]],
+                        fixed = TRUE)
+  writeLines(dataLines, charIFile)
+  system2(paste0(iqtreeDir, "iqtree2.exe"),
+          paste("-t", treeFile, "-s", charIFile,
+                "--scf 100000",
+                "-nt 4"
+          ), stdout = NULL)
+  cf <- read.tree(paste0(treeFile, ".cf.tree"))
+  
+  as.numeric(cf[["node.label"]][-(1:2)])
+}, double(5))
+
+# Expected output across entire dataset
+write.nexus.data(dat, file = dataFile, format = "STANDARD")
+dataLines <- readLines(dataFile)
+dataLines[[5]] <- sub("MISSING=? GAP=- INTERLEAVE=NO ", "", dataLines[[5]], fixed = TRUE)
+writeLines(dataLines, dataFile)
+system2(paste0(iqtreeDir, "iqtree2.exe"),
+        paste("-t", treeFile, "-s", dataFile,
+              "--scf 1000000",
+              "-nt 4"
+        ), stdout = NULL)
+cf <- read.tree(paste0(treeFile, ".cf.tree"))
+plot(cf, show.node.label = TRUE, use.edge.length = FALSE)
+
+as.numeric(cf[["node.label"]][-(1:2)])
diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index 6f6dc7fbe..ae5c6401b 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -49,6 +49,7 @@ test_that("QuartetConcordance(method = minh)", {
   expect_concordance(8, c(NaN, NaN, 0, NaN, NaN))
   expect_concordance(9, c(NaN, 0, 1 / 2, 0, NaN))
   
+  expect_concordance(6, rep(NaN, 5))
   # Values calculated from summing results above
   expect_equal(unname(QuartetConcordance(tree, dat, method = "minh")), 
                c(5 + 3 + 1,
@@ -60,8 +61,10 @@ test_that("QuartetConcordance(method = minh)", {
                   4 + 3 + 2,
                   12 + 8 + 6 + 2 + 2,
                   6 + 2,
-                  4 + 3),
-               tolerance = 0.1)
+                  4 + 3))
+  
+  expect_equal(unname(QuartetConcordance(tree, dat, method = "minh")), 
+               c(56.7, 0, 85.4, 0, 62.5), tolerance = 0.01)
 })
 
 test_that("QuartetConcordance() calculates correct values - weighting", {

From d5c5c9b11ae42d18b3a8aa93bc2d6015418f1aea Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 10:08:28 +0000
Subject: [PATCH 11/25] Lanfear2024

---
 R/Concordance.R     |  6 +++++-
 inst/REFERENCES.bib | 11 +++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/R/Concordance.R b/R/Concordance.R
index a32f68a6c..796e1bb29 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -29,7 +29,8 @@
 #' `method = "sitemean"` returns mean(75%, 25%) = 50%.
 #' 
 #' `method = "minh"` uses the approach of
-#' \insertCite{Minh2020;textual}{TreeSearch}.
+#' \insertCite{Minh2020;textual}{TreeSearch} to compute the site concordance
+#' factor.
 #' Briefly, this interprets each edge as defining *four* clades, and counts
 #' the status of quartets that contain exactly one leaf from each of these
 #' clades.  This is a subset of the quartets considered by other methods.
@@ -49,6 +50,9 @@
 #TODO More thought / explanation needed.
 #' 
 #TODO Finally, `ProfileConcordance()` (to follow)
+#'
+#' For an overview of the use and interpretation of concordance factors,
+#' see \insertCite{Lanfear2024}{TreeSearch}.
 #' 
 #' **NOTE:** These functions are under development. They are incompletely
 #' tested, and may change without notice.
diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib
index 130091ffc..a14f1876f 100644
--- a/inst/REFERENCES.bib
+++ b/inst/REFERENCES.bib
@@ -311,6 +311,17 @@ @article{Kumar2012
   number = {2}
 }
 
+@article{Lanfear2024,
+  title = {The Meaning and Measure of Concordance Factors in Phylogenomics},
+  author = {Lanfear, Robert and Hahn, Matthew W},
+  year = {2024},
+  journal = {Molecular Biology and Evolution},
+  volume = {41},
+  number = {11},
+  pages = {msae214},
+  doi = {10.1093/molbev/msae214}
+}
+
 @article{Maddison1991,
   title = {Null models for the number of evolutionary steps in a character on a phylogenetic tree},
   author = {Maddison, Wayne P. and Slatkin, M},

From 44a2763e638f21766d8d7dccf23964fc7bcc95f5 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 10:08:46 +0000
Subject: [PATCH 12/25] Update SiteConcordance.Rd

---
 man/SiteConcordance.Rd | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/man/SiteConcordance.Rd b/man/SiteConcordance.Rd
index 8f3ad1df3..53422a9dc 100644
--- a/man/SiteConcordance.Rd
+++ b/man/SiteConcordance.Rd
@@ -60,7 +60,8 @@ a second that is concordant with 75\% of 4 decisive quartets.
 \code{method = "sitemean"} returns mean(75\%, 25\%) = 50\%.
 
 \code{method = "minh"} uses the approach of
-\insertCite{Minh2020;textual}{TreeSearch}.
+\insertCite{Minh2020;textual}{TreeSearch} to compute the site concordance
+factor.
 Briefly, this interprets each edge as defining \emph{four} clades, and counts
 the status of quartets that contain exactly one leaf from each of these
 clades.  This is a subset of the quartets considered by other methods.
@@ -70,6 +71,9 @@ Other implementations (e.g. IQ-TREE) follow
 \insertCite{@Minh2020;textual}{TreeSearch} in using a random subsample
 of quartets for a faster computation, potentially at the expense of accuracy.
 
+For an overview of the use and interpretation of concordance factors,
+see \insertCite{Lanfear2024}{TreeSearch}.
+
 \strong{NOTE:} These functions are under development. They are incompletely
 tested, and may change without notice.
 Complete documentation and discussion will follow in due course.

From 9ba63ffd0f73ede51e33f0fb54c39481b8cd1755 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 10:38:44 +0000
Subject: [PATCH 13/25] First bash at IQ-TREE impl

Maths not right yet
---
 R/Concordance.R                   | 68 +++++++++++++++++++++++--------
 tests/testthat/test-Concordance.R | 22 +++++++---
 2 files changed, 69 insertions(+), 21 deletions(-)

diff --git a/R/Concordance.R b/R/Concordance.R
index 796e1bb29..9dacc1112 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -34,6 +34,9 @@
 #' Briefly, this interprets each edge as defining *four* clades, and counts
 #' the status of quartets that contain exactly one leaf from each of these
 #' clades.  This is a subset of the quartets considered by other methods.
+#' `method = "iqtree"` uses the \insertCite{Minh2020;textual}{TreeSearch}
+#' approach as [implemented in IQ-TREE](
+#' https://github.com/iqtree/iqtree2/issues/415).
 #' 
 #' 
 #' `QuartetConcordance()` is computed exactly, using all quartets.
@@ -113,7 +116,7 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
   dataset <- dataset[tipLabels, drop = FALSE]
   characters <- PhyDatToMatrix(dataset, ambigNA = TRUE)
   
-  if (method == "minh") {
+  if (method %in% c("minh", "iqtree")) {
     if (attr(tree, "order") != "preorder") {
       stop("Tree must be in preorder; try `tree <- Preorder(tree)`")
     }
@@ -165,11 +168,9 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
         tab <- table(q, char)
         nCol <- dim(tab)[[2]]
         if (nCol > 1L) {
-          
           # RULES:
           # We must pick one leaf per row
-          # To be parsimony informative, we must pick two leaves from each of
-          #   two columns
+          # Only parsimony informative are decisive
           # To be concordant, leaves 1 and 2 must come from the same column
           
           # Example table:
@@ -189,19 +190,54 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
               prod(tab[1:2, j], tab[3:4, i])
           }))
           
-          decisive <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
-            # With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
-            i <- ij[[1]]
-            j <- ij[[2]]
-            sum(apply(combn(4, 2), 2, function(kl) {
-              # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
-              # and the other pair from col j
-              pair1 <- kl
-              pair2 <- (1:4)[-kl]
-              prod(tab[pair1, i], tab[pair2, j])
+          if (method == "minh") {
+            # To be parsimony informative, we must pick two leaves from each of
+            #   two columns
+            
+            
+            decisive <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
+              # With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
+              i <- ij[[1]]
+              j <- ij[[2]]
+              sum(apply(combn(4, 2), 2, function(kl) {
+                # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
+                # and the other pair from col j
+                pair1 <- kl
+                pair2 <- (1:4)[-kl]
+                prod(tab[pair1, i], tab[pair2, j])
+              }))
             }))
-          }))
-          
+          } else {
+            # Under the IQ-definition, to be parsimony informative,
+            # we must pick two leaves from one column.
+            # Hence 0012 is parsimony informative (!)
+            
+            parsInf <- sum(vapply(seq_len(nCol), function(i) {
+              sum(apply(combn(4, 2), 2, function(kl) {
+                # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
+                # and the other pair from any other column
+                pair1 <- kl
+                prod(tab[pair1, i], rowSums(tab[-pair1, -i, drop = FALSE]))
+              }))
+            }, double(1)))
+            
+            # inclusion-exclusion principle:
+            countedTwice <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
+              # With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
+              i <- ij[[1]]
+              j <- ij[[2]]
+              sum(apply(combn(4, 2), 2, function(kl) {
+                # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
+                # and the other pair from col j
+                pair1 <- kl
+                pair2 <- (1:4)[-kl]
+                prod(tab[pair1, i], tab[pair2, j])
+              }))
+            }))
+            
+            decisive <- parsInf - countedTwice
+            
+          }
           c(concordant, decisive)
         } else {
           c(0L, 0L)
diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index ae5c6401b..823f458a7 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -34,9 +34,11 @@ test_that("QuartetConcordance(method = minh)", {
   tree <- Preorder(tree)
   
   # plot(tree); nodelabels();
-  expect_concordance <- function(i, expectation) {
-    expect_equal(QuartetConcordance(tree, dat[, i], method = "minh"),
-                 setNames(expectation, 11:15))
+  expect_concordance <- function(i, expectation, iq = "minh") {
+    expect_equal(
+      QuartetConcordance(tree, dat[, i],
+                         method = ifelse(iq == "iq", "iqtree", "minh")),
+      setNames(expectation, 11:15))
   }
   # Expectations computed by working through tables manually
   expect_concordance(1, c(NaN, NaN, 1, NaN, NaN))
@@ -49,7 +51,6 @@ test_that("QuartetConcordance(method = minh)", {
   expect_concordance(8, c(NaN, NaN, 0, NaN, NaN))
   expect_concordance(9, c(NaN, 0, 1 / 2, 0, NaN))
   
-  expect_concordance(6, rep(NaN, 5))
   # Values calculated from summing results above
   expect_equal(unname(QuartetConcordance(tree, dat, method = "minh")), 
                c(5 + 3 + 1,
@@ -63,8 +64,19 @@ test_that("QuartetConcordance(method = minh)", {
                   6 + 2,
                   4 + 3))
   
-  expect_equal(unname(QuartetConcordance(tree, dat, method = "minh")), 
+  # Expectations computed by iq-tree
+  expect_concordance(iq = "iq", 1, c(NaN, NaN, 1, NaN, NaN))
+  expect_concordance(iq = "iq", 2, c(1, NaN, NaN, NaN, NaN))
+  expect_concordance(iq = "iq", 3, c(0.6, NaN, NaN, NaN, 0.5))
+  expect_concordance(iq = "iq", 4, c(0, 0, 2 / 3, NaN, NaN))
+  expect_concordance(iq = "iq", 5, c(0, 0, 1 / 3, 0, 3 / 8))
+  expect_concordance(iq = "iq", 6, rep(NaN, 5))
+  expect_concordance(iq = "iq", 7, c(1 / 5, NaN, NaN, NaN, NaN))
+  expect_concordance(iq = "iq", 8, c(NaN, NaN, 0, NaN, NaN))
+  expect_concordance(iq = "iq", 9, c(NaN, 0, 8.29, 0, NaN))
+  expect_equal(unname(QuartetConcordance(tree, dat, method = "iqtree")), 
                c(56.7, 0, 85.4, 0, 62.5), tolerance = 0.01)
+  
 })
 
 test_that("QuartetConcordance() calculates correct values - weighting", {

From cab7f64cb83feb0ea8eeabf20d7e92013230f758 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 11:28:58 +0000
Subject: [PATCH 14/25] Update SiteConcordance.Rd

---
 man/SiteConcordance.Rd | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/man/SiteConcordance.Rd b/man/SiteConcordance.Rd
index 53422a9dc..55113b651 100644
--- a/man/SiteConcordance.Rd
+++ b/man/SiteConcordance.Rd
@@ -65,6 +65,8 @@ factor.
 Briefly, this interprets each edge as defining \emph{four} clades, and counts
 the status of quartets that contain exactly one leaf from each of these
 clades.  This is a subset of the quartets considered by other methods.
+\code{method = "iqtree"} uses the \insertCite{Minh2020;textual}{TreeSearch}
+approach as \href{https://github.com/iqtree/iqtree2/issues/415}{implemented in IQ-TREE}.
 
 \code{QuartetConcordance()} is computed exactly, using all quartets.
 Other implementations (e.g. IQ-TREE) follow

From d32d3349612b39dd3c7ca87304a0a4aee2f87631 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 11:29:08 +0000
Subject: [PATCH 15/25] Correct tests

---
 tests/testthat/test-Concordance.R | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index 823f458a7..69ed1f3ae 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -65,17 +65,17 @@ test_that("QuartetConcordance(method = minh)", {
                   4 + 3))
   
   # Expectations computed by iq-tree
-  expect_concordance(iq = "iq", 1, c(NaN, NaN, 1, NaN, NaN))
-  expect_concordance(iq = "iq", 2, c(1, NaN, NaN, NaN, NaN))
-  expect_concordance(iq = "iq", 3, c(0.6, NaN, NaN, NaN, 0.5))
-  expect_concordance(iq = "iq", 4, c(0, 0, 2 / 3, NaN, NaN))
+  expect_concordance(iq = "iq", 1, c(0, 0, 1, 0, 0))
+  expect_concordance(iq = "iq", 2, c(1, 0, 0, 0, 0))
+  expect_concordance(iq = "iq", 3, c(0.6, 0, 0, 0, 0.5))
+  expect_concordance(iq = "iq", 4, c(0, 0, 2 / 3, 0, 0))
   expect_concordance(iq = "iq", 5, c(0, 0, 1 / 3, 0, 3 / 8))
-  expect_concordance(iq = "iq", 6, rep(NaN, 5))
-  expect_concordance(iq = "iq", 7, c(1 / 5, NaN, NaN, NaN, NaN))
-  expect_concordance(iq = "iq", 8, c(NaN, NaN, 0, NaN, NaN))
-  expect_concordance(iq = "iq", 9, c(NaN, 0, 8.29, 0, NaN))
+  expect_concordance(iq = "iq", 6, c(0, 0, 0, 0, 0))
+  expect_concordance(iq = "iq", 7, c(1 / 5, 0, 0, 0, 0))
+  expect_concordance(iq = "iq", 8, rep(0, 5))
+  expect_concordance(iq = "iq", 9, c(0, 0, 1 / 12, 0, 0))
   expect_equal(unname(QuartetConcordance(tree, dat, method = "iqtree")), 
-               c(56.7, 0, 85.4, 0, 62.5), tolerance = 0.01)
+               c(56.7, 0, 85.4, 0, 62.5) / 100, tolerance = 0.01)
   
 })
 

From 85693aa817a83374d505a6b9b9ebc5469ca3c497 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 11:29:21 +0000
Subject: [PATCH 16/25] Even simpler (!?)

---
 R/Concordance.R | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/R/Concordance.R b/R/Concordance.R
index 9dacc1112..692ee80f0 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -208,34 +208,12 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
               }))
             }))
           } else {
-            # Under the IQ-definition, to be parsimony informative,
-            # we must pick two leaves from one column.
-            # Hence 0012 is parsimony informative (!)
+            # IQ-TREE seems to use "variant" in place of "parsimony informative"
             
-            parsInf <- sum(vapply(seq_len(nCol), function(i) {
-              sum(apply(combn(4, 2), 2, function(kl) {
-                # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
-                # and the other pair from any other column
-                pair1 <- kl
-                prod(tab[pair1, i], rowSums(tab[-pair1, -i, drop = FALSE]))
-              }))
-            }, double(1)))
-            
-            # inclusion-exclusion principle:
-            countedTwice <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
-              # With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
-              i <- ij[[1]]
-              j <- ij[[2]]
-              sum(apply(combn(4, 2), 2, function(kl) {
-                # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
-                # and the other pair from col j
-                pair1 <- kl
-                pair2 <- (1:4)[-kl]
-                prod(tab[pair1, i], tab[pair2, j])
-              }))
-            }))
+            nPatterns <- prod(rowSums(tab))
+            allSame <- sum(apply(tab, 2, prod))
             
-            decisive <- parsInf - countedTwice
+            decisive <- nPatterns# - allSame
             
           }
           c(concordant, decisive)

From a6ad8f8fa999a32ce87d1aa871a055d09a30bb27 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Thu, 13 Feb 2025 12:31:39 +0000
Subject: [PATCH 17/25] Random quartet sampling required
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Still doesn't quite align – why not?
---
 R/Concordance.R                   | 133 +++++++++++++++++-------------
 R/data_manipulation.R             |   2 +-
 tests/testthat/test-Concordance.R |  47 +++++++++--
 3 files changed, 120 insertions(+), 62 deletions(-)

diff --git a/R/Concordance.R b/R/Concordance.R
index 692ee80f0..0298930ae 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -100,7 +100,8 @@
 #' @family split support functions
 #' @export
 # TODO support `method = c("split", "sitemean", "minh")`
-QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
+QuartetConcordance <- function (tree, dataset = NULL, method = "split",
+                                n = 250) {
   if (is.null(dataset)) {
     warning("Cannot calculate concordance without `dataset`.")
     return(NULL)
@@ -116,7 +117,7 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
   dataset <- dataset[tipLabels, drop = FALSE]
   characters <- PhyDatToMatrix(dataset, ambigNA = TRUE)
   
-  if (method %in% c("minh", "iqtree")) {
+  if (method %in% c("minh", "minhq", "iqtree")) {
     if (attr(tree, "order") != "preorder") {
       stop("Tree must be in preorder; try `tree <- Preorder(tree)`")
     }
@@ -162,68 +163,88 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split") {
     
     cli_progress_bar(name = "Quartet concordance", total = dim(quarters)[[2]])
     on.exit(cli_progress_done(.envir = parent.frame(2)))
-    concDeci <- apply(quarters, 2, function (q) {
-      #cli_progress_update(1, .envir = parent.frame(2))
-      quarts <- apply(characters, 2, function (char) {
-        tab <- table(q, char)
-        nCol <- dim(tab)[[2]]
-        if (nCol > 1L) {
-          # RULES:
-          # We must pick one leaf per row
-          # Only parsimony informative are decisive
-          # To be concordant, leaves 1 and 2 must come from the same column
-          
-          # Example table:
-          #   q  0   1   2
-          #   A  2   3   0
-          #   B  2   3   2
-          #   C  0   1   2
-          #   D  2   1   2
-          # 
-          
-          concordant <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
-            i <- ij[[1]]
-            j <- ij[[2]]
-            # Taxa from A and B from column i, C and D from j
-            prod(tab[1:2, i], tab[3:4, j]) +
-              # Taxa from A and B from column j, C and D from i
-              prod(tab[1:2, j], tab[3:4, i])
-          }))
-          
-          if (method == "minh") {
-            # To be parsimony informative, we must pick two leaves from each of
-            #   two columns
+    if (method == "minh") {
+      sCF <- apply(quarters, 2, function(abcd) {
+        resample <- function(x) x[sample.int(length(x), 1)]
+        leaves <- replicate(n, {
+          vapply(0:3, function(q) resample(which(abcd == q)), integer(1))
+        })
+        cfQ <- apply(leaves, 2, function(q) {
+          qCh <- `mode<-`(characters[q, , drop = FALSE], "numeric")
+          concordant <- sum(qCh[1, ] == qCh[2, ] &
+            qCh[2, ] != qCh[3, ] &
+            qCh[3, ] == qCh[4, ])
+          decisive <- dim(qCh)[[2]]
+          decisive <- sum(colSums(apply(qCh + 1, 2, tabulate, 4) >= 2) > 1)
+          decisive <- sum(colSums(apply(qCh + 1, 2, tabulate, 4) >= 2) > 0)
+          concordant / decisive
+        })
+        mean(cfQ, na.rm = TRUE)
+      })
+    } else {
+      concDeci <- apply(quarters, 2, function (q) {
+        #cli_progress_update(1, .envir = parent.frame(2))
+        quarts <- apply(characters, 2, function (char) {
+          tab <- table(q, char)
+          nCol <- dim(tab)[[2]]
+          if (nCol > 1L) {
+            # RULES:
+            # We must pick one leaf per row
+            # Only parsimony informative are decisive
+            # To be concordant, leaves 1 and 2 must come from the same column
             
+            # Example table:
+            #   q  0   1   2
+            #   A  2   3   0
+            #   B  2   3   2
+            #   C  0   1   2
+            #   D  2   1   2
+            # 
             
-            decisive <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
-              # With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
+            concordant <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
               i <- ij[[1]]
               j <- ij[[2]]
-              sum(apply(combn(4, 2), 2, function(kl) {
-                # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
-                # and the other pair from col j
-                pair1 <- kl
-                pair2 <- (1:4)[-kl]
-                prod(tab[pair1, i], tab[pair2, j])
-              }))
+              # Taxa from A and B from column i, C and D from j
+              prod(tab[1:2, i], tab[3:4, j]) +
+                # Taxa from A and B from column j, C and D from i
+                prod(tab[1:2, j], tab[3:4, i])
             }))
-          } else {
-            # IQ-TREE seems to use "variant" in place of "parsimony informative"
-            
-            nPatterns <- prod(rowSums(tab))
-            allSame <- sum(apply(tab, 2, prod))
-            
-            decisive <- nPatterns# - allSame
             
+            if (method == "minhq") {
+              # To be parsimony informative, we must pick two leaves from each of
+              #   two columns
+              
+              
+              decisive <- sum(apply(combn(seq_len(nCol), 2), 2, function(ij) {
+                # With three columns, we'll encounter i = 1, 1, 2; j = 2, 3, 3
+                i <- ij[[1]]
+                j <- ij[[2]]
+                sum(apply(combn(4, 2), 2, function(kl) {
+                  # We can pick taxa AB, AC, AD, BC, BD, CD from column i,
+                  # and the other pair from col j
+                  pair1 <- kl
+                  pair2 <- (1:4)[-kl]
+                  prod(tab[pair1, i], tab[pair2, j])
+                }))
+              }))
+            } else {
+              # IQ-TREE seems to use "variant" in place of "parsimony informative"
+              
+              nPatterns <- prod(rowSums(tab))
+              allSame <- sum(apply(tab, 2, prod))
+              
+              decisive <- nPatterns# - allSame
+              
+            }
+            c(concordant, decisive)
+          } else {
+            c(0L, 0L)
           }
-          c(concordant, decisive)
-        } else {
-          c(0L, 0L)
-        }
+        })
+        rowSums(quarts)
       })
-      rowSums(quarts)
-    })
-    concDeci[1, ] / concDeci[2, ]
+      concDeci[1, ] / concDeci[2, ]
+    }
   } else {
     splits <- as.Splits(tree, dataset)
     logiSplits <- vapply(seq_along(splits), function (i) as.logical(splits[[i]]),
diff --git a/R/data_manipulation.R b/R/data_manipulation.R
index d3d4b8b4c..109659ed3 100644
--- a/R/data_manipulation.R
+++ b/R/data_manipulation.R
@@ -22,7 +22,7 @@
 #'   
 #'   - `informative`: logical specifying which characters contain any
 #'     phylogenetic information.
-#'   
+#'
 #'   - `bootstrap`: The character vector 
 #'     \code{c("info.amounts", "split.sizes")}, indicating attributes to sample
 #'      when bootstrapping the dataset (e.g. in Ratchet searches).
diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index 69ed1f3ae..0030176aa 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -15,7 +15,7 @@ test_that("_Concordance() handles tip mismatch", {
                  "No overlap between tree labels and dataset.")
 })
 
-test_that("QuartetConcordance(method = minh)", {
+test_that("QuartetConcordance(method = minhq)", {
   tree <- ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));")
   mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1,  0,
                       0, 0, 1, 1, 1, 1, 1, 1,  0,
@@ -29,15 +29,15 @@ test_that("QuartetConcordance(method = minh)", {
                     dimnames = list(letters[1:9], NULL))
   dat <- MatrixToPhyDat(mataset)
   
-  expect_error(QuartetConcordance(tree, dat, method = "minh"),
+  expect_error(QuartetConcordance(tree, dat, method = "minhq"),
                "must be in preorder")
   tree <- Preorder(tree)
   
   # plot(tree); nodelabels();
-  expect_concordance <- function(i, expectation, iq = "minh") {
+  expect_concordance <- function(i, expectation, iq = "minhq") {
     expect_equal(
       QuartetConcordance(tree, dat[, i],
-                         method = ifelse(iq == "iq", "iqtree", "minh")),
+                         method = ifelse(iq == "iq", "iqtree", "minhq")),
       setNames(expectation, 11:15))
   }
   # Expectations computed by working through tables manually
@@ -52,7 +52,7 @@ test_that("QuartetConcordance(method = minh)", {
   expect_concordance(9, c(NaN, 0, 1 / 2, 0, NaN))
   
   # Values calculated from summing results above
-  expect_equal(unname(QuartetConcordance(tree, dat, method = "minh")), 
+  expect_equal(unname(QuartetConcordance(tree, dat, method = "minhq")), 
                c(5 + 3 + 1,
                  0,
                  12 + 8 + 4 + 1,
@@ -76,6 +76,43 @@ test_that("QuartetConcordance(method = minh)", {
   expect_concordance(iq = "iq", 9, c(0, 0, 1 / 12, 0, 0))
   expect_equal(unname(QuartetConcordance(tree, dat, method = "iqtree")), 
                c(56.7, 0, 85.4, 0, 62.5) / 100, tolerance = 0.01)
+})
+
+test_that("QuartetConcordance(method = minh)", {
+  tree <- Preorder(
+    ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));"))
+  mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1,  0,
+                      0, 0, 1, 1, 1, 1, 1, 1,  0,
+                      0, 0, 1, 1, 1, 1, 2, 2,  0,
+                      1, 0, 0, 0, 1, 1, 1, 1,  0,
+                      1, 0, 0, 0, 0, 1, 1, 1,  0,
+                      0, 0, 0, 0, 1, 1, 2, 2,  0,
+                      0, 0, 1, 1, 2, 2, 3, 3,  0,
+                      0, 1, 2, 3, 0, 1, 2, 3,  0,
+                      0, 1, 2, 0, 0, 2, 2, 3,  0), 9,
+                    dimnames = list(letters[1:9], NULL))
+  dat <- MatrixToPhyDat(mataset)
+  
+  # plot(tree); nodelabels();
+  expect_concordance <- function(i, expectation) {
+    expect_equal(
+      QuartetConcordance(tree, dat[, i], method = "minh", n = 1250),
+      setNames(expectation, 11:15), tolerance = 0.05)
+  }
+
+  # Expectations computed by iq-tree
+  expect_concordance(1, c(0, 0, 1, 0, 0))
+  expect_concordance(2, c(1, 0, 0, 0, 0))
+  expect_concordance(3, c(0.6, 0, 0, 0, 0.5))
+  expect_concordance(4, c(0, 0, 2 / 3, 0, 0))
+  expect_concordance(5, c(0, 0, 1 / 3, 0, 3 / 8))
+  expect_concordance(6, c(0, 0, 0, 0, 0))
+  expect_concordance(7, c(1 / 5, 0, 0, 0, 0))
+  expect_concordance(8, rep(0, 5))
+  expect_concordance(9, c(0, 0, 1 / 12, 0, 0))
+  expect_equal(tolerance = 0.05,
+    unname(QuartetConcordance(tree, dat, method = "minh", n = 1234)),
+    c(56.7, 0, 85.4, 0, 62.5) / 100)
   
 })
 

From ad2079d0e164f2c4be4884e40e3be81ac16505bf Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Mon, 17 Feb 2025 10:55:09 +0000
Subject: [PATCH 18/25] Check multifurcating trees

---
 tests/testthat/test-Concordance.R | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index 0030176aa..9e0b37e92 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -76,6 +76,12 @@ test_that("QuartetConcordance(method = minhq)", {
   expect_concordance(iq = "iq", 9, c(0, 0, 1 / 12, 0, 0))
   expect_equal(unname(QuartetConcordance(tree, dat, method = "iqtree")), 
                c(56.7, 0, 85.4, 0, 62.5) / 100, tolerance = 0.01)
+  
+  collapsed <- CollapseNode(tree, c(12, 13))
+  expect_equal(
+    QuartetConcordance(collapsed, dat, method = "iqtree"), 
+    QuartetConcordance(tree, dat, method = "iqtree")[-c(2, 3)]
+  )
 })
 
 test_that("QuartetConcordance(method = minh)", {
@@ -114,6 +120,11 @@ test_that("QuartetConcordance(method = minh)", {
     unname(QuartetConcordance(tree, dat, method = "minh", n = 1234)),
     c(56.7, 0, 85.4, 0, 62.5) / 100)
   
+  collapsed <- CollapseNode(tree, c(12, 13))
+  expect_equal(tolerance = 0.05,
+    QuartetConcordance(collapsed, dat, method = "minh", n = 1234),
+    QuartetConcordance(tree, dat, method = "minh", n = 1234)[-(2:3)]
+  )
 })
 
 test_that("QuartetConcordance() calculates correct values - weighting", {

From 520e51dec681ddb52752d885b67c09b54209224a Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Mon, 17 Feb 2025 11:09:09 +0000
Subject: [PATCH 19/25] Support multifurcating trees

---
 R/Concordance.R | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/R/Concordance.R b/R/Concordance.R
index 0298930ae..fcbd5c5f9 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -125,13 +125,13 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split",
     parent <- edge[, 1]
     child <- edge[, 2]
     nTip <- NTip(tree)
-    nodes <- nTip + seq_len(nTip - 1)
+    nodes <- min(parent):max(parent)
     parentEdge <- match(nodes, child)
     descended <- DescendantTips(parent, child)
-    childEdges <- vapply(nodes, function(x)
-      which(parent == x), integer(2))
-    daughters <- matrix(child[childEdges], 2)
-    rootNode <- nTip + 1
+    childEdges <- lapply(nodes, function(x) which(parent == x))
+    daughters <- lapply(childEdges, function(e) child[e])
+    
+    rootNode <- nTip + which(is.na(parentEdge))
     rootChildren <- child[parent == rootNode]
     nonRootChildren <- setdiff(nodes, c(rootNode, rootChildren))
     
@@ -140,7 +140,7 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split",
     quarters <- cbind(
       if (all(rootChildren > nTip)) {
         ret <- integer(nTip)
-        groups <- as.numeric(childEdges[, rootChildren - nTip])
+        groups <- childEdges[[rootChildren - nTip]]
         for (i in 0:3) {
           ret[descended[groups[[i + 1]], ]] <- i
         }
@@ -149,11 +149,13 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split",
       `dimnames<-`(vapply(nonRootChildren, function(n) {
         ret <- integer(nTip)
         if (parent[[parentEdge[[n - nTip]]]] == rootNode) {
-          
+          stop("Ought something to be here?")
         }
-        ret[apply(descended[childEdges[, parent[parentEdge[n - nTip]] - nTip], ],
-                  2, any)] <- 1L
-        ce <- childEdges[, n - nTip]
+        ret[apply(
+          descended[childEdges[[parent[parentEdge[n - nTip]] - nTip]], ],
+          2,
+          any)] <- 1L
+        ce <- childEdges[[n - nTip]]
         # Overprint with own children
         ret[descended[ce[[1]], ]] <- 2L
         ret[descended[ce[[2]], ]] <- 3L

From bdacf87fa8dc8831ea78401abcf072fe729e56dc Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Mon, 17 Feb 2025 11:23:55 +0000
Subject: [PATCH 20/25] names will differ

---
 tests/testthat/test-Concordance.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index 9e0b37e92..eb0367b32 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -79,8 +79,8 @@ test_that("QuartetConcordance(method = minhq)", {
   
   collapsed <- CollapseNode(tree, c(12, 13))
   expect_equal(
-    QuartetConcordance(collapsed, dat, method = "iqtree"), 
-    QuartetConcordance(tree, dat, method = "iqtree")[-c(2, 3)]
+    unname(QuartetConcordance(collapsed, dat, method = "iqtree")),
+    unname(QuartetConcordance(tree, dat, method = "iqtree"))[-c(2, 3)]
   )
 })
 

From d5e487e43be727bc45e5840e7a36d99a791c026e Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Mon, 17 Feb 2025 11:24:03 +0000
Subject: [PATCH 21/25] Standalone quartering function

---
 R/Concordance.R | 91 +++++++++++++++++++++++++------------------------
 1 file changed, 47 insertions(+), 44 deletions(-)

diff --git a/R/Concordance.R b/R/Concordance.R
index fcbd5c5f9..5c93e62b5 100644
--- a/R/Concordance.R
+++ b/R/Concordance.R
@@ -118,50 +118,7 @@ QuartetConcordance <- function (tree, dataset = NULL, method = "split",
   characters <- PhyDatToMatrix(dataset, ambigNA = TRUE)
   
   if (method %in% c("minh", "minhq", "iqtree")) {
-    if (attr(tree, "order") != "preorder") {
-      stop("Tree must be in preorder; try `tree <- Preorder(tree)`")
-    }
-    edge <- tree[["edge"]]
-    parent <- edge[, 1]
-    child <- edge[, 2]
-    nTip <- NTip(tree)
-    nodes <- min(parent):max(parent)
-    parentEdge <- match(nodes, child)
-    descended <- DescendantTips(parent, child)
-    childEdges <- lapply(nodes, function(x) which(parent == x))
-    daughters <- lapply(childEdges, function(e) child[e])
-    
-    rootNode <- nTip + which(is.na(parentEdge))
-    rootChildren <- child[parent == rootNode]
-    nonRootChildren <- setdiff(nodes, c(rootNode, rootChildren))
-    
-    # TODO pull out into a standalone function, perhaps in TreeTools
-    # nodes[-1] omits the root node, which defines no quartet
-    quarters <- cbind(
-      if (all(rootChildren > nTip)) {
-        ret <- integer(nTip)
-        groups <- childEdges[[rootChildren - nTip]]
-        for (i in 0:3) {
-          ret[descended[groups[[i + 1]], ]] <- i
-        }
-        `dimnames<-`(cbind(ret), list(TipLabels(tree), rootChildren[[1]]))
-      },
-      `dimnames<-`(vapply(nonRootChildren, function(n) {
-        ret <- integer(nTip)
-        if (parent[[parentEdge[[n - nTip]]]] == rootNode) {
-          stop("Ought something to be here?")
-        }
-        ret[apply(
-          descended[childEdges[[parent[parentEdge[n - nTip]] - nTip]], ],
-          2,
-          any)] <- 1L
-        ce <- childEdges[[n - nTip]]
-        # Overprint with own children
-        ret[descended[ce[[1]], ]] <- 2L
-        ret[descended[ce[[2]], ]] <- 3L
-        ret
-      }, integer(nTip)), list(TipLabels(tree), nonRootChildren))
-    )
+    quarters <- .TreeQuarters(tree)
     
     cli_progress_bar(name = "Quartet concordance", total = dim(quarters)[[2]])
     on.exit(cli_progress_done(.envir = parent.frame(2)))
@@ -397,6 +354,52 @@ PhylogeneticConcordance <- function (tree, dataset) {
   support[, 1] / support[, 2]
 }
 
+.TreeQuarters <- function(tree) {
+  if (attr(tree, "order") != "preorder") {
+    stop("Tree must be in preorder; try `tree <- Preorder(tree)`")
+  }
+  edge <- tree[["edge"]]
+  parent <- edge[, 1]
+  child <- edge[, 2]
+  nTip <- NTip(tree)
+  nodes <- min(parent):max(parent)
+  parentEdge <- match(nodes, child)
+  descended <- DescendantTips(parent, child)
+  childEdges <- lapply(nodes, function(x) which(parent == x))
+  daughters <- lapply(childEdges, function(e) child[e])
+  
+  rootNode <- nTip + which(is.na(parentEdge))
+  rootChildren <- child[parent == rootNode]
+  nonRootChildren <- setdiff(nodes, c(rootNode, rootChildren))
+  
+  # Return:
+  cbind(
+    if (all(rootChildren > nTip)) {
+      ret <- integer(nTip)
+      groups <- childEdges[[rootChildren - nTip]]
+      for (i in 0:3) {
+        ret[descended[groups[[i + 1]], ]] <- i
+      }
+      `dimnames<-`(cbind(ret), list(TipLabels(tree), rootChildren[[1]]))
+    },
+    `dimnames<-`(vapply(nonRootChildren, function(n) {
+      ret <- integer(nTip)
+      if (parent[[parentEdge[[n - nTip]]]] == rootNode) {
+        stop("Ought something to be here?")
+      }
+      ret[apply(
+        descended[childEdges[[parent[parentEdge[n - nTip]] - nTip]], ],
+        2,
+        any)] <- 1L
+      ce <- childEdges[[n - nTip]]
+      # Overprint with own children
+      ret[descended[ce[[1]], ]] <- 2L
+      ret[descended[ce[[2]], ]] <- 3L
+      ret
+    }, integer(nTip)), list(TipLabels(tree), nonRootChildren))
+  )
+}
+
 #' @rdname SiteConcordance
 # Mutual clustering information of each split with the split implied by each character
 #' @importFrom TreeDist ClusteringEntropy MutualClusteringInfo

From 020546ace928e99891d3e09e641b4bb59451dea0 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Mon, 17 Feb 2025 12:14:54 +0000
Subject: [PATCH 22/25] Test quarters

Not clear what should be expected in polytomy though!
---
 tests/testthat/test-Concordance.R | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-Concordance.R b/tests/testthat/test-Concordance.R
index eb0367b32..012991acf 100644
--- a/tests/testthat/test-Concordance.R
+++ b/tests/testthat/test-Concordance.R
@@ -15,8 +15,27 @@ test_that("_Concordance() handles tip mismatch", {
                  "No overlap between tree labels and dataset.")
 })
 
+test_that(".TreeQuarters() gives complete list", {
+  tree <- ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));")
+  coll <- CollapseNode(Preorder(tree), 12:13)
+  expect_error(.TreeQuarters(tree), "must be in preorder")
+  tree <- Preorder(tree)
+  
+  expect_equal(
+    .TreeQuarters(tree),
+    cbind("11" = c(a = 0, b = 1, c = 2, d = 3, e = 3, f = 3, g = 3, h = 3),
+          "12" = c(0, 0, 1, 2, 3, 3, 3, 3),
+          "13" = c(0, 0, 0, 1, 2, 2, 3, 3),
+          "14" = c(rep(0, 4), 2, 3, 1, 1),
+          "15" = c(rep(0, 4), 1, 1, 2, 3))
+  )
+  expect_equal(unname(.TreeQuarters(coll)),
+               unname(.TreeQuarters(Preorder(tree))[, c(1, 4, 5)]))
+})
+
 test_that("QuartetConcordance(method = minhq)", {
   tree <- ape::read.tree(text = "(a, (b, (c, (d, ((e, f), (g, h))))));")
+  tree <- Preorder(tree)
   mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1,  0,
                       0, 0, 1, 1, 1, 1, 1, 1,  0,
                       0, 0, 1, 1, 1, 1, 2, 2,  0,
@@ -29,10 +48,6 @@ test_that("QuartetConcordance(method = minhq)", {
                     dimnames = list(letters[1:9], NULL))
   dat <- MatrixToPhyDat(mataset)
   
-  expect_error(QuartetConcordance(tree, dat, method = "minhq"),
-               "must be in preorder")
-  tree <- Preorder(tree)
-  
   # plot(tree); nodelabels();
   expect_concordance <- function(i, expectation, iq = "minhq") {
     expect_equal(

From fa3f5ad2b95a395e66efa8f5e4cf9da3a853df94 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Fri, 4 Apr 2025 15:46:23 +0100
Subject: [PATCH 23/25] Read as PhyDat

---
 man-roxygen/datasetParam.r      | 2 +-
 man/AdditionTree.Rd             | 2 +-
 man/CharacterLength.Rd          | 2 +-
 man/ConcordantInformation.Rd    | 2 +-
 man/Consistency.Rd              | 2 +-
 man/MaximizeParsimony.Rd        | 2 +-
 man/PlotCharacter.Rd            | 2 +-
 man/SiteConcordance.Rd          | 4 ++--
 man/SuccessiveApproximations.Rd | 2 +-
 man/TaxonInfluence.Rd           | 2 +-
 man/TreeLength.Rd               | 2 +-
 man/TreeSearch.Rd               | 2 +-
 12 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/man-roxygen/datasetParam.r b/man-roxygen/datasetParam.r
index 5038c6a27..81bc53e03 100644
--- a/man-roxygen/datasetParam.r
+++ b/man-roxygen/datasetParam.r
@@ -1,4 +1,4 @@
 #' @param dataset A phylogenetic data matrix of \pkg{phangorn} class
 #' \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-#' Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.
+#' Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.
 # Defined in TreeTools.  Please propagate any changes there.
diff --git a/man/AdditionTree.Rd b/man/AdditionTree.Rd
index 88fe9b76c..0fbddc4b9 100644
--- a/man/AdditionTree.Rd
+++ b/man/AdditionTree.Rd
@@ -9,7 +9,7 @@ AdditionTree(dataset, concavity = Inf, constraint, sequence)
 \arguments{
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{concavity}{Numeric specifying concavity constant for implied step
 weighting.
diff --git a/man/CharacterLength.Rd b/man/CharacterLength.Rd
index 5f217f6c5..1d73c6f5a 100644
--- a/man/CharacterLength.Rd
+++ b/man/CharacterLength.Rd
@@ -17,7 +17,7 @@ FastCharacterLength(tree, dataset)
 
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{compress}{Logical specifying whether to retain the compression of a
 \code{phyDat} object or to return a vector specifying to each individual
diff --git a/man/ConcordantInformation.Rd b/man/ConcordantInformation.Rd
index b1d10935d..8cb5ce3de 100644
--- a/man/ConcordantInformation.Rd
+++ b/man/ConcordantInformation.Rd
@@ -17,7 +17,7 @@ ConcordantInfo(tree, dataset)
 
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 }
 \value{
 \code{ConcordantInformation()} returns a named vector with elements:
diff --git a/man/Consistency.Rd b/man/Consistency.Rd
index 2af757710..7720a5928 100644
--- a/man/Consistency.Rd
+++ b/man/Consistency.Rd
@@ -9,7 +9,7 @@ Consistency(dataset, tree, compress = FALSE)
 \arguments{
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{tree}{A tree of class \code{\link{phylo}}.}
 
diff --git a/man/MaximizeParsimony.Rd b/man/MaximizeParsimony.Rd
index 8de3f5d24..5256a440d 100644
--- a/man/MaximizeParsimony.Rd
+++ b/man/MaximizeParsimony.Rd
@@ -48,7 +48,7 @@ EasyTreesy()
 \arguments{
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{tree}{(optional) A bifurcating tree of class \code{\link{phylo}},
 containing only the tips listed in \code{dataset}, from which the search
diff --git a/man/PlotCharacter.Rd b/man/PlotCharacter.Rd
index 094859f81..c7b917c88 100644
--- a/man/PlotCharacter.Rd
+++ b/man/PlotCharacter.Rd
@@ -26,7 +26,7 @@ PlotCharacter(
 
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{char}{Index of character to plot.}
 
diff --git a/man/SiteConcordance.Rd b/man/SiteConcordance.Rd
index 55113b651..40f0269bd 100644
--- a/man/SiteConcordance.Rd
+++ b/man/SiteConcordance.Rd
@@ -9,7 +9,7 @@
 \alias{SharedPhylogeneticConcordance}
 \title{Calculate site concordance factor}
 \usage{
-QuartetConcordance(tree, dataset = NULL, method = "split")
+QuartetConcordance(tree, dataset = NULL, method = "split", n = 250)
 
 ClusteringConcordance(tree, dataset)
 
@@ -24,7 +24,7 @@ SharedPhylogeneticConcordance(tree, dataset)
 
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{method}{Character vector specifying which concordance measures to
 calculate.  See details section for available options.}
diff --git a/man/SuccessiveApproximations.Rd b/man/SuccessiveApproximations.Rd
index 774dc45b7..d87888e28 100644
--- a/man/SuccessiveApproximations.Rd
+++ b/man/SuccessiveApproximations.Rd
@@ -26,7 +26,7 @@ SuccessiveWeights(tree, dataset)
 
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{outgroup}{if not NULL, taxa on which the tree should be rooted}
 
diff --git a/man/TaxonInfluence.Rd b/man/TaxonInfluence.Rd
index fdfe41d85..ef7510e63 100644
--- a/man/TaxonInfluence.Rd
+++ b/man/TaxonInfluence.Rd
@@ -19,7 +19,7 @@ TaxonInfluence(
 \arguments{
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{tree}{Optimal tree or summary tree (of class "phylo") or list of trees
 (of class "list" or "multiPhylo") against which results should be evaluated.
diff --git a/man/TreeLength.Rd b/man/TreeLength.Rd
index 075a2b63c..1481ca255 100644
--- a/man/TreeLength.Rd
+++ b/man/TreeLength.Rd
@@ -31,7 +31,7 @@ uniformly sampled.}
 
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{concavity}{Determines the degree to which extra steps beyond the first
 are penalized.  Specify a numeric value to use implied weighting
diff --git a/man/TreeSearch.Rd b/man/TreeSearch.Rd
index 33b0658ff..84680758a 100644
--- a/man/TreeSearch.Rd
+++ b/man/TreeSearch.Rd
@@ -56,7 +56,7 @@ DoNothing(...)
 
 \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class
 \code{phyDat}, whose names correspond to the labels of any accompanying tree.
-Perhaps load into R using \code{\link[TreeTools]{ReadCharacters}}.}
+Perhaps load into R using \code{\link[TreeTools]{ReadAsPhyDat}}.}
 
 \item{TreeScorer}{function to score a given tree.
 The function will be passed three parameters, corresponding to the

From 0827e86296f22f384ef3e1be6ec70bd68802c382 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Wed, 9 Apr 2025 14:14:43 +0100
Subject: [PATCH 24/25] /

Link  https://github.com/iqtree/iqtree2/issues/415
---
 data-raw/quartet-concordance-tests.R | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/data-raw/quartet-concordance-tests.R b/data-raw/quartet-concordance-tests.R
index c4386fb07..24bae23dd 100644
--- a/data-raw/quartet-concordance-tests.R
+++ b/data-raw/quartet-concordance-tests.R
@@ -11,14 +11,15 @@ mataset <- matrix(c(0, 0, 0, 0, 1, 1, 1, 1,  0,
                     0, 1, 2, 0, 0, 2, 2, 3,  0), 9,
                   dimnames = list(letters[1:9], NULL))
 dat <- MatrixToPhyDat(mataset[1:8, ])
-iqtreeDir <- stop("Path to folder that contains iqtree2.exe")
-treeFile <- paste0(iqtreeDir, "tree.nwk")
-dataFile <- paste0(iqtreeDir, "data.nex")
-charFile <- paste0(iqtreeDir, "data%i.nex")
+iqtreeDir <- stop("Path to folder that contains iqtree3.exe")
+treeFile <- paste0(iqtreeDir, "/tree.nwk")
+dataFile <- paste0(iqtreeDir, "/data.nex")
+charFile <- paste0(iqtreeDir, "/data%i.nex")
 
 write.tree(tree, file = treeFile)
 
 # Expected output per character
+# See: https://github.com/iqtree/iqtree2/issues/415
 vapply(1:9, function(i) {
   charIFile <- sprintf(charFile, i)
   write.nexus.data(dat[, i], file = charIFile, format = "STANDARD")
@@ -26,7 +27,7 @@ vapply(1:9, function(i) {
   dataLines[[5]] <- sub("MISSING=? GAP=- INTERLEAVE=NO ", "", dataLines[[5]],
                         fixed = TRUE)
   writeLines(dataLines, charIFile)
-  system2(paste0(iqtreeDir, "iqtree2.exe"),
+  system2(paste0(iqtreeDir, "/iqtree3.exe"),
           paste("-t", treeFile, "-s", charIFile,
                 "--scf 100000",
                 "-nt 4"

From d25abefbe9a2aa45d244ca36d5737952e9bbc7b8 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Wed, 9 Apr 2025 14:21:53 +0100
Subject: [PATCH 25/25] Version: 1.5.1.9007

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 929fe0c13..f113bc582 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: TreeSearch
 Title: Phylogenetic Analysis with Discrete Character Data
-Version: 1.5.1.9006
+Version: 1.5.1.9007
 Authors@R: c(
     person(
       "Martin R.", 'Smith',