From 216d319be70ca002d62ae29ff5e33c0439fd7e31 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 25 Nov 2025 08:08:21 +0000
Subject: [PATCH 1/6] Initial plan


From a50029d3beca929519e3c06f835629866eae4a93 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 25 Nov 2025 08:17:56 +0000
Subject: [PATCH 2/6] Fix MaximumLength crash with datasets having >8 states

Co-authored-by: ms609 <1695515+ms609@users.noreply.github.com>
---
 R/length_range.R                   | 16 ++++++++++-----
 tests/testthat/test-length_range.R | 33 ++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/R/length_range.R b/R/length_range.R
index 21abe73e5..66a7e8d0e 100644
--- a/R/length_range.R
+++ b/R/length_range.R
@@ -128,7 +128,7 @@ MinimumLength.numeric <- function (x, compress = NA) {
       } else {
         squish <- which.max(occurrences)
         tokensUsed <- tokensUsed + 1L
-        tokens <- tokens[, tokens[!squish], drop = FALSE]
+        tokens <- tokens[, !tokens[squish, ], drop = FALSE]
       }
     }
     lastDim <- dim(tokens)
@@ -178,8 +178,13 @@ MaximumLength.numeric <- function(x, compress = NA) {
   nInapp <- sum(x == 0)
   regions <- max(1, nInapp - 1)
   steps <- 0
-  if (sum(counts > 0) > 1) {
+  if (length(counts) > 0 && sum(counts > 0) > 1) {
     nState <- floor(log2(length(counts))) + 1L
+    # Guard against overflow: nState > 30 would cause nToken to overflow
+    # integer capacity and create prohibitively large matrices
+    if (nState > 30L) {
+      stop("MaximumLength() does not support more than 30 character states.")
+    }
     nToken <- 2 ^ nState - 1
     counts <- c(counts, double(nToken - length(counts)))
     
@@ -188,9 +193,10 @@ MaximumLength.numeric <- function(x, compress = NA) {
     tokenSums <- colSums(tokens)
     active <- c(rep(TRUE, nToken - 1), FALSE)
     
-    bitTokens <- as.raw(seq_len(nToken))
-    nonIntersect <- outer(bitTokens, bitTokens, `&`) == 00
-    unions <- matrix(tokenSums[as.integer(outer(bitTokens, bitTokens, `|`))],
+    # Use integer bitwise operations to avoid raw overflow when nToken > 255
+    bitTokens <- seq_len(nToken)
+    nonIntersect <- outer(bitTokens, bitTokens, bitwAnd) == 0L
+    unions <- matrix(tokenSums[outer(bitTokens, bitTokens, bitwOr)],
                      nToken, nToken)
     .Merge <- function(a, b) sum(2 ^ (which(tokens[, a] | tokens[, b]) - 1))
     loopCount <- 0
diff --git a/tests/testthat/test-length_range.R b/tests/testthat/test-length_range.R
index 3e18156cf..62c4288e1 100644
--- a/tests/testthat/test-length_range.R
+++ b/tests/testthat/test-length_range.R
@@ -151,3 +151,36 @@ test_that("MaximumLength() handles inapplicable tokens", {
   expect_equal(MaximumLength("00112233{01}{23}{012}----"), 3 + 3 + 1 + 2)
 })
 
+test_that("MaximumLength() handles many states (>8) without overflow", {
+  # Regression test for crash with datasets having more than 8 character states
+  # When nToken > 255, as.raw() would overflow; now uses bitwAnd/bitwOr instead
+  
+  # Test with 10 states (nToken = 1023)
+  # This should complete without error (the specific result is secondary)
+  manyStates <- c(1, 2, 4, 8, 16, 32, 64, 128, 256, 512)  # 10 distinct states
+  expect_silent(result <- MaximumLength.numeric(manyStates))
+  expect_equal(result, 9)  # max steps for 10 distinct tokens
+  
+  # Test with combination of states including inapplicable (0)
+  manyStatesWithInapp <- c(0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 0)
+  expect_silent(result <- MaximumLength.numeric(manyStatesWithInapp))
+  expect_equal(result, 9)  # 9 steps + 0 extra for inapplicable regions
+  
+  # Test with large combined token value (all 10 states present in one tip)
+  # This is the scenario that triggers the crash with Vinther2008 dataset
+  largeToken <- c(1, 1023)  # 1 = state 0, 1023 = all states 0-9
+  expect_silent(result <- MaximumLength.numeric(largeToken))
+  expect_equal(result, 0)  # state 0 is subset of 1023, so no extra steps needed
+})
+
+test_that("MaximumLength() works with Vinther2008 dataset", {
+  # Regression test for the original crash report
+  # This dataset has 10 states (0-9) which caused overflow
+  data("inapplicable.datasets")
+  expect_silent(result <- MaximumLength(inapplicable.phyData[["Vinther2008"]]))
+  expect_true(is.numeric(result))
+  expect_true(all(result >= 0))
+})
+
+
+

From c83a98bc0658dccea2787ad50f53dc8d0a6b5265 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 25 Nov 2025 08:19:20 +0000
Subject: [PATCH 3/6] Improve test comments for clarity

Co-authored-by: ms609 <1695515+ms609@users.noreply.github.com>
---
 tests/testthat/test-length_range.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-length_range.R b/tests/testthat/test-length_range.R
index 62c4288e1..83f236728 100644
--- a/tests/testthat/test-length_range.R
+++ b/tests/testthat/test-length_range.R
@@ -157,7 +157,8 @@ test_that("MaximumLength() handles many states (>8) without overflow", {
   
   # Test with 10 states (nToken = 1023)
   # This should complete without error (the specific result is secondary)
-  manyStates <- c(1, 2, 4, 8, 16, 32, 64, 128, 256, 512)  # 10 distinct states
+  # 10 distinct character states (powers of 2 for bit representation)
+  manyStates <- c(1, 2, 4, 8, 16, 32, 64, 128, 256, 512)
   expect_silent(result <- MaximumLength.numeric(manyStates))
   expect_equal(result, 9)  # max steps for 10 distinct tokens
   
@@ -168,7 +169,8 @@ test_that("MaximumLength() handles many states (>8) without overflow", {
   
   # Test with large combined token value (all 10 states present in one tip)
   # This is the scenario that triggers the crash with Vinther2008 dataset
-  largeToken <- c(1, 1023)  # 1 = state 0, 1023 = all states 0-9
+  # 1 = only state 0, 1023 = all states 0-9 combined (2^10 - 1)
+  largeToken <- c(1, 1023)
   expect_silent(result <- MaximumLength.numeric(largeToken))
   expect_equal(result, 0)  # state 0 is subset of 1023, so no extra steps needed
 })

From 7777d5aa58da0df4a4f20225eaa42182a4cbebd7 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Tue, 25 Nov 2025 10:30:36 +0000
Subject: [PATCH 4/6] tidy

---
 tests/testthat/test-length_range.R | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/testthat/test-length_range.R b/tests/testthat/test-length_range.R
index 83f236728..70f9c7869 100644
--- a/tests/testthat/test-length_range.R
+++ b/tests/testthat/test-length_range.R
@@ -168,7 +168,7 @@ test_that("MaximumLength() handles many states (>8) without overflow", {
   expect_equal(result, 9)  # 9 steps + 0 extra for inapplicable regions
   
   # Test with large combined token value (all 10 states present in one tip)
-  # This is the scenario that triggers the crash with Vinther2008 dataset
+  # This is the scenario that triggered #203
   # 1 = only state 0, 1023 = all states 0-9 combined (2^10 - 1)
   largeToken <- c(1, 1023)
   expect_silent(result <- MaximumLength.numeric(largeToken))
@@ -176,13 +176,9 @@ test_that("MaximumLength() handles many states (>8) without overflow", {
 })
 
 test_that("MaximumLength() works with Vinther2008 dataset", {
-  # Regression test for the original crash report
   # This dataset has 10 states (0-9) which caused overflow
   data("inapplicable.datasets")
   expect_silent(result <- MaximumLength(inapplicable.phyData[["Vinther2008"]]))
   expect_true(is.numeric(result))
   expect_true(all(result >= 0))
 })
-
-
-

From 72723d907e307e6ed9120eafa8de3ff9da8d5d2f Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Tue, 25 Nov 2025 10:35:46 +0000
Subject: [PATCH 5/6] Test failure

---
 tests/testthat/test-length_range.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/testthat/test-length_range.R b/tests/testthat/test-length_range.R
index 70f9c7869..0ae93ac32 100644
--- a/tests/testthat/test-length_range.R
+++ b/tests/testthat/test-length_range.R
@@ -173,6 +173,9 @@ test_that("MaximumLength() handles many states (>8) without overflow", {
   largeToken <- c(1, 1023)
   expect_silent(result <- MaximumLength.numeric(largeToken))
   expect_equal(result, 0)  # state 0 is subset of 1023, so no extra steps needed
+  
+  expect_error(expect_warning(MaximumLength.numeric(2 ^ (0:31))),
+               "does not support more than 30 character states")
 })
 
 test_that("MaximumLength() works with Vinther2008 dataset", {

From 45ff0362fb7259529121b55c8444ee9b0349f1fd Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 25 Nov 2025 12:00:29 +0000
Subject: [PATCH 6/6] Update NEWS.md

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 06e8453af..a08a957e6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,6 @@
 # TreeSearch 1.7.0.9000 (development)
 
-No changes yet.
+- Fix regression in `MaximumLength()`.
 
 
 # TreeSearch 1.7.0 (2025-08-22)