From 216d319be70ca002d62ae29ff5e33c0439fd7e31 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 25 Nov 2025 08:08:21 +0000 Subject: [PATCH 1/6] Initial plan From a50029d3beca929519e3c06f835629866eae4a93 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 25 Nov 2025 08:17:56 +0000 Subject: [PATCH 2/6] Fix MaximumLength crash with datasets having >8 states Co-authored-by: ms609 <1695515+ms609@users.noreply.github.com> --- R/length_range.R | 16 ++++++++++----- tests/testthat/test-length_range.R | 33 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/R/length_range.R b/R/length_range.R index 21abe73e5..66a7e8d0e 100644 --- a/R/length_range.R +++ b/R/length_range.R @@ -128,7 +128,7 @@ MinimumLength.numeric <- function (x, compress = NA) { } else { squish <- which.max(occurrences) tokensUsed <- tokensUsed + 1L - tokens <- tokens[, tokens[!squish], drop = FALSE] + tokens <- tokens[, !tokens[squish, ], drop = FALSE] } } lastDim <- dim(tokens) @@ -178,8 +178,13 @@ MaximumLength.numeric <- function(x, compress = NA) { nInapp <- sum(x == 0) regions <- max(1, nInapp - 1) steps <- 0 - if (sum(counts > 0) > 1) { + if (length(counts) > 0 && sum(counts > 0) > 1) { nState <- floor(log2(length(counts))) + 1L + # Guard against overflow: nState > 30 would cause nToken to overflow + # integer capacity and create prohibitively large matrices + if (nState > 30L) { + stop("MaximumLength() does not support more than 30 character states.") + } nToken <- 2 ^ nState - 1 counts <- c(counts, double(nToken - length(counts))) @@ -188,9 +193,10 @@ MaximumLength.numeric <- function(x, compress = NA) { tokenSums <- colSums(tokens) active <- c(rep(TRUE, nToken - 1), FALSE) - bitTokens <- as.raw(seq_len(nToken)) - nonIntersect <- outer(bitTokens, bitTokens, `&`) == 00 - unions <- matrix(tokenSums[as.integer(outer(bitTokens, bitTokens, `|`))], + # Use integer bitwise operations to avoid raw overflow when nToken > 255 + bitTokens <- seq_len(nToken) + nonIntersect <- outer(bitTokens, bitTokens, bitwAnd) == 0L + unions <- matrix(tokenSums[outer(bitTokens, bitTokens, bitwOr)], nToken, nToken) .Merge <- function(a, b) sum(2 ^ (which(tokens[, a] | tokens[, b]) - 1)) loopCount <- 0 diff --git a/tests/testthat/test-length_range.R b/tests/testthat/test-length_range.R index 3e18156cf..62c4288e1 100644 --- a/tests/testthat/test-length_range.R +++ b/tests/testthat/test-length_range.R @@ -151,3 +151,36 @@ test_that("MaximumLength() handles inapplicable tokens", { expect_equal(MaximumLength("00112233{01}{23}{012}----"), 3 + 3 + 1 + 2) }) +test_that("MaximumLength() handles many states (>8) without overflow", { + # Regression test for crash with datasets having more than 8 character states + # When nToken > 255, as.raw() would overflow; now uses bitwAnd/bitwOr instead + + # Test with 10 states (nToken = 1023) + # This should complete without error (the specific result is secondary) + manyStates <- c(1, 2, 4, 8, 16, 32, 64, 128, 256, 512) # 10 distinct states + expect_silent(result <- MaximumLength.numeric(manyStates)) + expect_equal(result, 9) # max steps for 10 distinct tokens + + # Test with combination of states including inapplicable (0) + manyStatesWithInapp <- c(0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 0) + expect_silent(result <- MaximumLength.numeric(manyStatesWithInapp)) + expect_equal(result, 9) # 9 steps + 0 extra for inapplicable regions + + # Test with large combined token value (all 10 states present in one tip) + # This is the scenario that triggers the crash with Vinther2008 dataset + largeToken <- c(1, 1023) # 1 = state 0, 1023 = all states 0-9 + expect_silent(result <- MaximumLength.numeric(largeToken)) + expect_equal(result, 0) # state 0 is subset of 1023, so no extra steps needed +}) + +test_that("MaximumLength() works with Vinther2008 dataset", { + # Regression test for the original crash report + # This dataset has 10 states (0-9) which caused overflow + data("inapplicable.datasets") + expect_silent(result <- MaximumLength(inapplicable.phyData[["Vinther2008"]])) + expect_true(is.numeric(result)) + expect_true(all(result >= 0)) +}) + + + From c83a98bc0658dccea2787ad50f53dc8d0a6b5265 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 25 Nov 2025 08:19:20 +0000 Subject: [PATCH 3/6] Improve test comments for clarity Co-authored-by: ms609 <1695515+ms609@users.noreply.github.com> --- tests/testthat/test-length_range.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-length_range.R b/tests/testthat/test-length_range.R index 62c4288e1..83f236728 100644 --- a/tests/testthat/test-length_range.R +++ b/tests/testthat/test-length_range.R @@ -157,7 +157,8 @@ test_that("MaximumLength() handles many states (>8) without overflow", { # Test with 10 states (nToken = 1023) # This should complete without error (the specific result is secondary) - manyStates <- c(1, 2, 4, 8, 16, 32, 64, 128, 256, 512) # 10 distinct states + # 10 distinct character states (powers of 2 for bit representation) + manyStates <- c(1, 2, 4, 8, 16, 32, 64, 128, 256, 512) expect_silent(result <- MaximumLength.numeric(manyStates)) expect_equal(result, 9) # max steps for 10 distinct tokens @@ -168,7 +169,8 @@ test_that("MaximumLength() handles many states (>8) without overflow", { # Test with large combined token value (all 10 states present in one tip) # This is the scenario that triggers the crash with Vinther2008 dataset - largeToken <- c(1, 1023) # 1 = state 0, 1023 = all states 0-9 + # 1 = only state 0, 1023 = all states 0-9 combined (2^10 - 1) + largeToken <- c(1, 1023) expect_silent(result <- MaximumLength.numeric(largeToken)) expect_equal(result, 0) # state 0 is subset of 1023, so no extra steps needed }) From 7777d5aa58da0df4a4f20225eaa42182a4cbebd7 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Tue, 25 Nov 2025 10:30:36 +0000 Subject: [PATCH 4/6] tidy --- tests/testthat/test-length_range.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/testthat/test-length_range.R b/tests/testthat/test-length_range.R index 83f236728..70f9c7869 100644 --- a/tests/testthat/test-length_range.R +++ b/tests/testthat/test-length_range.R @@ -168,7 +168,7 @@ test_that("MaximumLength() handles many states (>8) without overflow", { expect_equal(result, 9) # 9 steps + 0 extra for inapplicable regions # Test with large combined token value (all 10 states present in one tip) - # This is the scenario that triggers the crash with Vinther2008 dataset + # This is the scenario that triggered #203 # 1 = only state 0, 1023 = all states 0-9 combined (2^10 - 1) largeToken <- c(1, 1023) expect_silent(result <- MaximumLength.numeric(largeToken)) @@ -176,13 +176,9 @@ test_that("MaximumLength() handles many states (>8) without overflow", { }) test_that("MaximumLength() works with Vinther2008 dataset", { - # Regression test for the original crash report # This dataset has 10 states (0-9) which caused overflow data("inapplicable.datasets") expect_silent(result <- MaximumLength(inapplicable.phyData[["Vinther2008"]])) expect_true(is.numeric(result)) expect_true(all(result >= 0)) }) - - - From 72723d907e307e6ed9120eafa8de3ff9da8d5d2f Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Tue, 25 Nov 2025 10:35:46 +0000 Subject: [PATCH 5/6] Test failure --- tests/testthat/test-length_range.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-length_range.R b/tests/testthat/test-length_range.R index 70f9c7869..0ae93ac32 100644 --- a/tests/testthat/test-length_range.R +++ b/tests/testthat/test-length_range.R @@ -173,6 +173,9 @@ test_that("MaximumLength() handles many states (>8) without overflow", { largeToken <- c(1, 1023) expect_silent(result <- MaximumLength.numeric(largeToken)) expect_equal(result, 0) # state 0 is subset of 1023, so no extra steps needed + + expect_error(expect_warning(MaximumLength.numeric(2 ^ (0:31))), + "does not support more than 30 character states") }) test_that("MaximumLength() works with Vinther2008 dataset", { From 45ff0362fb7259529121b55c8444ee9b0349f1fd Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 25 Nov 2025 12:00:29 +0000 Subject: [PATCH 6/6] Update NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 06e8453af..a08a957e6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # TreeSearch 1.7.0.9000 (development) -No changes yet. +- Fix regression in `MaximumLength()`. # TreeSearch 1.7.0 (2025-08-22)