From 3bfcb7fe7eb5b4bdb114058f61ec08a99894e060 Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Sun, 28 Dec 2025 15:07:15 +1100 Subject: [PATCH] fix: resolve indexing issues in traceback and bump to 0.5.1 --- Cargo.toml | 2 +- pyproject.toml | 2 +- src/lib.rs | 29 ++++++++++++----------------- tests/test_seq_smith.py | 22 ++++++++++++++++++++-- uv.lock | 2 +- 5 files changed, 35 insertions(+), 22 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 16bb9df..15c3f4a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "seq_smith" -version = "0.5.0" +version = "0.5.1" edition = "2021" [dependencies] diff --git a/pyproject.toml b/pyproject.toml index bf306ec..e7663a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "seq-smith" -version = "0.5.0" +version = "0.5.1" authors = [ { name = "Tobias Sargeant", email = "tobias.sargeant@gmail.com" }, ] diff --git a/src/lib.rs b/src/lib.rs index 349d584..81a1627 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -407,12 +407,10 @@ fn traceback( let mut count = 0; loop { // Statistics calculation - let residue_a = params.sa[s_col as usize]; - let residue_b = params.sb[s_row as usize]; - if residue_a == residue_b { + if params.sa[s_col as usize] == params.sb[s_row as usize] { stats.num_exact_matches += 1; } else { - let score = params.match_score(residue_a as usize, residue_b as usize); + let score = params.match_score(s_row as usize, s_col as usize); if score > 0 { stats.num_positive_mismatches += 1; } else { @@ -1098,8 +1096,8 @@ fn _top_k_ungapped_local_align_core( let sb_end = candidate.sb_start + candidate.len; let overlap = alignments.iter().any(|prev| { - let p_sa_start = (prev.fragments[0].sa_start - 1) as usize; // 0-indexed - let p_sb_start = (prev.fragments[0].sb_start - 1) as usize; // 0-indexed + let p_sa_start = prev.fragments[0].sa_start as usize; + let p_sb_start = prev.fragments[0].sb_start as usize; let p_sa_end = p_sa_start + prev.fragments[0].len as usize; let p_sb_end = p_sb_start + prev.fragments[0].len as usize; @@ -1119,25 +1117,22 @@ fn _top_k_ungapped_local_align_core( for i in 0..candidate.len { let r = candidate.sb_start + i; let c = candidate.sa_start + i; - let val = params.match_score(c, r); - if params.sa[c] == params.sb[r] { + if params.sb[r] == params.sa[c] { stats.num_exact_matches += 1; - } else if val > 0 { + } else if params.match_score(r, c) > 0 { stats.num_positive_mismatches += 1; } else { stats.num_negative_mismatches += 1; } } - let frag = AlignmentFragment { - fragment_type: FragmentType::Match, - sa_start: (candidate.sa_start + 1) as i32, - sb_start: (candidate.sb_start + 1) as i32, - len: candidate.len as i32, - }; - alignments.push(Alignment { - fragments: vec![frag], + fragments: vec![AlignmentFragment { + fragment_type: FragmentType::Match, + sa_start: candidate.sa_start as i32, + sb_start: candidate.sb_start as i32, + len: candidate.len as i32, + }], score: candidate.score, stats: stats, }); diff --git a/tests/test_seq_smith.py b/tests/test_seq_smith.py index 00aa62d..4911f2e 100644 --- a/tests/test_seq_smith.py +++ b/tests/test_seq_smith.py @@ -502,8 +502,26 @@ def test_top_k_ungapped_simple() -> None: assert alignments[1].score == 8 starts = sorted([(a.fragments[0].sa_start, a.fragments[0].sb_start) for a in alignments]) - assert starts[0] == (1, 1) # 1-based index in fragments - assert starts[1] == (9, 9) + assert starts[0] == (0, 0) + assert starts[1] == (8, 8) + + +def test_top_k_ungapped_overlap() -> None: + # Use custom alphabet to support Z/W if needed, or just use ACGT + alphabet = "ACGT" + seqa = encode("AAAATTTTCCCCAAAATTTTCCCCAAAATTTTCCCC", alphabet) + seqb = encode("AAAAGGGGCCCC", alphabet) + + # matrix: match=2, mismatch=-5 + score_matrix = make_score_matrix(alphabet, match_score=2, mismatch_score=-5) + + alignments = top_k_ungapped_local_align(seqa, seqb, score_matrix, k=5, filter_overlap_b=False) + + assert len(alignments) == 5 # 6 total alignments, but k=5 + assert all(a.score == 8 for a in alignments) + starts = sorted([(a.fragments[0].sa_start, a.fragments[0].sb_start) for a in alignments]) + for c, r in starts: + assert seqa[c : c + 4] == seqb[r : r + 4] def test_top_k_ungapped_overlapping_candidates(common_data: AlignmentData) -> None: diff --git a/uv.lock b/uv.lock index 0828b28..fff5f06 100644 --- a/uv.lock +++ b/uv.lock @@ -1019,7 +1019,7 @@ wheels = [ [[package]] name = "seq-smith" -version = "0.4.0" +version = "0.5.1" source = { editable = "." } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },