Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "seq_smith"
version = "0.5.0"
version = "0.5.1"
edition = "2021"

[dependencies]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "seq-smith"
version = "0.5.0"
version = "0.5.1"
authors = [
{ name = "Tobias Sargeant", email = "tobias.sargeant@gmail.com" },
]
Expand Down
29 changes: 12 additions & 17 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,12 +407,10 @@ fn traceback(
let mut count = 0;
loop {
// Statistics calculation
let residue_a = params.sa[s_col as usize];
let residue_b = params.sb[s_row as usize];
if residue_a == residue_b {
if params.sa[s_col as usize] == params.sb[s_row as usize] {
stats.num_exact_matches += 1;
} else {
let score = params.match_score(residue_a as usize, residue_b as usize);
let score = params.match_score(s_row as usize, s_col as usize);
if score > 0 {
stats.num_positive_mismatches += 1;
} else {
Expand Down Expand Up @@ -1098,8 +1096,8 @@ fn _top_k_ungapped_local_align_core(
let sb_end = candidate.sb_start + candidate.len;

let overlap = alignments.iter().any(|prev| {
let p_sa_start = (prev.fragments[0].sa_start - 1) as usize; // 0-indexed
let p_sb_start = (prev.fragments[0].sb_start - 1) as usize; // 0-indexed
let p_sa_start = prev.fragments[0].sa_start as usize;
let p_sb_start = prev.fragments[0].sb_start as usize;
let p_sa_end = p_sa_start + prev.fragments[0].len as usize;
let p_sb_end = p_sb_start + prev.fragments[0].len as usize;

Expand All @@ -1119,25 +1117,22 @@ fn _top_k_ungapped_local_align_core(
for i in 0..candidate.len {
let r = candidate.sb_start + i;
let c = candidate.sa_start + i;
let val = params.match_score(c, r);
if params.sa[c] == params.sb[r] {
if params.sb[r] == params.sa[c] {
stats.num_exact_matches += 1;
} else if val > 0 {
} else if params.match_score(r, c) > 0 {
stats.num_positive_mismatches += 1;
} else {
stats.num_negative_mismatches += 1;
}
Comment on lines +1120 to 1126
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This block can be made more efficient. The residues are fetched for the equality check, and then fetched again inside params.match_score. You can fetch them once and reuse the values to directly access the score matrix, avoiding the overhead of the match_score method call and redundant lookups within this hot loop.

                    let residue_a = params.sa[c];
                    let residue_b = params.sb[r];
                    if residue_a == residue_b {
                        stats.num_exact_matches += 1;
                    } else {
                        let score = params.score_matrix[[residue_a as usize, residue_b as usize]];
                        if score > 0 {
                            stats.num_positive_mismatches += 1;
                        } else {
                            stats.num_negative_mismatches += 1;
                        }
                    }

}

let frag = AlignmentFragment {
fragment_type: FragmentType::Match,
sa_start: (candidate.sa_start + 1) as i32,
sb_start: (candidate.sb_start + 1) as i32,
len: candidate.len as i32,
};

alignments.push(Alignment {
fragments: vec![frag],
fragments: vec![AlignmentFragment {
fragment_type: FragmentType::Match,
sa_start: candidate.sa_start as i32,
sb_start: candidate.sb_start as i32,
len: candidate.len as i32,
}],
score: candidate.score,
stats: stats,
});
Expand Down
22 changes: 20 additions & 2 deletions tests/test_seq_smith.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,26 @@ def test_top_k_ungapped_simple() -> None:
assert alignments[1].score == 8

starts = sorted([(a.fragments[0].sa_start, a.fragments[0].sb_start) for a in alignments])
assert starts[0] == (1, 1) # 1-based index in fragments
assert starts[1] == (9, 9)
assert starts[0] == (0, 0)
assert starts[1] == (8, 8)


def test_top_k_ungapped_overlap() -> None:
# Use custom alphabet to support Z/W if needed, or just use ACGT
alphabet = "ACGT"
seqa = encode("AAAATTTTCCCCAAAATTTTCCCCAAAATTTTCCCC", alphabet)
seqb = encode("AAAAGGGGCCCC", alphabet)

# matrix: match=2, mismatch=-5
score_matrix = make_score_matrix(alphabet, match_score=2, mismatch_score=-5)

alignments = top_k_ungapped_local_align(seqa, seqb, score_matrix, k=5, filter_overlap_b=False)

assert len(alignments) == 5 # 6 total alignments, but k=5
assert all(a.score == 8 for a in alignments)
starts = sorted([(a.fragments[0].sa_start, a.fragments[0].sb_start) for a in alignments])
for c, r in starts:
assert seqa[c : c + 4] == seqb[r : r + 4]


def test_top_k_ungapped_overlapping_candidates(common_data: AlignmentData) -> None:
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading