From c8f5f873936032e8369f01a2de0d5e41b30c3d10 Mon Sep 17 00:00:00 2001 From: maxrossi91 Date: Fri, 19 Nov 2021 12:07:39 -0500 Subject: [PATCH 1/5] Add option to avoid building tests --- CMakeLists.txt | 99 ++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 48 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6311a40..b26c7a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,56 +85,59 @@ target_link_libraries(leviosam ${SDSL_LIB}) target_link_libraries(leviosam pthread) target_link_libraries(leviosam z) - -# Download and unpack googletest at configure time -configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt) -execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) -if(result) - message(FATAL_ERROR "CMake step for googletest failed: ${result}") -endif() -execute_process(COMMAND ${CMAKE_COMMAND} --build . - RESULT_VARIABLE result - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) -if(result) - message(FATAL_ERROR "Build step for googletest failed: ${result}") +option(BUILD_TESTS "Set OFF to not compile the tests" ON) + +if(${BUILD_TESTS}) + # Download and unpack googletest at configure time + configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt) + execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) + if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") + endif() + execute_process(COMMAND ${CMAKE_COMMAND} --build . + RESULT_VARIABLE result + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) + if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") + endif() + + # Prevent overriding the parent project's compiler/linker + # settings on Windows + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + + enable_testing() + + # Add googletest directly to our build. This defines + # the gtest and gtest_main targets. + add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src + ${CMAKE_CURRENT_BINARY_DIR}/googletest-build + EXCLUDE_FROM_ALL) + + + add_executable(leviosam_test src/leviosam_test.cpp) + target_link_libraries(leviosam_test lvsam) + target_link_libraries(leviosam_test ${HTS_LIB}) + target_link_libraries(leviosam_test ${SDSL_LIB}) + target_link_libraries(leviosam_test gtest gtest_main) + + add_test(NAME leviosam_test COMMAND leviosam_test + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testdata + ) + + + add_executable(chain_test src/chain_test.cpp) + target_link_libraries(chain_test lvsam) + target_link_libraries(chain_test ${HTS_LIB}) + target_link_libraries(chain_test ${SDSL_LIB}) + target_link_libraries(chain_test gtest gtest_main) + + add_test(NAME chain_test COMMAND chain_test + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testdata + ) endif() -# Prevent overriding the parent project's compiler/linker -# settings on Windows -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) - -enable_testing() - -# Add googletest directly to our build. This defines -# the gtest and gtest_main targets. -add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src - ${CMAKE_CURRENT_BINARY_DIR}/googletest-build - EXCLUDE_FROM_ALL) - - -add_executable(leviosam_test src/leviosam_test.cpp) -target_link_libraries(leviosam_test lvsam) -target_link_libraries(leviosam_test ${HTS_LIB}) -target_link_libraries(leviosam_test ${SDSL_LIB}) -target_link_libraries(leviosam_test gtest gtest_main) - -add_test(NAME leviosam_test COMMAND leviosam_test - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testdata -) - - -add_executable(chain_test src/chain_test.cpp) -target_link_libraries(chain_test lvsam) -target_link_libraries(chain_test ${HTS_LIB}) -target_link_libraries(chain_test ${SDSL_LIB}) -target_link_libraries(chain_test gtest gtest_main) - -add_test(NAME chain_test COMMAND chain_test - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testdata -) - INSTALL(TARGETS leviosam DESTINATION bin) INSTALL(FILES src/leviosam.hpp DESTINATION include) From d006ff3e9d4c65827ea0550e8903a6476873d49e Mon Sep 17 00:00:00 2001 From: maxrossi91 Date: Tue, 23 Nov 2021 03:53:41 -0500 Subject: [PATCH 2/5] Update serialize and load for compatibility with sdsl --- src/leviosam.hpp | 18 +++++++++--------- src/leviosam_utils.cpp | 8 ++++---- src/leviosam_utils.hpp | 8 ++++---- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/leviosam.hpp b/src/leviosam.hpp index ca2f64d..89ebc6d 100644 --- a/src/leviosam.hpp +++ b/src/leviosam.hpp @@ -99,7 +99,7 @@ namespace lift { // Serialization class Name2IdxMap: public std::unordered_map { public: - size_t serialize(std::ofstream& out) { + size_t serialize(std::ostream& out) { size_t size = 0; size_t map_size = this->size(); out.write(reinterpret_cast(&map_size), sizeof(map_size)); @@ -113,7 +113,7 @@ class Name2IdxMap: public std::unordered_map { return size; } - void load(std::ifstream& in) { + void load(std::istream& in) { size_t map_size; in.read(reinterpret_cast(&map_size), sizeof(map_size)); for (auto i = 0; i < map_size; ++i) { @@ -133,7 +133,7 @@ class Name2IdxMap: public std::unordered_map { class Name2NameMap : public std::unordered_map { public: - size_t serialize(std::ofstream& out) { + size_t serialize(std::ostream& out) { size_t size = 0; size_t map_size = this->size(); out.write(reinterpret_cast(&map_size), sizeof(map_size)); @@ -150,7 +150,7 @@ class Name2NameMap : public std::unordered_map { return size; } - void load(std::ifstream& in) { + void load(std::istream& in) { size_t map_size; in.read(reinterpret_cast(&map_size), sizeof(map_size)); for (auto i = 0; i < map_size; ++i) { @@ -179,7 +179,7 @@ class Lift { init_rs_sls(); } - Lift(std::ifstream& in) { + Lift(std::istream& in) { this->load(in); } @@ -371,7 +371,7 @@ class Lift { // } // Save to stream - size_t serialize(std::ofstream& out) const { + size_t serialize(std::ostream& out) const { size_t size = 0; size += ins.serialize(out); size += del.serialize(out); @@ -416,7 +416,7 @@ class LiftMap { ~LiftMap() {} - LiftMap(std::ifstream& in) { + LiftMap(std::istream& in) { this->load(in); } @@ -691,7 +691,7 @@ class LiftMap { } // saves to stream - size_t serialize(std::ofstream& out) { + size_t serialize(std::ostream& out) { size_t size = 0; size_t nelems = lmap.size(); out.write(reinterpret_cast(&nelems), sizeof(nelems)); @@ -707,7 +707,7 @@ class LiftMap { } // loads from stream - void load(std::ifstream& in) { + void load(std::istream& in) { size_t nelems; in.read(reinterpret_cast(&nelems), sizeof(nelems)); for (auto i = 0; i < nelems; ++i) { diff --git a/src/leviosam_utils.cpp b/src/leviosam_utils.cpp index 49d4aed..e15d776 100644 --- a/src/leviosam_utils.cpp +++ b/src/leviosam_utils.cpp @@ -293,7 +293,7 @@ FastqRecord::~FastqRecord() { /* Write a FastqRecord object to a FASTQ file */ -int FastqRecord::write(std::ofstream& out_fq, std::string name) { +int FastqRecord::write(std::ostream& out_fq, std::string name) { if (aln != NULL) { seq_str = get_read(aln); std::string qual_seq(""); @@ -320,7 +320,7 @@ int FastqRecord::write(std::ofstream& out_fq, std::string name) { */ fastq_map read_deferred_bam( samFile* dsam_fp, samFile* out_dsam_fp, sam_hdr_t* hdr, - std::ofstream& out_r1_fp, std::ofstream& out_r2_fp + std::ostream& out_r1_fp, std::ostream& out_r2_fp ) { fastq_map reads1, reads2; bam1_t* aln = bam_init1(); @@ -511,7 +511,7 @@ sam_hdr_t* fai_to_hdr(std::string fai_fn, const sam_hdr_t* const hdr_orig) { // Serialize a `vector>` object size_t serialize_lengthmap( - std::ofstream& out, std::vector> length_map + std::ostream& out, std::vector> length_map ) { size_t size = 0; size_t nelems = length_map.size(); @@ -529,7 +529,7 @@ size_t serialize_lengthmap( // Load a serialized `vector>` object -std::vector> load_lengthmap(std::ifstream& in) { +std::vector> load_lengthmap(std::istream& in) { size_t map_size; in.read(reinterpret_cast(&map_size), sizeof(map_size)); std::vector> length_map; diff --git a/src/leviosam_utils.hpp b/src/leviosam_utils.hpp index e448a3b..ad562f2 100644 --- a/src/leviosam_utils.hpp +++ b/src/leviosam_utils.hpp @@ -58,7 +58,7 @@ class FastqRecord { } } - int write(std::ofstream& out_fq, std::string name); + int write(std::ostream& out_fq, std::string name); std::string seq_str; std::string qual_str; bam1_t* aln = NULL; @@ -70,7 +70,7 @@ fastq_map read_unpaired_fq( const std::string& fq_fname); fastq_map read_deferred_bam( samFile* dsam_fp, samFile* out_dsam_fp, sam_hdr_t* hdr, - std::ofstream& out_r1_fp, std::ofstream& out_r2_fp); + std::ostream& out_r1_fp, std::ostream& out_r2_fp); class WriteDeferred { public: @@ -126,9 +126,9 @@ sam_hdr_t* lengthmap_to_hdr( std::vector> lm, const sam_hdr_t* const hdr_orig); -std::vector> load_lengthmap(std::ifstream& in); +std::vector> load_lengthmap(std::istream& in); size_t serialize_lengthmap( - std::ofstream& out, + std::ostream& out, std::vector> length_map); std::string make_cmd(int argc, char** argv); From 00ee9133bdf2a7185d06b133b7f667af0c678e69 Mon Sep 17 00:00:00 2001 From: maxrossi91 Date: Wed, 8 Dec 2021 16:16:05 -0500 Subject: [PATCH 3/5] Add Lift builder class --- src/leviosam.hpp | 141 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 44 deletions(-) diff --git a/src/leviosam.hpp b/src/leviosam.hpp index 89ebc6d..4569661 100644 --- a/src/leviosam.hpp +++ b/src/leviosam.hpp @@ -169,6 +169,85 @@ class Name2NameMap : public std::unordered_map { } }; +// Builder class for lift construction +class Lift_builder +{ + friend class Lift; + +protected: + sdsl::bit_vector m_ibv; + sdsl::bit_vector m_dbv; + sdsl::bit_vector m_sbv; + size_t m_size = 0; // Size of the contig + size_t m_x = 0; + int m_ppos = 0; + int m_prev_is_ins = 0; // 1 if last variant is an insertion +public: + Lift_builder() + { + + } + + Lift_builder(const size_t l) + { + init(l); + } + + inline void init(const size_t l) + { + m_ibv = sdsl::bit_vector(l*2); + m_dbv = sdsl::bit_vector(l*2); + m_sbv = sdsl::bit_vector(l*2); + m_size = l; + m_x = 0; + m_ppos = 0; + m_prev_is_ins = 0; + } + + inline int prev_is_ins() const {return m_prev_is_ins;} + inline int contig_size() const {return m_size;} + + inline void set( + const size_t rpos, // Position in the reference + const int var_type, // Type of the variant + const int rlen, // Length of the reference allele variant + const int alen // Length of the alternate allele variant + ) + { + m_x += (rpos - m_ppos); // x should now be pointing to *start* of current variant wrt alignment + m_ppos = rpos; // m_ppos now pointing to *start* of current variant + if (var_type == VCF_INDEL) { + ++m_ppos; + ++m_x; + m_prev_is_ins = 0; + if (rlen < alen) { // ins + for (size_t i = 0; i < alen - rlen; ++i) { + m_ibv[m_x++] = 1; + } + m_prev_is_ins = 1; + } else if (rlen > alen) { // del + for (size_t i = 0; i < rlen - alen; ++i) { + m_dbv[m_x++] = 1; + ++m_ppos; // s1 advances, so m_ppos advances + } + } + --m_x; + --m_ppos; + } else if (var_type == VCF_SNP) { + m_sbv[m_x] = 1; // no need to increment x here? + } + } + + inline void finalize() + { + if (m_ppos > m_size) { + die( "something went wrong, we went past bounds of s1 sequence. exiting...\n"); + } + m_ibv.resize(m_x + (m_size - m_ppos)); + m_dbv.resize(m_x + (m_size - m_ppos)); + m_sbv.resize(m_x + (m_size - m_ppos)); + } +}; // liftover data structure for a single sequence class Lift { @@ -192,6 +271,16 @@ class Lift { init_rs_sls(); } + // construct liftover from builder + Lift(Lift_builder& builder) : + ins(builder.m_ibv), + del(builder.m_dbv), + snp(builder.m_sbv) + { + // create rank and select data-structures for everything + init_rs_sls(); + } + Lift(const Lift& rhs) : ins(rhs.ins), del(rhs.del), snp(rhs.snp) { init_rs_sls(); } @@ -463,15 +552,13 @@ class LiftMap { int lmap_idx = 0; // current index of lmap - sdsl::bit_vector ibv, dbv, sbv; // ins, del, snp + Lift_builder builder; bool no_sample = (sample_name == ""); if (no_sample) fprintf(stderr, "no sample given, assuming GT=1 for all variants\n"); if (!no_sample && bcf_hdr_set_samples(hdr, sample_name.c_str(), 0)) die("error: sample does not exist!\n"); bcf1_t* rec = bcf_init(); - size_t x = 0; int rid = -1; - int ppos = 0; int tppos = 0; size_t l = 0; int hap = stoi(haplotype); @@ -480,13 +567,8 @@ class LiftMap { if (rec->rid != rid) { if (rid != -1) { // save the bitvector // condense the bit vectors and add to map - if (ppos > l) { - die( "something went wrong, we went past bounds of s1 sequence. exiting...\n"); - } - ibv.resize(x + (l - ppos)); - dbv.resize(x + (l - ppos)); - sbv.resize(x + (l - ppos)); - lmap.push_back(Lift(ibv,dbv,sbv)); + builder.finalize(); + lmap.push_back(Lift(builder)); } rid = rec->rid; if (get_names) { @@ -508,11 +590,7 @@ class LiftMap { } // initialize bit vectors for this contig // TODO: maybe set vec size to something less than 2l - ibv = sdsl::bit_vector(l*2); - dbv = sdsl::bit_vector(l*2); - sbv = sdsl::bit_vector(l*2); - x = 0; - ppos = 0; + builder.init(l); tppos = 0; // end of last variant processed } int32_t* gt_arr = NULL; @@ -551,40 +629,15 @@ class LiftMap { } // at this point, ppos and x should point to *end* of last variant in s1 & s2 alignment, resp. - x += (rec->pos - ppos); // x should now be pointing to *start* of current variant wrt alignment - ppos = rec->pos; // ppos now pointing to *start* of current variant - if (var_type == VCF_INDEL) { - ++ppos; - ++x; - prev_is_ins = 0; - if (rlen < alen) { // ins - for (size_t i = 0; i < alen - rlen; ++i) { - ibv[x++] = 1; - } - prev_is_ins = 1; - } else if (rlen > alen) { // del - for (size_t i = 0; i < rlen - alen; ++i) { - dbv[x++] = 1; - ++ppos; // s1 advances, so ppos advances - } - } - --x; - --ppos; - } else if (var_type == VCF_SNP) { - sbv[x] = 1; // no need to increment x here? - } + builder.set(rec->pos,var_type,rlen,alen); + prev_is_ins = builder.prev_is_ins(); tppos = rec->pos + rec->rlen - 1; // tppos points to end of this variant } else { continue; } } - if (ppos > l) { - die("something went wrong, we went past bounds of s1 sequence. exiting...\n"); - } - ibv.resize(x + (l - ppos)); - dbv.resize(x + (l - ppos)); - sbv.resize(x + (l - ppos)); - lmap.push_back(Lift(ibv,dbv,sbv)); + builder.finalize(); + lmap.push_back(Lift(builder)); length_map = ls; } From 37c589a266ad466e16d25a180524b90ad6d27f51 Mon Sep 17 00:00:00 2001 From: maxrossi91 Date: Sat, 4 Mar 2023 03:35:53 -0500 Subject: [PATCH 4/5] Fix after merge issues --- src/leviosam_utils.cpp | 9 ++++----- src/leviosam_utils.hpp | 10 +--------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/leviosam_utils.cpp b/src/leviosam_utils.cpp index 64aac57..9f4a3ad 100644 --- a/src/leviosam_utils.cpp +++ b/src/leviosam_utils.cpp @@ -98,13 +98,12 @@ void WriteDeferred::print_info() { if (bed_defer_dest.get_fn().size() > 0){ std::cerr << " - In BED deferred (dest) " << bed_defer_dest.get_fn() << "\n"; } - if (bed_remove_source.get_fn().size() > 0){ - std::cerr << " - BED removed (source) " << bed_remove_source.get_fn() << "\n"; + if (bed_commit_source.get_fn().size() > 0){ + std::cerr << " - Not in BED commit (source) " << bed_commit_source.get_fn() << "\n"; } - if (bed_remove_dest.get_fn().size() > 0){ - std::cerr << " - BED removed (dest) " << bed_remove_dest.get_fn() << "\n"; + if (bed_commit_dest.get_fn().size() > 0){ + std::cerr << " - Not in BED commit (dest) " << bed_commit_dest.get_fn() << "\n"; } - return false; } /* Returns true if an alignment is excluded (committed) diff --git a/src/leviosam_utils.hpp b/src/leviosam_utils.hpp index 1cadecf..4de497e 100644 --- a/src/leviosam_utils.hpp +++ b/src/leviosam_utils.hpp @@ -65,20 +65,12 @@ class FastqRecord { } } - int write(std::ostream& out_fq, std::string name); + int write(ogzstream& out_fq, std::string name); std::string seq_str; std::string qual_str; bam1_t* aln = NULL; }; -typedef robin_hood::unordered_map fastq_map; - -fastq_map read_unpaired_fq( - const std::string& fq_fname); -fastq_map read_deferred_bam( - samFile* dsam_fp, samFile* out_dsam_fp, sam_hdr_t* hdr, - std::ostream& out_r1_fp, std::ostream& out_r2_fp); - class WriteDeferred { public: WriteDeferred() : write_deferred(false) {}; From 2cb7c1b68dd8c84fc1b64f65c13f6bfd2f51e5d0 Mon Sep 17 00:00:00 2001 From: maxrossi91 Date: Sat, 4 Mar 2023 06:45:29 -0500 Subject: [PATCH 5/5] Fix memcpy-param-overlap --- src/leviosam_utils.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/leviosam_utils.cpp b/src/leviosam_utils.cpp index 9f4a3ad..3079088 100644 --- a/src/leviosam_utils.cpp +++ b/src/leviosam_utils.cpp @@ -219,13 +219,6 @@ void update_cigar( memmove(aln->data + cigar_st + n_cigar4, aln->data + cigar_st + fake_bytes, orig_len - (cigar_st + fake_bytes)); - // If new n_cigar is greater, copy the real CIGAR to the right place. - // Skipped this if new n_cigar is smaller than the original value. - if (n_cigar4 > fake_bytes){ - memcpy(aln->data + cigar_st, - aln->data + (n_cigar4 - fake_bytes) + 8, - n_cigar4); - } aln->core.n_cigar = new_cigar.size(); } for (int i = 0; i < aln->core.n_cigar; i++){