Skip to content
83 changes: 43 additions & 40 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -174,55 +174,58 @@ target_link_libraries(leviosam ${SDSL_LIB})
target_link_libraries(leviosam pthread)
target_link_libraries(leviosam z)

option(BUILD_TESTS "Set OFF to not compile the tests" ON)

if(${BUILD_TESTS})
# Download and unpack googletest at configure time
configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt)
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
if(result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()
execute_process(COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
if(result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()

# Download and unpack googletest at configure time
configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt)
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
if(result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()
execute_process(COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
if(result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()

# Prevent overriding the parent project's compiler/linker
# settings on Windows
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
# Prevent overriding the parent project's compiler/linker
# settings on Windows
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

enable_testing()
enable_testing()

# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src
${CMAKE_CURRENT_BINARY_DIR}/googletest-build
EXCLUDE_FROM_ALL)
# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src
${CMAKE_CURRENT_BINARY_DIR}/googletest-build
EXCLUDE_FROM_ALL)


add_executable(leviosam_test src/leviosam_test.cpp)
target_link_libraries(leviosam_test lvsam)
target_link_libraries(leviosam_test ${HTS_LIB})
target_link_libraries(leviosam_test ${SDSL_LIB})
target_link_libraries(leviosam_test gtest gtest_main)
add_executable(leviosam_test src/leviosam_test.cpp)
target_link_libraries(leviosam_test lvsam)
target_link_libraries(leviosam_test ${HTS_LIB})
target_link_libraries(leviosam_test ${SDSL_LIB})
target_link_libraries(leviosam_test gtest gtest_main)

add_test(NAME leviosam_test COMMAND leviosam_test
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testdata
)
add_test(NAME leviosam_test COMMAND leviosam_test
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testdata
)


add_executable(chain_test src/chain_test.cpp)
target_link_libraries(chain_test lvsam)
target_link_libraries(chain_test ${HTS_LIB})
target_link_libraries(chain_test ${SDSL_LIB})
target_link_libraries(chain_test gtest gtest_main)
add_executable(chain_test src/chain_test.cpp)
target_link_libraries(chain_test lvsam)
target_link_libraries(chain_test ${HTS_LIB})
target_link_libraries(chain_test ${SDSL_LIB})
target_link_libraries(chain_test gtest gtest_main)

add_test(NAME chain_test COMMAND chain_test
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testdata
)
add_test(NAME chain_test COMMAND chain_test
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testdata
)
endif()

INSTALL(TARGETS leviosam DESTINATION bin)
INSTALL(FILES src/leviosam.hpp DESTINATION include)
Expand Down
159 changes: 106 additions & 53 deletions src/leviosam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ namespace lift {
// Serialization
class Name2IdxMap: public std::unordered_map<std::string,int> {
public:
size_t serialize(std::ofstream& out) {
size_t serialize(std::ostream& out) {
size_t size = 0;
size_t map_size = this->size();
out.write(reinterpret_cast<char*>(&map_size), sizeof(map_size));
Expand All @@ -117,7 +117,7 @@ class Name2IdxMap: public std::unordered_map<std::string,int> {
return size;
}

void load(std::ifstream& in) {
void load(std::istream& in) {
size_t map_size;
in.read(reinterpret_cast<char*>(&map_size), sizeof(map_size));
for (auto i = 0; i < map_size; ++i) {
Expand All @@ -137,7 +137,7 @@ class Name2IdxMap: public std::unordered_map<std::string,int> {

class Name2NameMap : public std::unordered_map<std::string,std::string> {
public:
size_t serialize(std::ofstream& out) {
size_t serialize(std::ostream& out) {
size_t size = 0;
size_t map_size = this->size();
out.write(reinterpret_cast<char*>(&map_size), sizeof(map_size));
Expand All @@ -154,7 +154,7 @@ class Name2NameMap : public std::unordered_map<std::string,std::string> {
return size;
}

void load(std::ifstream& in) {
void load(std::istream& in) {
size_t map_size;
in.read(reinterpret_cast<char*>(&map_size), sizeof(map_size));
for (auto i = 0; i < map_size; ++i) {
Expand All @@ -173,6 +173,85 @@ class Name2NameMap : public std::unordered_map<std::string,std::string> {
}
};

// Builder class for lift construction
class Lift_builder
{
friend class Lift;

protected:
sdsl::bit_vector m_ibv;
sdsl::bit_vector m_dbv;
sdsl::bit_vector m_sbv;
size_t m_size = 0; // Size of the contig
size_t m_x = 0;
int m_ppos = 0;
int m_prev_is_ins = 0; // 1 if last variant is an insertion
public:
Lift_builder()
{

}

Lift_builder(const size_t l)
{
init(l);
}

inline void init(const size_t l)
{
m_ibv = sdsl::bit_vector(l*2);
m_dbv = sdsl::bit_vector(l*2);
m_sbv = sdsl::bit_vector(l*2);
m_size = l;
m_x = 0;
m_ppos = 0;
m_prev_is_ins = 0;
}

inline int prev_is_ins() const {return m_prev_is_ins;}
inline int contig_size() const {return m_size;}

inline void set(
const size_t rpos, // Position in the reference
const int var_type, // Type of the variant
const int rlen, // Length of the reference allele variant
const int alen // Length of the alternate allele variant
)
{
m_x += (rpos - m_ppos); // x should now be pointing to *start* of current variant wrt alignment
m_ppos = rpos; // m_ppos now pointing to *start* of current variant
if (var_type == VCF_INDEL) {
++m_ppos;
++m_x;
m_prev_is_ins = 0;
if (rlen < alen) { // ins
for (size_t i = 0; i < alen - rlen; ++i) {
m_ibv[m_x++] = 1;
}
m_prev_is_ins = 1;
} else if (rlen > alen) { // del
for (size_t i = 0; i < rlen - alen; ++i) {
m_dbv[m_x++] = 1;
++m_ppos; // s1 advances, so m_ppos advances
}
}
--m_x;
--m_ppos;
} else if (var_type == VCF_SNP) {
m_sbv[m_x] = 1; // no need to increment x here?
}
}

inline void finalize()
{
if (m_ppos > m_size) {
die( "something went wrong, we went past bounds of s1 sequence. exiting...\n");
}
m_ibv.resize(m_x + (m_size - m_ppos));
m_dbv.resize(m_x + (m_size - m_ppos));
m_sbv.resize(m_x + (m_size - m_ppos));
}
};

// liftover data structure for a single sequence
class Lift {
Expand All @@ -183,7 +262,7 @@ class Lift {
init_rs_sls();
}

Lift(std::ifstream& in) {
Lift(std::istream& in) {
this->load(in);
}

Expand All @@ -196,6 +275,16 @@ class Lift {
init_rs_sls();
}

// construct liftover from builder
Lift(Lift_builder& builder) :
ins(builder.m_ibv),
del(builder.m_dbv),
snp(builder.m_sbv)
{
// create rank and select data-structures for everything
init_rs_sls();
}

Lift(const Lift& rhs) : ins(rhs.ins), del(rhs.del), snp(rhs.snp) {
init_rs_sls();
}
Expand Down Expand Up @@ -375,7 +464,7 @@ class Lift {
// }

// Save to stream
size_t serialize(std::ofstream& out) const {
size_t serialize(std::ostream& out) const {
size_t size = 0;
size += ins.serialize(out);
size += del.serialize(out);
Expand Down Expand Up @@ -420,7 +509,7 @@ class LiftMap {

~LiftMap() {}

LiftMap(std::ifstream& in) {
LiftMap(std::istream& in) {
this->load(in);
}

Expand Down Expand Up @@ -467,15 +556,13 @@ class LiftMap {
int lmap_idx = 0; // current index of lmap


sdsl::bit_vector ibv, dbv, sbv; // ins, del, snp
Lift_builder builder;
bool no_sample = (sample_name == "");
if (no_sample) fprintf(stderr, "no sample given, assuming GT=1 for all variants\n");
if (!no_sample && bcf_hdr_set_samples(hdr, sample_name.c_str(), 0))
die("error: sample does not exist!\n");
bcf1_t* rec = bcf_init();
size_t x = 0;
int rid = -1;
int ppos = 0;
int tppos = 0;
size_t l = 0;
int hap = stoi(haplotype);
Expand All @@ -484,13 +571,8 @@ class LiftMap {
if (rec->rid != rid) {
if (rid != -1) { // save the bitvector
// condense the bit vectors and add to map
if (ppos > l) {
die( "something went wrong, we went past bounds of s1 sequence. exiting...\n");
}
ibv.resize(x + (l - ppos));
dbv.resize(x + (l - ppos));
sbv.resize(x + (l - ppos));
lmap.push_back(Lift(ibv,dbv,sbv));
builder.finalize();
lmap.push_back(Lift(builder));
}
rid = rec->rid;
if (get_names) {
Expand All @@ -512,11 +594,7 @@ class LiftMap {
}
// initialize bit vectors for this contig
// TODO: maybe set vec size to something less than 2l
ibv = sdsl::bit_vector(l*2);
dbv = sdsl::bit_vector(l*2);
sbv = sdsl::bit_vector(l*2);
x = 0;
ppos = 0;
builder.init(l);
tppos = 0; // end of last variant processed
}
int32_t* gt_arr = NULL;
Expand Down Expand Up @@ -555,40 +633,15 @@ class LiftMap {
}

// at this point, ppos and x should point to *end* of last variant in s1 & s2 alignment, resp.
x += (rec->pos - ppos); // x should now be pointing to *start* of current variant wrt alignment
ppos = rec->pos; // ppos now pointing to *start* of current variant
if (var_type == VCF_INDEL) {
++ppos;
++x;
prev_is_ins = 0;
if (rlen < alen) { // ins
for (size_t i = 0; i < alen - rlen; ++i) {
ibv[x++] = 1;
}
prev_is_ins = 1;
} else if (rlen > alen) { // del
for (size_t i = 0; i < rlen - alen; ++i) {
dbv[x++] = 1;
++ppos; // s1 advances, so ppos advances
}
}
--x;
--ppos;
} else if (var_type == VCF_SNP) {
sbv[x] = 1; // no need to increment x here?
}
builder.set(rec->pos,var_type,rlen,alen);
prev_is_ins = builder.prev_is_ins();
tppos = rec->pos + rec->rlen - 1; // tppos points to end of this variant
} else {
continue;
}
}
if (ppos > l) {
die("something went wrong, we went past bounds of s1 sequence. exiting...\n");
}
ibv.resize(x + (l - ppos));
dbv.resize(x + (l - ppos));
sbv.resize(x + (l - ppos));
lmap.push_back(Lift(ibv,dbv,sbv));
builder.finalize();
lmap.push_back(Lift(builder));
length_map = ls;
}

Expand Down Expand Up @@ -695,7 +748,7 @@ class LiftMap {
}

// saves to stream
size_t serialize(std::ofstream& out) {
size_t serialize(std::ostream& out) {
size_t size = 0;
size_t nelems = lmap.size();
out.write(reinterpret_cast<char*>(&nelems), sizeof(nelems));
Expand All @@ -711,7 +764,7 @@ class LiftMap {
}

// loads from stream
void load(std::ifstream& in) {
void load(std::istream& in) {
size_t nelems;
in.read(reinterpret_cast<char*>(&nelems), sizeof(nelems));
for (auto i = 0; i < nelems; ++i) {
Expand Down
Loading