Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions src/chain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,46 @@ bool ChainMap::interval_map_sanity_check() {
return true;
}

/**
* @brief Logs statistics about the chain index.
*
* This function reports the number of contigs, intervals, bases, and average
* interval length present in the chain interval map. If a nonzero byte count is
* provided, it also logs the memory footprint in bytes. Per-contig statistics,
* such as the number of intervals and bases, as well as the average interval
* length, are reported for each contig.
*
* @param bytes (optional) The size in bytes of the loaded index to report.
*/
void ChainMap::log_index_size(size_t bytes) const {
size_t total_intervals = 0;
size_t total_bases = 0;
std::vector<std::pair<std::string, std::pair<size_t, size_t>>> stats;
stats.reserve(interval_map.size());
for (const auto &kv : interval_map) {
size_t n = kv.second.size();
size_t bases = 0;
for (const auto &intvl : kv.second) {
bases += static_cast<size_t>(intvl.source_end) -
static_cast<size_t>(intvl.source_start);
}
total_intervals += n;
total_bases += bases;
stats.push_back({kv.first, {n, bases}});
}
size_t avg_len = total_intervals ? total_bases / total_intervals : 0;
std::cerr << "[I::chain::log_index_size] contigs=" << interval_map.size()
<< " intervals=" << total_intervals << " bases=" << total_bases
<< " avg_len=" << avg_len;
if (bytes) std::cerr << " bytes=" << bytes << "\n";
for (const auto &s : stats) {
size_t avg = s.second.first ? s.second.second / s.second.first : 0;
std::cerr << "[I::chain::log_index_size] " << s.first
<< ": intervals=" << s.second.first
<< " bases=" << s.second.second << " avg_len=" << avg << "\n";
}
}

/* Get the rank in the start bitvector at contig[pos]
* If pos > len(contig): return rank(contig[-1])
*
Expand Down
1 change: 1 addition & 0 deletions src/chain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class ChainMap {
void debug_print_interval_map();
void debug_print_intervals(std::string contig, const int n);
bool interval_map_sanity_check();
void log_index_size(size_t bytes = 0) const;
int get_start_rank(std::string contig, int pos);
int get_end_rank(std::string contig, int pos);
bool update_interval_indexes(const std::string contig, const int32_t pos,
Expand Down
13 changes: 10 additions & 3 deletions src/leviosam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <algorithm>
#include <ctime>
#include <vector>
#include <fstream>

#include "aln.hpp"
#include "collate.hpp"
Expand Down Expand Up @@ -121,7 +122,8 @@ void serialize_run(lift_opts args) {
chain::ChainMap cfp(args.chain_fname, args.verbose,
args.allowed_cigar_changes, args.length_map);
std::ofstream o(fn_index, std::ios::binary);
cfp.serialize(o);
size_t bytes = cfp.serialize(o);
cfp.log_index_size(bytes);
std::cerr << "[I::serialize_run] levioSAM ChainMap saved to "
<< fn_index << "\n";
} else {
Expand Down Expand Up @@ -299,14 +301,17 @@ std::map<std::string, std::string> load_fasta(std::string ref_name) {
}

void lift_run(lift_opts args) {
size_t chain_bytes = 0;
chain::ChainMap chain_map = [&] {
if (args.chainmap_fname != "") {
std::cerr << "[I::lift_run] Loading levioSAM index...";
std::cerr << "[I::lift_run] Loading levioSAM 2 index...";
std::ifstream in(args.chainmap_fname, std::ios::binary);
std::ifstream fs(args.chainmap_fname, std::ios::binary | std::ios::ate);
chain_bytes = fs.tellg();
return chain::ChainMap(in, args.verbose,
args.allowed_cigar_changes);
} else if (args.chain_fname != "") {
std::cerr << "[I::lift_run] Building levioSAM index...";
std::cerr << "[I::lift_run] Building levioSAM 2 index...";
if (args.length_map.size() == 0) {
std::cerr << "[E::lift_run] No length map is found. Please "
"set -F properly.\n";
Expand Down Expand Up @@ -349,6 +354,8 @@ void lift_run(lift_opts args) {
exit(1);
}
std::cerr << "done\n";
if (args.chainmap_fname != "" || args.chain_fname != "")
chain_map.log_index_size(chain_bytes);

samFile *sam_fp = (args.sam_fname == "")
? sam_open("-", "r")
Expand Down
Loading