-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcorrectness_testing.cpp
More file actions
124 lines (105 loc) · 4.03 KB
/
correctness_testing.cpp
File metadata and controls
124 lines (105 loc) · 4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#include <algorithm>
#include <cerrno>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include "s2st.hpp"
#include "superfix.hpp"
#include "fstack.hpp"
#include "parse_tree_ptr.hpp"
#include "uncompressed_parse_tree_ptr.hpp"
#include "parse_tree_levels.hpp"
#include "parse_tree_symbol_processing.hpp"
#include "context_insensitive_decomposition.hpp"
#include "index.hpp"
#include "recompression.hpp"
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/search/fm_index/all.hpp>
#include <seqan3/search/search.hpp>
using namespace recomp;
// Reads the entire file `filename` in binary mode and returns its bytes as a
// string.
//
// If the stream cannot be opened, the current value of `errno` is thrown as
// an `int`.
std::string get_file_contents(const char *filename)
{
    std::ifstream input(filename, std::ios::in | std::ios::binary);
    if (!input)
    {
        throw(errno);
    }

    // A default-constructed istreambuf_iterator is the end-of-stream sentinel.
    std::istreambuf_iterator<char> first(input);
    std::istreambuf_iterator<char> last;
    return std::string(first, last);
}
std::vector<Fragment> generate_frag_queries(size_t len, size_t num_queries) {
std::random_device dev;
// std::mt19937 rng(dev());
std::mt19937 rng(0);
std::uniform_int_distribution<uint64_t> disti(0, 9999999999999ULL);
std::uniform_int_distribution<uint64_t> distl(4, 60);
std::vector<Fragment> frag_queries;
for (size_t i = 0; i < num_queries; i++) {
size_t index = disti(rng) % len;
size_t frag_len = std::min(distl(rng), len - index);
frag_queries.push_back(Fragment{index, frag_len});
}
return frag_queries;
}
// Cross-checks the recomp `Index` against a seqan3 FM index built over the
// same text.
//
// Steps:
//   1. Read the file named by argv[1] and build the `Index` over it.
//   2. Generate 1000 reproducible fragment queries and extract each query's
//      characters through `myindex.slp.extract`.
//   3. For every query, collect all positions passed to the `report` callback
//      and the value returned by `leftmost`.
//   4. Run the extracted patterns against the FM index.
//   5. Compare the sorted occurrence lists and the leftmost occurrence for
//      each query, printing every disagreement.
//
// Returns 0 when all queries agree; 1 on a usage error, an unreadable or
// empty input file, or any mismatch.
int main(int argc, char **argv) {
    if (argc != 2) {
        std::cout << "Usage: " << argv[0] << " <input_file>" << std::endl;
        return 1;
    }

    std::string text;
    try {
        text = get_file_contents(argv[1]);
    } catch (int err) {
        // get_file_contents throws errno as an int when the file cannot be
        // opened; report it instead of letting the program terminate silently.
        std::cerr << "Could not read " << argv[1] << ": " << std::strerror(err) << std::endl;
        return 1;
    }
    if (text.empty()) {
        // No fragment can be drawn from an empty text.
        std::cerr << "Input file is empty: " << argv[1] << std::endl;
        return 1;
    }

    auto myindex = Index::from(text);
    std::cout << "Constructed text index" << std::endl;

    // The FM index is built over raw bytes, so reinterpret each char as uint8_t.
    std::vector<uint8_t> text_vec(text.begin(), text.end());

    auto frag_queries = generate_frag_queries(text.size(), 1000);

    // Materialise each fragment as a byte pattern via the index's own extract,
    // so the patterns handed to the FM index are what the index believes the
    // fragments spell.
    std::vector<std::vector<uint8_t>> queries(frag_queries.size());
    std::transform(frag_queries.begin(), frag_queries.end(), queries.begin(), [&](Fragment f) {
        auto str = myindex.slp.extract(f.index, f.length);
        std::vector<uint8_t> vec(str.size());
        for (size_t i = 0; i < vec.size(); i++) {
            vec[i] = static_cast<unsigned char>(str[i]);
        }
        return vec;
    });

    std::vector<std::vector<size_t>> query_results_report(frag_queries.size());
    std::vector<size_t> query_results_left(frag_queries.size());
    for (size_t i = 0; i < frag_queries.size(); i++) {
        auto frag = frag_queries[i];
        myindex.report(frag, [&](int64_t j) {
            query_results_report[i].push_back(j);
        });
        query_results_left[i] = myindex.leftmost(frag);
    }
    std::cout << "Finished index reporting" << std::endl;

    std::vector<std::vector<size_t>> query_results_fm(frag_queries.size());
    auto fmindex = seqan3::fm_index{text_vec};
    std::cout << "Constructed FM Index" << std::endl;
    auto results = seqan3::search(queries, fmindex);
    std::cout << "Completed searching" << std::endl;
    for (auto && result : results) {
        query_results_fm[result.query_id()].push_back(result.reference_begin_position());
    }

    bool ok = true;
    for (size_t i = 0; i < frag_queries.size(); i++) {
        auto &reported = query_results_report[i];
        auto &expected = query_results_fm[i];
        // Sort both sides so the comparison does not depend on the order in
        // which either index happens to emit its hits.
        std::sort(reported.begin(), reported.end());
        std::sort(expected.begin(), expected.end());

        if (expected.empty()) {
            // The FM index found nothing for this pattern; indexing [0] here
            // would be undefined behaviour, so report it as a mismatch.
            ok = false;
            std::cout << "Leftmost mismatch on query: " << i << ":\n";
            std::cout << query_results_left[i] << " <no FM index hit>" << std::endl;
        } else if (query_results_left[i] != expected.front()) {
            ok = false;
            std::cout << "Leftmost mismatch on query: " << i << ":\n";
            std::cout << query_results_left[i] << " " << expected.front() << std::endl;
        }

        if (reported != expected) {
            ok = false;
            std::cout << "Reporting mismatch on query: " << i << ":\n";
            for (auto el : reported) {
                std::cout << el << " ";
            }
            std::cout << "\n";
            for (auto el : expected) {
                std::cout << el << " ";
            }
            std::cout << "\n";
        }
    }
    return ok ? 0 : 1;
}