-
Notifications
You must be signed in to change notification settings - Fork 37
Open
Description
When running your ART source code on FB dataset, we found a potential bug of ART that it could not search some keys which have been inserted.
The code below is used to reproduce the case. Data could be fetched from https://drive.google.com/file/d/1NDvJ5fIvNkMLRbiHPHzIEEirfL3wMsU0/view?usp=sharing.
Would you mind taking a look and fix it?
Thank you very much in advance.
Regards,
Zhicong
#include"./ART/Tree.h"
#include"./ART/Tree.cpp"
#include <iostream>
#include <chrono>
#include "tbb/tbb.h"
#include <cstdio>
#include <random>
#include <fstream>
using namespace ART_unsynchronized;
void loadKey(TID tid, Key &key) {
// Store the key of the tuple into the key vector
// Implementation is database specific
key.setKeyLen(sizeof(tid));
reinterpret_cast<uint64_t *>(&key[0])[0] = __builtin_bswap64(tid);
}
template<class T>
bool load_binary_data(T data[], int length, const std::string &file_path) {
std::ifstream is(file_path.c_str(), std::ios::binary | std::ios::in);
if (!is.is_open()) {
return false;
}
is.read(reinterpret_cast<char *>(data), std::streamsize(length * sizeof(T)));
is.close();
return true;
}
int main() {
ART_unsynchronized::Tree idx(loadKey);
size_t data_size = 200000000;
// Load data from file
uint64_t *keys = new uint64_t[data_size];
std::cout << "Loading data" << std::endl;
load_binary_data(keys, data_size,"./fb_200M_uint64");
// Make data Unique
std::cout << "Make data Unique" << std::endl;
std::sort(keys, keys + data_size);
auto tail = std::unique(keys, keys + data_size);
data_size = tail - keys;
// Shuffle all data
std::random_shuffle(keys, keys + data_size);
printf("unique key size is %d\n", data_size);
// print first 20 keys
for (auto i = 0; i < data_size; i++) {
printf("%llu ", keys[i]);
if (i >= 20) break;
}
printf("\n");
// Insert
printf("Inserting\n");
for (int i = 0; i < data_size; i++) {
Key key;
loadKey(keys[i], key);
idx.insert(key, keys[i]);
auto val = idx.lookup(key);
if (val != keys[i]) {
std::cout << "Position " << i << ", Insert error, wrong key read: " << val << " expected:" << keys[i]
<< std::endl;
throw;
}
}
// Searching
printf("Searching\n");
for (uint64_t i = 0; i < data_size; i++) {
Key key;
loadKey(keys[i], key);
auto val = idx.lookup(key);
if (val != keys[i]) {
std::cout << "Position " << i << ", Insert error, wrong key read: " << val << " expected:" << keys[i]
<< std::endl;
throw;
}
}
std::cout << "Pass test" << std::endl;
return 0;
}The output is as follow
Loading data
Make data Unique
unique key size is 200000000
4060157729 5065591993 58797781080 73649762378 14811202877 26396414685 52284035418 35497117877 61610484743 40478218422 4174327440 32246990818 69960350543 2586919880 74534544755 60312685616 43819963826 9641372789 40302608760 36968601931 35444885175
Inserting
Position 38002711, Insert error, wrong key read: 0 expected:12298204682441981952
terminate called without an active exception
Aborted (core dumped)
Metadata
Metadata
Assignees
Labels
No labels