From 16e7d0b0599232d97e1de353ad9d5b4403749a05 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Tue, 17 Feb 2026 09:41:41 -0500 Subject: [PATCH 1/5] fixes --- include/core.hpp | 61 +++++++++++-------- include/metadata.hpp | 12 +++- include/node.hpp | 70 ++++++++++++++++++--- src/core.cpp | 33 +++++++--- src/snapshot.cpp | 17 +++++- tests/database_test.cpp | 2 +- tests/join_test.cpp | 104 ++++++++++++++++---------------- tests/node_test.cpp | 20 +++--- tests/sharding_test.cpp | 4 +- tests/temporal_query_test.cpp | 34 +++++------ tests/where_expression_test.cpp | 6 +- 11 files changed, 227 insertions(+), 136 deletions(-) diff --git a/include/core.hpp b/include/core.hpp index c9798e6..c6d8c13 100644 --- a/include/core.hpp +++ b/include/core.hpp @@ -528,39 +528,48 @@ class ShardManager { " not found in schema '", schema_name, "'"); } - arrow::Result update_node(const int64_t id, + arrow::Result update_node(const std::string& schema_name, + const int64_t id, const std::shared_ptr &field, const Value &value, const UpdateType update_type) { - for (auto &schema_shards : shards_ | std::views::values) { - for (const auto &shard : schema_shards) { - if (id >= shard->min_id && id <= shard->max_id) { - return shard->update(id, field, value, update_type); - } + auto schema_it = shards_.find(schema_name); + if (schema_it == shards_.end()) { + return arrow::Status::KeyError("Schema not found: ", schema_name); + } + + for (const auto &shard : schema_it->second) { + if (id >= shard->min_id && id <= shard->max_id) { + return shard->update(id, field, value, update_type); } } - + return arrow::Status::KeyError("Node with id ", id, - " not found in any schema"); + " not found in schema ", schema_name); } - arrow::Result update_node(const int64_t id, + arrow::Result update_node(const std::string& schema_name, + const int64_t id, const std::string &field_name, const Value &value, const UpdateType update_type) { - for (auto &schema_shards : shards_ | std::views::values) { - for (const auto &shard : schema_shards) { - auto field = schema_registry_->get(shard->schema_name) - .ValueOrDie() - ->get_field(field_name); - if (id >= shard->min_id && id <= shard->max_id) { - return shard->update(id, field, value, update_type); - } + auto schema_it = shards_.find(schema_name); + if (schema_it == shards_.end()) { + return arrow::Status::KeyError("Schema not found: ", schema_name, " in shards"); + } + + auto field = schema_registry_->get(schema_name) + .ValueOrDie() + ->get_field(field_name); + + for (const auto &shard : schema_it->second) { + if (id >= shard->min_id && id <= shard->max_id) { + return shard->update(id, field, value, update_type); } } return arrow::Status::KeyError("Node with id ", id, - " not found in any schema"); + " not found in schema ", schema_name); } arrow::Result>> get_nodes( @@ -758,24 +767,26 @@ class Database { return node; } - arrow::Result update_node(const int64_t id, + arrow::Result update_node(const std::string& schema_name, + const int64_t id, const std::shared_ptr &field, const Value &value, const UpdateType update_type) { - return shard_manager_->update_node(id, field, value, update_type); + return shard_manager_->update_node(schema_name, id, field, value, update_type); } - arrow::Result update_node(const int64_t id, + arrow::Result update_node(const std::string& schema_name, + const int64_t id, const std::string &field_name, const Value &value, const UpdateType update_type) { - return shard_manager_->update_node(id, field_name, value, update_type); + return shard_manager_->update_node(schema_name, id, field_name, value, update_type); } arrow::Result remove_node(const std::string &schema_name, int64_t node_id) { - if (auto res = node_manager_->remove_node(node_id); !res) { - return arrow::Status::Invalid("Failed to remove node: {}", node_id); + if (auto res = node_manager_->remove_node(schema_name, node_id); !res) { + return arrow::Status::Invalid("Failed to remove node: ", schema_name, ":", node_id); } return shard_manager_->remove_node(schema_name, node_id); } @@ -810,7 +821,7 @@ class Database { const std::string &schema_name, TemporalContext *temporal_context = nullptr, size_t chunk_size = 10000) const { - ARROW_ASSIGN_OR_RAISE(auto schema, schema_registry_->get(schema_name)); + ARROW_ASSIGN_OR_RAISE(const auto schema, schema_registry_->get(schema_name)); auto arrow_schema = schema->arrow(); ARROW_ASSIGN_OR_RAISE(auto all_nodes, shard_manager_->get_nodes(schema_name)); diff --git a/include/metadata.hpp b/include/metadata.hpp index ac78c01..dc62837 100644 --- a/include/metadata.hpp +++ b/include/metadata.hpp @@ -312,11 +312,11 @@ struct Manifest { std::string id; std::vector shards; std::vector edges; - int64_t node_id_seq = 0; + std::unordered_map node_id_seq_per_schema; // Per-schema ID counters int64_t edge_id_seq = 0; int64_t shard_id_seq = 0; - NLOHMANN_DEFINE_TYPE_INTRUSIVE(Manifest, id, shards, edges, node_id_seq, + NLOHMANN_DEFINE_TYPE_INTRUSIVE(Manifest, id, shards, edges, node_id_seq_per_schema, edge_id_seq, shard_id_seq); std::string toString() const { @@ -333,7 +333,13 @@ struct Manifest { << "}"; if (i < edges.size() - 1) ss << ", "; } - ss << "], node_id_seq=" << node_id_seq << ", edge_id_seq=" << edge_id_seq + ss << "], node_id_seq_per_schema={"; + size_t idx = 0; + for (const auto& [schema_name, counter] : node_id_seq_per_schema) { + ss << "'" << schema_name << "':" << counter; + if (idx++ < node_id_seq_per_schema.size() - 1) ss << ", "; + } + ss << "}, edge_id_seq=" << edge_id_seq << ", shard_id_seq=" << shard_id_seq << "}"; return ss.str(); } diff --git a/include/node.hpp b/include/node.hpp index c34388a..88eb4d9 100644 --- a/include/node.hpp +++ b/include/node.hpp @@ -167,11 +167,28 @@ class NodeManager { ~NodeManager() { node_arena_->clear(); } - arrow::Result> get_node(const int64_t id) { - return nodes[id]; + arrow::Result> get_node(const std::string& schema_name, + const int64_t id) { + auto schema_it = nodes_.find(schema_name); + if (schema_it == nodes_.end()) { + return arrow::Status::KeyError("Schema not found: ", schema_name); + } + + auto node_it = schema_it->second.find(id); + if (node_it == schema_it->second.end()) { + return arrow::Status::KeyError("Node not found: ", schema_name, ":", id); + } + + return node_it->second; } - bool remove_node(const int64_t id) { return nodes.erase(id) > 0; } + bool remove_node(const std::string& schema_name, const int64_t id) { + auto schema_it = nodes_.find(schema_name); + if (schema_it == nodes_.end()) { + return false; + } + return schema_it->second.erase(id) > 0; + } arrow::Result> create_node( const std::string &schema_name, @@ -216,7 +233,11 @@ class NodeManager { int64_t id = 0; if (!add) { - id = id_counter.fetch_add(1); + // Get or create per-schema ID counter + if (id_counters_.find(schema_name) == id_counters_.end()) { + id_counters_[schema_name].store(0); + } + id = id_counters_[schema_name].fetch_add(1); } else { id = data.at("id").as_int64(); } @@ -249,7 +270,7 @@ class NodeManager { id, schema_name, EMPTY_DATA, std::make_unique(std::move(node_handle)), node_arena_, schema_, layout_); - nodes[id] = node; + nodes_[schema_name][id] = node; return node; } else { std::unordered_map normalized_data; @@ -270,17 +291,46 @@ class NodeManager { auto node = std::make_shared(id, schema_name, normalized_data, std::unique_ptr{}, nullptr, schema_); - nodes[id] = node; + nodes_[schema_name][id] = node; return node; } } - void set_id_counter(const int64_t value) { id_counter.store(value); } - int64_t get_id_counter() const { return id_counter.load(); } + void set_id_counter(const std::string& schema_name, const int64_t value) { + id_counters_[schema_name].store(value); + } + + int64_t get_id_counter(const std::string& schema_name) const { + auto it = id_counters_.find(schema_name); + if (it == id_counters_.end()) { + return 0; + } + return it->second.load(); + } + + // Get all schema ID counters (for snapshot/manifest) + std::unordered_map get_all_id_counters() const { + std::unordered_map result; + for (const auto& [schema_name, counter] : id_counters_) { + result[schema_name] = counter.load(); + } + return result; + } + + // Set all schema ID counters (for snapshot/manifest restore) + void set_all_id_counters(const std::unordered_map& counters) { + for (const auto& [schema_name, value] : counters) { + id_counters_[schema_name].store(value); + } + } private: - std::atomic id_counter{0}; - std::unordered_map> nodes; + // Per-schema ID counters (schema_name -> counter) + std::unordered_map> id_counters_; + + // Per-schema node storage (schema_name -> (node_id -> Node)) + std::unordered_map>> nodes_; + std::shared_ptr schema_registry_; std::shared_ptr layout_registry_; std::shared_ptr node_arena_; diff --git a/src/core.cpp b/src/core.cpp index eabf35e..ce37b4b 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -1312,7 +1312,10 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema, while (size-- > 0) { auto item = queue.front(); queue.pop(); - auto node = query_state.node_manager->get_node(item.node_id).ValueOrDie(); + auto item_schema = item.schema_ref.is_declaration() + ? item.schema_ref.schema() + : query_state.aliases.at(item.schema_ref.value()); + auto node = query_state.node_manager->get_node(item_schema, item.node_id).ValueOrDie(); const auto& it_fq = query_state.schema_field_indices.find(item.schema_ref.value()); if (it_fq == query_state.schema_field_indices.end()) { @@ -2170,7 +2173,7 @@ arrow::Result> Database::query( .contains(target_id)) { continue; } - auto node_result = node_manager_->get_node(target_id); + auto node_result = node_manager_->get_node(target_schema, target_id); if (node_result.ok()) { const auto target_node = node_result.ValueOrDie(); if (target_node->schema_name == target_schema) { @@ -2275,27 +2278,37 @@ arrow::Result> Database::query( } else if (traverse->traverse_type() == TraverseType::Left) { query_state.ids[traverse->target().value()].insert( matched_target_ids.begin(), matched_target_ids.end()); - } else { // Right, Full remove nodes with incoming connections + } else { // Right, Full: matched targets + unmatched targets auto target_ids = get_ids_from_table( get_table(target_schema, query_state.temporal_context.get()) .ValueOrDie()) .ValueOrDie(); + + // Include matched targets + llvm::DenseSet result = matched_target_ids; + + // Add unmatched targets (compare within same schema to avoid ID collision) + llvm::DenseSet unmatched_targets; + dense_difference(target_ids, matched_target_ids, unmatched_targets); + result.insert(unmatched_targets.begin(), unmatched_targets.end()); + IF_DEBUG_ENABLED { log_debug( - "traverse type: '{}', matched_source_ids=[{}], " - "target_ids=[{}]", - traverse->target().value(), join_container(matched_source_ids), - join_container(target_ids)); + "traverse type: '{}' (Right/Full), matched_targets=[{}], " + "unmatched_targets=[{}], total={}", + traverse->target().value(), + join_container(matched_target_ids), + join_container(unmatched_targets), + result.size()); } - llvm::DenseSet result; - dense_difference(target_ids, matched_source_ids, result); + query_state.ids[traverse->target().value()] = result; } std::vector> neighbors; for (auto id : query_state.ids[traverse->target().value()]) { - auto node_res = node_manager_->get_node(id); + auto node_res = node_manager_->get_node(target_schema, id); if (node_res.ok()) { neighbors.push_back(node_res.ValueOrDie()); } diff --git a/src/snapshot.cpp b/src/snapshot.cpp index c83a044..691ae31 100644 --- a/src/snapshot.cpp +++ b/src/snapshot.cpp @@ -51,7 +51,7 @@ arrow::Result SnapshotManager::initialize() { log_info(this->manifest_->toString()); edge_store_->set_id_seq(manifest_->edge_id_seq); - node_manager_->set_id_counter(manifest_->node_id_seq); + node_manager_->set_all_id_counters(manifest_->node_id_seq_per_schema); shard_manager_->set_id_counter(manifest_->shard_id_seq); // Restore index counters for each schema @@ -197,12 +197,23 @@ arrow::Result SnapshotManager::commit() { Manifest new_manifest; new_manifest.id = generate_uuid(); new_manifest.edge_id_seq = edge_store_->get_edge_id_counter(); - new_manifest.node_id_seq = node_manager_->get_id_counter(); + new_manifest.node_id_seq_per_schema = node_manager_->get_all_id_counters(); new_manifest.shard_id_seq = shard_manager_->get_id_counter(); + std::stringstream node_counters_str; + node_counters_str << "{"; + size_t idx = 0; + for (const auto& [schema_name, counter] : new_manifest.node_id_seq_per_schema) { + node_counters_str << schema_name << ":" << counter; + if (idx++ < new_manifest.node_id_seq_per_schema.size() - 1) { + node_counters_str << ", "; + } + } + node_counters_str << "}"; + log_info("Saving counters: edge_id_seq=" + std::to_string(new_manifest.edge_id_seq) + - ", node_id_seq=" + std::to_string(new_manifest.node_id_seq) + + ", node_id_seq_per_schema=" + node_counters_str.str() + ", shard_id_seq=" + std::to_string(new_manifest.shard_id_seq)); for (const auto &edge_type : edge_store_->get_edge_types()) { diff --git a/tests/database_test.cpp b/tests/database_test.cpp index 8d5eafc..c10d14f 100644 --- a/tests/database_test.cpp +++ b/tests/database_test.cpp @@ -603,7 +603,7 @@ TEST_F(DatabaseTest, VerifyUpdatedFlag) { EXPECT_TRUE(is_clean) << "Shard should be marked as clean after snapshot"; // Update a node - db->update_node(0, "age", int64_t(30), SET).ValueOrDie(); + db->update_node("users", 0, "age", int64_t(30), SET).ValueOrDie(); // Verify shard is marked as updated again is_clean = shard_manager->is_shard_clean("users", 0).ValueOrDie(); diff --git a/tests/join_test.cpp b/tests/join_test.cpp index e410f32..653b911 100644 --- a/tests/join_test.cpp +++ b/tests/join_test.cpp @@ -112,7 +112,7 @@ TEST(JoinTest, UserFriendCompanyInnerJoin) { auto db = setup_test_db(); db->connect(0, "friend", 1).ValueOrDie(); - db->connect(1, "works-at", 6).ValueOrDie(); + db->connect(1, "works-at", 1).ValueOrDie(); Query query = Query::from("u:users") @@ -144,7 +144,7 @@ TEST(JoinTest, UserFriendCompanyInnerJoin) { expected_cells["f.name"] = arrow::MakeScalar("bob"); expected_cells["f.age"] = arrow::MakeScalar((int64_t)31); - expected_cells["c.id"] = arrow::MakeScalar((int64_t)6); + expected_cells["c.id"] = arrow::MakeScalar((int64_t)1); expected_cells["c.name"] = arrow::MakeScalar("google"); expected_cells["c.size"] = arrow::MakeScalar((int64_t)3000); @@ -248,7 +248,7 @@ TEST(JoinTest, InnerJoinFromSameNodeMultiTarget) { auto db = setup_test_db(); db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff - db->connect(0, "works-at", 6).ValueOrDie(); // alex -> google + db->connect(0, "works-at", 1).ValueOrDie(); // alex -> google Query query = Query::from("u:users") @@ -282,7 +282,7 @@ TEST(JoinTest, InnerJoinFromSameNodeMultiTarget) { expected_row1["f.name"] = arrow::MakeScalar("bob"); expected_row1["f.age"] = arrow::MakeScalar((int64_t)31); // Google (company) - expected_row1["c.id"] = arrow::MakeScalar((int64_t)6); + expected_row1["c.id"] = arrow::MakeScalar((int64_t)1); expected_row1["c.name"] = arrow::MakeScalar("google"); expected_row1["c.size"] = arrow::MakeScalar((int64_t)3000); @@ -312,7 +312,7 @@ TEST(JoinTest, InnerJoinFromSameNodeMultiTarget) { expected_row2["f.name"] = arrow::MakeScalar("jeff"); expected_row2["f.age"] = arrow::MakeScalar((int64_t)33); // Google (company) - expected_row2["c.id"] = arrow::MakeScalar((int64_t)6); + expected_row2["c.id"] = arrow::MakeScalar((int64_t)1); expected_row2["c.name"] = arrow::MakeScalar("google"); expected_row2["c.size"] = arrow::MakeScalar((int64_t)3000); @@ -334,9 +334,9 @@ TEST(JoinTest, InnerJoinFromSameNodeAndEndConnections) { auto db = setup_test_db(); db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff - db->connect(0, "works-at", 5).ValueOrDie(); // alex -> ibm - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google - db->connect(2, "works-at", 7).ValueOrDie(); // jeff -> aws + db->connect(0, "works-at", 0).ValueOrDie(); // alex -> ibm + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google + db->connect(2, "works-at", 2).ValueOrDie(); // jeff -> aws Query query = Query::from("u:users") @@ -371,7 +371,7 @@ TEST(JoinTest, InnerJoinFromSameNodeAndEndConnections) { expected_row1["f.name"] = arrow::MakeScalar("bob"); expected_row1["f.age"] = arrow::MakeScalar((int64_t)31); // IBM (company) - expected_row1["c.id"] = arrow::MakeScalar((int64_t)5); + expected_row1["c.id"] = arrow::MakeScalar((int64_t)0); expected_row1["c.name"] = arrow::MakeScalar("ibm"); expected_row1["c.size"] = arrow::MakeScalar((int64_t)1000); @@ -401,7 +401,7 @@ TEST(JoinTest, InnerJoinFromSameNodeAndEndConnections) { expected_row2["f.name"] = arrow::MakeScalar("jeff"); expected_row2["f.age"] = arrow::MakeScalar((int64_t)33); // IBM (company) - expected_row2["c.id"] = arrow::MakeScalar((int64_t)5); + expected_row2["c.id"] = arrow::MakeScalar((int64_t)0); expected_row2["c.name"] = arrow::MakeScalar("ibm"); expected_row2["c.size"] = arrow::MakeScalar((int64_t)1000); @@ -424,7 +424,7 @@ TEST(JoinTest, EmptyResultFromInnerJoin) { // Create relationships that will result in empty results due to inner join db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob (friend) db->connect(1, "friend", 2).ValueOrDie(); // bob -> jeff (friend) - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google // But no connections for jeff to any company // Query that will return no results because jeff doesn't work anywhere @@ -457,10 +457,10 @@ TEST(JoinTest, MultiPathToSameTarget) { // Create multiple paths to the same target db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff - db->connect(0, "works-at", 5).ValueOrDie(); // alex -> ibm - db->connect(1, "works-at", 5) + db->connect(0, "works-at", 0).ValueOrDie(); // alex -> ibm + db->connect(1, "works-at", 0) .ValueOrDie(); // bob -> ibm (same company as alex) - db->connect(2, "works-at", 6).ValueOrDie(); // jeff -> google + db->connect(2, "works-at", 1).ValueOrDie(); // jeff -> google // Query: Find all friends of alex who work at the same company as alex Query query = @@ -469,9 +469,9 @@ TEST(JoinTest, MultiPathToSameTarget) { .traverse("u", "works-at", "c1:companies", TraverseType::Inner) .traverse("f", "works-at", "c2:companies", TraverseType::Inner) .where("c1.id", CompareOp::Eq, - Value((int64_t)5)) // Filter for alex's company (IBM) + Value((int64_t)0)) // Filter for alex's company (IBM ID 0) .where("c2.id", CompareOp::Eq, - Value((int64_t)5)) // Filter for friend's company (also IBM) + Value((int64_t)0)) // Filter for friend's company (also IBM ID 0) .build(); auto query_result = db->query(query); @@ -498,11 +498,11 @@ TEST(JoinTest, MultiPathToSameTarget) { expected_row["f.name"] = arrow::MakeScalar("bob"); expected_row["f.age"] = arrow::MakeScalar((int64_t)31); // c1 is IBM (alex's company) - expected_row["c1.id"] = arrow::MakeScalar((int64_t)5); + expected_row["c1.id"] = arrow::MakeScalar((int64_t)0); expected_row["c1.name"] = arrow::MakeScalar("ibm"); expected_row["c1.size"] = arrow::MakeScalar((int64_t)1000); // c2 is also IBM (bob's company) - expected_row["c2.id"] = arrow::MakeScalar((int64_t)5); + expected_row["c2.id"] = arrow::MakeScalar((int64_t)0); expected_row["c2.name"] = arrow::MakeScalar("ibm"); expected_row["c2.size"] = arrow::MakeScalar((int64_t)1000); @@ -526,14 +526,14 @@ TEST(JoinTest, CartesianProductExplosion) { db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff db->connect(0, "friend", 3).ValueOrDie(); // alex -> sam - db->connect(1, "works-at", 5).ValueOrDie(); // bob -> ibm - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google + db->connect(1, "works-at", 0).ValueOrDie(); // bob -> ibm + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google - db->connect(2, "works-at", 6).ValueOrDie(); // jeff -> google - db->connect(2, "works-at", 7).ValueOrDie(); // jeff -> aws + db->connect(2, "works-at", 1).ValueOrDie(); // jeff -> google + db->connect(2, "works-at", 2).ValueOrDie(); // jeff -> aws - db->connect(3, "works-at", 5).ValueOrDie(); // sam -> ibm - db->connect(3, "works-at", 7).ValueOrDie(); // sam -> aws + db->connect(3, "works-at", 0).ValueOrDie(); // sam -> ibm + db->connect(3, "works-at", 2).ValueOrDie(); // sam -> aws // Query: Friends of alex and where they work // Results in 3 friends × ~2 companies each = ~6 rows total @@ -581,7 +581,7 @@ TEST(JoinTest, LeftJoin) { // Create relationships where some nodes don't have target matches db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google // jeff has no company (will produce NULL in the results with LEFT JOIN) // LEFT JOIN: Keep all users even if they don't work at any company @@ -613,7 +613,7 @@ TEST(JoinTest, LeftJoin) { bob_row["f.id"] = arrow::MakeScalar((int64_t)1); bob_row["f.name"] = arrow::MakeScalar("bob"); bob_row["f.age"] = arrow::MakeScalar((int64_t)31); - bob_row["c.id"] = arrow::MakeScalar((int64_t)6); + bob_row["c.id"] = arrow::MakeScalar((int64_t)1); bob_row["c.name"] = arrow::MakeScalar("google"); bob_row["c.size"] = arrow::MakeScalar((int64_t)3000); @@ -691,10 +691,10 @@ TEST(JoinTest, RightJoin) { // Create relationships where some targets don't have matching sources db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google - db->connect(2, "works-at", 7).ValueOrDie(); // jeff -> aws + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google + db->connect(2, "works-at", 2).ValueOrDie(); // jeff -> aws // Sam (id=3) has no friends but works at ibm - db->connect(3, "works-at", 5).ValueOrDie(); // sam -> ibm + db->connect(3, "works-at", 0).ValueOrDie(); // sam -> ibm // RIGHT JOIN: Keep all companies even if no users work there Query query = @@ -741,11 +741,11 @@ TEST(JoinTest, CombinedJoinTypes) { auto db = setup_test_db(); db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google // jeff has no company // Create a row for matt who has no friends - db->connect(4, "works-at", 5).ValueOrDie(); // matt -> ibm + db->connect(4, "works-at", 0).ValueOrDie(); // matt -> ibm // Query that combines INNER, LEFT and RIGHT joins Query query = @@ -828,7 +828,7 @@ TEST(JoinTest, CombinedJoinTypes) { c_id_col->GetScalar(i).ValueOrDie()) ->value; - if (u_id == 0 && f_id == 1 && c_id == 6) { + if (u_id == 0 && f_id == 1 && c_id == 1) { has_alex_bob_google = true; std::cout << " ✓ Found alex->bob->google pattern" << std::endl; } @@ -857,7 +857,7 @@ TEST(JoinTest, CombinedJoinTypes) { c_id_col->GetScalar(i).ValueOrDie()) ->value; - if (c_id == 7) { + if (c_id == 2) { has_null_null_aws = true; std::cout << " ✓ Found NULL->NULL->aws pattern" << std::endl; } @@ -877,8 +877,8 @@ TEST(JoinTest, MultiLevelLeftJoin) { db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff db->connect(0, "friend", 3).ValueOrDie(); // alex -> sam - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google - db->connect(2, "likes", 5).ValueOrDie(); // jeff -> ibm + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google + db->connect(2, "likes", 0).ValueOrDie(); // jeff -> ibm (Company ID 0) // sam has no company and no likes // Multi-level LEFT JOINs: Keep all users at each level @@ -942,7 +942,7 @@ TEST(JoinTest, MultiLevelLeftJoin) { c_id_col->GetScalar(i).ValueOrDie()) ->value; - if (u_id == 0 && f_id == 1 && c_id == 6) { + if (u_id == 0 && f_id == 1 && c_id == 1) { found_alex_bob_google = true; break; } @@ -1128,11 +1128,11 @@ TEST(JoinTest, FullOuterJoin) { // Create relationships for a FULL OUTER JOIN scenario db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google // jeff has no company // matt (id=4) has no friends but works at ibm - db->connect(4, "works-at", 5).ValueOrDie(); // matt -> ibm + db->connect(4, "works-at", 0).ValueOrDie(); // matt -> ibm // AWS (id=7) has no employee connected directly @@ -1235,7 +1235,7 @@ TEST(JoinTest, FullOuterJoin) { c_id_col->GetScalar(i).ValueOrDie()) ->value; - if (u_id == 0 && f_id == 1 && c_id == 6) { + if (u_id == 0 && f_id == 1 && c_id == 1) { has_alex_bob_google = true; std::cout << " ✓ Found alex->bob->google pattern" << std::endl; } @@ -1264,7 +1264,7 @@ TEST(JoinTest, FullOuterJoin) { c_id_col->GetScalar(i).ValueOrDie()) ->value; - if (c_id == 7) { + if (c_id == 2) { has_null_null_aws = true; std::cout << " ✓ Found NULL->NULL->aws pattern" << std::endl; } @@ -1284,7 +1284,7 @@ TEST(JoinTest, SelectClauseFiltering) { // Create some connections for our test db->connect(0, "friend", 1).ValueOrDie(); // alex -> bob db->connect(0, "friend", 2).ValueOrDie(); // alex -> jeff - db->connect(1, "works-at", 6).ValueOrDie(); // bob -> google + db->connect(1, "works-at", 1).ValueOrDie(); // bob -> google // Query with SELECT - only get user (u) and friend (f) columns Query query = @@ -1523,8 +1523,8 @@ TEST(JoinTest, MultiPatternPathThroughFriends) { // Create relationships db_custom->connect(0, "FRIEND", 1).ValueOrDie(); // Alex -> Bob db_custom->connect(1, "FRIEND", 0).ValueOrDie(); // Bob -> Alex - db_custom->connect(0, "WORKS_AT", 4).ValueOrDie(); // Alex -> Google - db_custom->connect(1, "WORKS_AT", 5).ValueOrDie(); // Bob -> IBM + db_custom->connect(0, "WORKS_AT", 0).ValueOrDie(); // Alex -> Google (Company ID 0) + db_custom->connect(1, "WORKS_AT", 1).ValueOrDie(); // Bob -> IBM (Company ID 1) // Run the query: MATCH (u:User)-[:FRIEND INNER]->(f:User), (f)-[:WORKS_AT // INNER]->(c:Company) @@ -1620,15 +1620,15 @@ TEST(JoinTest, MultiPatternPathThroughFriends) { std::cout << "Row " << i << ": u.id=" << u_id << ", f.id=" << f_id << ", c.id=" << c_id << std::endl; - if (u_id == 0 && f_id == 1 && c_id == 5) { + if (u_id == 0 && f_id == 1 && c_id == 1) { found_alex_bob_ibm = true; - std::cout << " ✓ Found Alex(ID=0)->Bob(ID=1)->IBM(ID=5) pattern by ID" + std::cout << " ✓ Found Alex(ID=0)->Bob(ID=1)->IBM(ID=1) pattern by ID" << std::endl; } - if (u_id == 1 && f_id == 0 && c_id == 4) { + if (u_id == 1 && f_id == 0 && c_id == 0) { found_bob_alex_google = true; - std::cout << " ✓ Found Bob(ID=1)->Alex(ID=0)->Google(ID=4) pattern by ID" + std::cout << " ✓ Found Bob(ID=1)->Alex(ID=0)->Google(ID=0) pattern by ID" << std::endl; } } @@ -1645,9 +1645,9 @@ TEST(JoinTest, MultiPatternWithSharedVars) { auto db = setup_test_db(); db->connect(0, "FRIEND", 1).ValueOrDie(); // Alex -> Bob db->connect(0, "FRIEND", 2).ValueOrDie(); // Alex -> Jeff - db->connect(0, "WORKS_AT", 6).ValueOrDie(); // Alex -> Google - db->connect(2, "WORKS_AT", 6).ValueOrDie(); // Jeff -> Google - db->connect(1, "WORKS_AT", 5).ValueOrDie(); + db->connect(0, "WORKS_AT", 1).ValueOrDie(); // Alex -> Google (Company ID 1) + db->connect(2, "WORKS_AT", 1).ValueOrDie(); // Jeff -> Google (Company ID 1) + db->connect(1, "WORKS_AT", 0).ValueOrDie(); // Bob -> IBM (Company ID 0) Query query = Query::from("u:users") .traverse("u", "FRIEND", "f:users") @@ -1708,10 +1708,10 @@ TEST(JoinTest, MultiPatternWithSharedVars) { f_name_col->GetScalar(0).ValueOrDie()); ASSERT_EQ(f_name_scalar->view(), "jeff"); - // Check c.id (should be 6 - google) + // Check c.id (should be 1 - google) auto c_id_scalar = std::static_pointer_cast( c_id_col->GetScalar(0).ValueOrDie()); - ASSERT_EQ(c_id_scalar->value, 6); + ASSERT_EQ(c_id_scalar->value, 1); auto c_name_scalar = std::static_pointer_cast( c_name_col->GetScalar(0).ValueOrDie()); diff --git a/tests/node_test.cpp b/tests/node_test.cpp index 3b9200b..bae973e 100644 --- a/tests/node_test.cpp +++ b/tests/node_test.cpp @@ -88,7 +88,7 @@ TEST_F(NodeTest, NodeManagerGetNode) { int64_t node_id = original_node->id; // Retrieve node - auto retrieved_result = node_manager_->get_node(node_id); + auto retrieved_result = node_manager_->get_node("User",node_id); ASSERT_TRUE(retrieved_result.ok()); auto retrieved_node = retrieved_result.ValueOrDie(); @@ -112,15 +112,15 @@ TEST_F(NodeTest, NodeManagerRemoveNode) { int64_t node_id = node->id; // Verify node exists - auto get_result = node_manager_->get_node(node_id); + auto get_result = node_manager_->get_node("User",node_id); ASSERT_TRUE(get_result.ok()); // Remove node - bool removed = node_manager_->remove_node(node_id); + bool removed = node_manager_->remove_node("User",node_id); EXPECT_TRUE(removed); // Verify node no longer exists - auto get_result_after = node_manager_->get_node(node_id); + auto get_result_after = node_manager_->get_node("User",node_id); // EXPECT_FALSE(get_result_after.ok()) << "Node should not exist after // removal"; } @@ -392,8 +392,8 @@ TEST_F(NodeTest, DISABLED_NodeManagerValidationAutoGeneratedId) { // Test multiple nodes and ID counter TEST_F(NodeTest, MultipleNodesAndIdCounter) { // Set initial ID counter - node_manager_->set_id_counter(100); - EXPECT_EQ(node_manager_->get_id_counter(), 100); + node_manager_->set_id_counter("User", 100); + EXPECT_EQ(node_manager_->get_id_counter("User"), 100); std::unordered_map node_data1 = {{"name", Value{"User1"}}, {"score", Value{80.0}}}; @@ -414,11 +414,11 @@ TEST_F(NodeTest, MultipleNodesAndIdCounter) { EXPECT_EQ(node2->id, 101); // Verify ID counter advanced - EXPECT_EQ(node_manager_->get_id_counter(), 102); + EXPECT_EQ(node_manager_->get_id_counter("User"), 102); // Verify both nodes exist and are different - auto get1_result = node_manager_->get_node(100); - auto get2_result = node_manager_->get_node(101); + auto get1_result = node_manager_->get_node("User",100); + auto get2_result = node_manager_->get_node("User",101); ASSERT_TRUE(get1_result.ok()); ASSERT_TRUE(get2_result.ok()); @@ -453,7 +453,7 @@ TEST_F(NodeTest, PerformanceTest) { // Access all nodes and verify data for (int i = 0; i < num_nodes; ++i) { - auto node_result = node_manager_->get_node(node_ids[i]); + auto node_result = node_manager_->get_node("User",node_ids[i]); ASSERT_TRUE(node_result.ok()) << "Failed to get node " << i; auto node = node_result.ValueOrDie(); diff --git a/tests/sharding_test.cpp b/tests/sharding_test.cpp index 790ab8b..d19da94 100644 --- a/tests/sharding_test.cpp +++ b/tests/sharding_test.cpp @@ -91,7 +91,7 @@ TEST_F(ShardingTest, AddAndRetrieveNodes) { TEST_F(ShardingTest, UpdateNodes) { // Create a node auto node = create_test_node("Original", 0); - auto update_result = db->update_node(node->id, "name", Value{"Updated"}, SET); + auto update_result = db->update_node("test-schema", node->id, "name", Value{"Updated"}, SET); ASSERT_TRUE(update_result.ok()) << "Failed to update node: " << update_result.status().ToString(); @@ -115,7 +115,7 @@ TEST_F(ShardingTest, Compaction) { } // Now remove some nodes to create gaps - auto update_result = db->update_node(1, "name", Value{"Removed"}, SET); + auto update_result = db->update_node("test-schema", 1, "name", Value{"Removed"}, SET); ASSERT_TRUE(update_result.ok()); // Compact the schema diff --git a/tests/temporal_query_test.cpp b/tests/temporal_query_test.cpp index 8a7e081..33a0cf3 100644 --- a/tests/temporal_query_test.cpp +++ b/tests/temporal_query_test.cpp @@ -105,13 +105,13 @@ TEST_F(TemporalQueryTest, NodeUpdateAtDifferentTimes) { // Update age to 26 at t1 mock_clock_.set_time(t1_); auto update_result1 = - db_->update_node(user_id, "age", Value(26), UpdateType::SET); + db_->update_node("User", user_id, "age", Value(26), UpdateType::SET); ASSERT_TRUE(update_result1.ok()) << update_result1.status(); // Update age to 27 at t2 mock_clock_.set_time(t2_); auto update_result2 = - db_->update_node(user_id, "age", Value(27), UpdateType::SET); + db_->update_node("User", user_id, "age", Value(27), UpdateType::SET); ASSERT_TRUE(update_result2.ok()) << update_result2.status(); // Query current version (at t2): should see age=27 @@ -187,11 +187,11 @@ TEST_F(TemporalQueryTest, MultipleFieldUpdateAtSameTime) { // Update both age and active at t1 mock_clock_.set_time(t1_); - auto update1 = db_->update_node(user_id, "age", Value(31), UpdateType::SET); + auto update1 = db_->update_node("User", user_id, "age", Value(31), UpdateType::SET); ASSERT_TRUE(update1.ok()); auto update2 = - db_->update_node(user_id, "active", Value(false), UpdateType::SET); + db_->update_node("User", user_id, "active", Value(false), UpdateType::SET); ASSERT_TRUE(update2.ok()); // Query at current time: should see age=31, active=false @@ -224,13 +224,13 @@ TEST_F(TemporalQueryTest, ClockAdvanceAndQuery) { // Advance time and update mock_clock_.advance_seconds(1); // 1 second after t0 uint64_t update1_time = mock_clock_.now_nanos(); - auto update1 = db_->update_node(user_id, "age", Value(36), UpdateType::SET); + auto update1 = db_->update_node("User", user_id, "age", Value(36), UpdateType::SET); ASSERT_TRUE(update1.ok()); // Advance time and update again mock_clock_.advance_seconds(1); // 2 seconds after t0 uint64_t update2_time = mock_clock_.now_nanos(); - auto update2 = db_->update_node(user_id, "age", Value(37), UpdateType::SET); + auto update2 = db_->update_node("User", user_id, "age", Value(37), UpdateType::SET); ASSERT_TRUE(update2.ok()); // Query current: should see age=37 @@ -299,12 +299,12 @@ TEST_F(TemporalQueryTest, BitemporalQueryWithUpdates) { // Update at t1 mock_clock_.set_time(t1_); - auto update1 = db_->update_node(user_id, "age", Value(41), UpdateType::SET); + auto update1 = db_->update_node("User", user_id, "age", Value(41), UpdateType::SET); ASSERT_TRUE(update1.ok()); // Update at t2 mock_clock_.set_time(t2_); - auto update2 = db_->update_node(user_id, "age", Value(42), UpdateType::SET); + auto update2 = db_->update_node("User", user_id, "age", Value(42), UpdateType::SET); ASSERT_TRUE(update2.ok()); // ======================================================================== @@ -369,7 +369,7 @@ TEST_F(TemporalQueryTest, TemporalQueryBetweenUpdateTimes) { // Update at t1 mock_clock_.set_time(t1_); - auto update1 = db_->update_node(user_id, "age", Value(51), UpdateType::SET); + auto update1 = db_->update_node("User", user_id, "age", Value(51), UpdateType::SET); ASSERT_TRUE(update1.ok()); // Calculate midpoint between t0 and t1 @@ -583,12 +583,12 @@ TEST_F(TemporalQueryTest, NullFieldInVersionChain) { // Update age to 30 at t1 mock_clock_.set_time(t1_); - auto update1 = db_->update_node(user_id, "age", Value(30), UpdateType::SET); + auto update1 = db_->update_node("User", user_id, "age", Value(30), UpdateType::SET); ASSERT_TRUE(update1.ok()); // Update age to NULL at t2 mock_clock_.set_time(t2_); - auto update2 = db_->update_node(user_id, "age", Value(), UpdateType::SET); + auto update2 = db_->update_node("User", user_id, "age", Value(), UpdateType::SET); ASSERT_TRUE(update2.ok()); // Query at t0: should see age=25 @@ -689,7 +689,7 @@ TEST_F(TemporalQueryTest, MultipleNodesIndependentVersions) { mock_clock_.set_time(t1_); auto alice_update = - db_->update_node(alice_id, "age", Value(26), UpdateType::SET); + db_->update_node("User", alice_id, "age", Value(26), UpdateType::SET); ASSERT_TRUE(alice_update.ok()); // Bob: created at t1 with age=30, updated to 31 at t2 @@ -697,7 +697,7 @@ TEST_F(TemporalQueryTest, MultipleNodesIndependentVersions) { int64_t bob_id = create_simple_user("Bob", 30); mock_clock_.set_time(t2_); - auto bob_update = db_->update_node(bob_id, "age", Value(31), UpdateType::SET); + auto bob_update = db_->update_node("User", bob_id, "age", Value(31), UpdateType::SET); ASSERT_TRUE(bob_update.ok()); // Query at t0: should see only Alice (age=25) @@ -775,7 +775,7 @@ TEST_F(TemporalQueryTest, VersioningDisabledFallback) { // Update to age=26 mock_clock_.set_time(t1_); auto update_result = - db_no_version->update_node(user_id, "age", Value(26), UpdateType::SET); + db_no_version->update_node("User", user_id, "age", Value(26), UpdateType::SET); ASSERT_TRUE(update_result.ok()); // Temporal query at t0 (should return CURRENT version, not historical) @@ -817,7 +817,7 @@ TEST_F(TemporalQueryTest, NoOpUpdateDoesNotCreateNewVersion) { int64_t user_id = create_simple_user("Alice", 25); // Get the node and count initial versions - auto node_result = db_->get_node_manager()->get_node(user_id); + auto node_result = db_->get_node_manager()->get_node("User", user_id); ASSERT_TRUE(node_result.ok()); auto node = node_result.ValueOrDie(); auto handle = node->get_handle(); @@ -838,11 +838,11 @@ TEST_F(TemporalQueryTest, NoOpUpdateDoesNotCreateNewVersion) { // Update to SAME value at t1 (no-op update) mock_clock_.set_time(t1_); auto update_result = - db_->update_node(user_id, "age", Value(25), UpdateType::SET); + db_->update_node("User", user_id, "age", Value(25), UpdateType::SET); ASSERT_TRUE(update_result.ok()); // Get updated node and count versions again - node_result = db_->get_node_manager()->get_node(user_id); + node_result = db_->get_node_manager()->get_node("User", user_id); ASSERT_TRUE(node_result.ok()); node = node_result.ValueOrDie(); handle = node->get_handle(); diff --git a/tests/where_expression_test.cpp b/tests/where_expression_test.cpp index c3e7e28..b356b19 100644 --- a/tests/where_expression_test.cpp +++ b/tests/where_expression_test.cpp @@ -118,9 +118,9 @@ class WhereExpressionTest : public ::testing::Test { db_->connect(2, "FRIEND", 4).ValueOrDie(); // Create work relationships - db_->connect(0, "WORKS_AT", 10).ValueOrDie(); // Alice -> TechCorp - db_->connect(1, "WORKS_AT", 10).ValueOrDie(); // Bob -> TechCorp - db_->connect(2, "WORKS_AT", 11).ValueOrDie(); // Charlie -> StartupInc + db_->connect(0, "WORKS_AT", 0).ValueOrDie(); // Alice -> TechCorp + db_->connect(1, "WORKS_AT", 0).ValueOrDie(); // Bob -> TechCorp + db_->connect(2, "WORKS_AT", 1).ValueOrDie(); // Charlie -> StartupInc } std::shared_ptr user_schema_; From 8af7f7b9391a9cc29723446b48c8d0274d26e2b5 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Tue, 17 Feb 2026 10:58:00 -0500 Subject: [PATCH 2/5] fixes --- include/logger.hpp | 2 +- src/core.cpp | 46 ++++++++++++++++++++++++++++++--------------- tests/join_test.cpp | 1 + 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/include/logger.hpp b/include/logger.hpp index 81a0c6a..fdc7718 100644 --- a/include/logger.hpp +++ b/include/logger.hpp @@ -336,7 +336,7 @@ constexpr bool is_warn_enabled() { } while (0) // Conditional code blocks - completely eliminated when disabled -#define IF_DEBUG_ENABLED if constexpr (is_debug_enabled()) +#define IF_DEBUG_ENABLED if constexpr (true) #define IF_INFO_ENABLED if constexpr (is_info_enabled()) diff --git a/src/core.cpp b/src/core.cpp index ce37b4b..9ab126f 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -2285,22 +2285,38 @@ arrow::Result> Database::query( .ValueOrDie()) .ValueOrDie(); - // Include matched targets - llvm::DenseSet result = matched_target_ids; + llvm::DenseSet result; - // Add unmatched targets (compare within same schema to avoid ID collision) - llvm::DenseSet unmatched_targets; - dense_difference(target_ids, matched_target_ids, unmatched_targets); - result.insert(unmatched_targets.begin(), unmatched_targets.end()); - - IF_DEBUG_ENABLED { - log_debug( - "traverse type: '{}' (Right/Full), matched_targets=[{}], " - "unmatched_targets=[{}], total={}", - traverse->target().value(), - join_container(matched_target_ids), - join_container(unmatched_targets), - result.size()); + // Check if this is a self-join (same schema for source and target) + if (source_schema == target_schema) { + // Self-join: Exclude nodes that were sources with matches + // (prevents same node appearing as both source and unmatched target) + dense_difference(target_ids, matched_source_ids, result); + IF_DEBUG_ENABLED { + log_debug( + "traverse type: '{}' (Right/Full, self-join), " + "matched_source_ids=[{}], unmatched_targets=[{}], total={}", + traverse->target().value(), + join_container(matched_source_ids), + join_container(result), + result.size()); + } + } else { + // Cross-schema join: Include matched targets + unmatched targets + // (compare IDs within same schema to avoid ID collision) + result = matched_target_ids; + llvm::DenseSet unmatched_targets; + dense_difference(target_ids, matched_target_ids, unmatched_targets); + result.insert(unmatched_targets.begin(), unmatched_targets.end()); + IF_DEBUG_ENABLED { + log_debug( + "traverse type: '{}' (Right/Full, cross-schema), " + "matched_targets=[{}], unmatched_targets=[{}], total={}", + traverse->target().value(), + join_container(matched_target_ids), + join_container(unmatched_targets), + result.size()); + } } query_state.ids[traverse->target().value()] = result; diff --git a/tests/join_test.cpp b/tests/join_test.cpp index 653b911..c9ccebd 100644 --- a/tests/join_test.cpp +++ b/tests/join_test.cpp @@ -1719,6 +1719,7 @@ TEST(JoinTest, MultiPatternWithSharedVars) { } TEST(JoinTest, FullJoinFriendRelationship) { + Logger::get_instance().set_level(LogLevel::DEBUG); auto db = setup_test_db(); // Create friend relationships From 16ffd5e70ccb2ed3dbe49e60a42a83555e483295 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Tue, 17 Feb 2026 11:24:41 -0500 Subject: [PATCH 3/5] fix JoinTest.MultiLevelLeftJoin --- tests/join_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/join_test.cpp b/tests/join_test.cpp index c9ccebd..3e1bf2b 100644 --- a/tests/join_test.cpp +++ b/tests/join_test.cpp @@ -970,7 +970,7 @@ TEST(JoinTest, MultiLevelLeftJoin) { l_id_col->GetScalar(i).ValueOrDie()) ->value; - if (u_id == 0 && f_id == 2 && l_id == 5) { + if (u_id == 0 && f_id == 2 && l_id == 0) { // l_id=0 (IBM per-schema) found_alex_jeff_ibm = true; break; } From 27101d17a9b610d2061a2316ca6bb349b48a6c68 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Tue, 17 Feb 2026 17:13:53 -0500 Subject: [PATCH 4/5] fix database_test --- include/logger.hpp | 2 +- src/core.cpp | 1 - tests/database_test.cpp | 4 +++- tests/join_test.cpp | 1 - 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/logger.hpp b/include/logger.hpp index fdc7718..81a0c6a 100644 --- a/include/logger.hpp +++ b/include/logger.hpp @@ -336,7 +336,7 @@ constexpr bool is_warn_enabled() { } while (0) // Conditional code blocks - completely eliminated when disabled -#define IF_DEBUG_ENABLED if constexpr (true) +#define IF_DEBUG_ENABLED if constexpr (is_debug_enabled()) #define IF_INFO_ENABLED if constexpr (is_info_enabled()) diff --git a/src/core.cpp b/src/core.cpp index 9ab126f..65d0f3b 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -541,7 +541,6 @@ struct QueryState { for (const auto& f : schema->fields()) { std::string fq_name = alias + "." + f->name(); int field_id = next_field_id.fetch_add(1); - names.emplace_back(fq_name); indices.emplace_back(field_id); field_id_to_name[field_id] = fq_name; diff --git a/tests/database_test.cpp b/tests/database_test.cpp index c10d14f..45f31c3 100644 --- a/tests/database_test.cpp +++ b/tests/database_test.cpp @@ -233,7 +233,9 @@ TEST_F(DatabaseTest, CreateDbAndSnapshot) { auto manifest_json = read_json_file(manifest_path).ValueOrDie(); EXPECT_EQ(manifest_json["edge_id_seq"], 0); - EXPECT_EQ(manifest_json["node_id_seq"], 0); + // node_id_seq is now per-schema (a map) + EXPECT_TRUE(manifest_json["node_id_seq_per_schema"].is_object()); + EXPECT_TRUE(manifest_json["node_id_seq_per_schema"].empty()); // No schemas yet EXPECT_EQ(manifest_json["shard_id_seq"], 0); EXPECT_TRUE(manifest_json["edges"].empty()); EXPECT_TRUE(manifest_json["shards"].empty()); diff --git a/tests/join_test.cpp b/tests/join_test.cpp index 3e1bf2b..4760780 100644 --- a/tests/join_test.cpp +++ b/tests/join_test.cpp @@ -1719,7 +1719,6 @@ TEST(JoinTest, MultiPatternWithSharedVars) { } TEST(JoinTest, FullJoinFriendRelationship) { - Logger::get_instance().set_level(LogLevel::DEBUG); auto db = setup_test_db(); // Create friend relationships From 2be0d75d15c640e6cff6ea5f4c0c3109e3505885 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Tue, 17 Feb 2026 17:24:52 -0500 Subject: [PATCH 5/5] formatting --- include/core.hpp | 44 +++++++++++++++++++---------------- include/metadata.hpp | 14 ++++++----- include/node.hpp | 31 +++++++++++++----------- src/core.cpp | 25 ++++++++++---------- src/snapshot.cpp | 5 ++-- tests/database_test.cpp | 3 ++- tests/join_test.cpp | 15 +++++++----- tests/node_test.cpp | 14 +++++------ tests/sharding_test.cpp | 6 +++-- tests/temporal_query_test.cpp | 35 +++++++++++++++++----------- 10 files changed, 109 insertions(+), 83 deletions(-) diff --git a/include/core.hpp b/include/core.hpp index c6d8c13..07160ed 100644 --- a/include/core.hpp +++ b/include/core.hpp @@ -528,7 +528,7 @@ class ShardManager { " not found in schema '", schema_name, "'"); } - arrow::Result update_node(const std::string& schema_name, + arrow::Result update_node(const std::string &schema_name, const int64_t id, const std::shared_ptr &field, const Value &value, @@ -537,39 +537,39 @@ class ShardManager { if (schema_it == shards_.end()) { return arrow::Status::KeyError("Schema not found: ", schema_name); } - + for (const auto &shard : schema_it->second) { if (id >= shard->min_id && id <= shard->max_id) { return shard->update(id, field, value, update_type); } } - - return arrow::Status::KeyError("Node with id ", id, - " not found in schema ", schema_name); + + return arrow::Status::KeyError("Node with id ", id, " not found in schema ", + schema_name); } - arrow::Result update_node(const std::string& schema_name, + arrow::Result update_node(const std::string &schema_name, const int64_t id, const std::string &field_name, const Value &value, const UpdateType update_type) { auto schema_it = shards_.find(schema_name); if (schema_it == shards_.end()) { - return arrow::Status::KeyError("Schema not found: ", schema_name, " in shards"); + return arrow::Status::KeyError("Schema not found: ", schema_name, + " in shards"); } - - auto field = schema_registry_->get(schema_name) - .ValueOrDie() - ->get_field(field_name); - + + auto field = + schema_registry_->get(schema_name).ValueOrDie()->get_field(field_name); + for (const auto &shard : schema_it->second) { if (id >= shard->min_id && id <= shard->max_id) { return shard->update(id, field, value, update_type); } } - return arrow::Status::KeyError("Node with id ", id, - " not found in schema ", schema_name); + return arrow::Status::KeyError("Node with id ", id, " not found in schema ", + schema_name); } arrow::Result>> get_nodes( @@ -767,26 +767,29 @@ class Database { return node; } - arrow::Result update_node(const std::string& schema_name, + arrow::Result update_node(const std::string &schema_name, const int64_t id, const std::shared_ptr &field, const Value &value, const UpdateType update_type) { - return shard_manager_->update_node(schema_name, id, field, value, update_type); + return shard_manager_->update_node(schema_name, id, field, value, + update_type); } - arrow::Result update_node(const std::string& schema_name, + arrow::Result update_node(const std::string &schema_name, const int64_t id, const std::string &field_name, const Value &value, const UpdateType update_type) { - return shard_manager_->update_node(schema_name, id, field_name, value, update_type); + return shard_manager_->update_node(schema_name, id, field_name, value, + update_type); } arrow::Result remove_node(const std::string &schema_name, int64_t node_id) { if (auto res = node_manager_->remove_node(schema_name, node_id); !res) { - return arrow::Status::Invalid("Failed to remove node: ", schema_name, ":", node_id); + return arrow::Status::Invalid("Failed to remove node: ", schema_name, ":", + node_id); } return shard_manager_->remove_node(schema_name, node_id); } @@ -821,7 +824,8 @@ class Database { const std::string &schema_name, TemporalContext *temporal_context = nullptr, size_t chunk_size = 10000) const { - ARROW_ASSIGN_OR_RAISE(const auto schema, schema_registry_->get(schema_name)); + ARROW_ASSIGN_OR_RAISE(const auto schema, + schema_registry_->get(schema_name)); auto arrow_schema = schema->arrow(); ARROW_ASSIGN_OR_RAISE(auto all_nodes, shard_manager_->get_nodes(schema_name)); diff --git a/include/metadata.hpp b/include/metadata.hpp index dc62837..f389a3e 100644 --- a/include/metadata.hpp +++ b/include/metadata.hpp @@ -312,12 +312,14 @@ struct Manifest { std::string id; std::vector shards; std::vector edges; - std::unordered_map node_id_seq_per_schema; // Per-schema ID counters + std::unordered_map + node_id_seq_per_schema; // Per-schema ID counters int64_t edge_id_seq = 0; int64_t shard_id_seq = 0; - NLOHMANN_DEFINE_TYPE_INTRUSIVE(Manifest, id, shards, edges, node_id_seq_per_schema, - edge_id_seq, shard_id_seq); + NLOHMANN_DEFINE_TYPE_INTRUSIVE(Manifest, id, shards, edges, + node_id_seq_per_schema, edge_id_seq, + shard_id_seq); std::string toString() const { std::stringstream ss; @@ -335,12 +337,12 @@ struct Manifest { } ss << "], node_id_seq_per_schema={"; size_t idx = 0; - for (const auto& [schema_name, counter] : node_id_seq_per_schema) { + for (const auto &[schema_name, counter] : node_id_seq_per_schema) { ss << "'" << schema_name << "':" << counter; if (idx++ < node_id_seq_per_schema.size() - 1) ss << ", "; } - ss << "}, edge_id_seq=" << edge_id_seq - << ", shard_id_seq=" << shard_id_seq << "}"; + ss << "}, edge_id_seq=" << edge_id_seq << ", shard_id_seq=" << shard_id_seq + << "}"; return ss.str(); } diff --git a/include/node.hpp b/include/node.hpp index 88eb4d9..757aa82 100644 --- a/include/node.hpp +++ b/include/node.hpp @@ -167,22 +167,22 @@ class NodeManager { ~NodeManager() { node_arena_->clear(); } - arrow::Result> get_node(const std::string& schema_name, - const int64_t id) { + arrow::Result> get_node(const std::string &schema_name, + const int64_t id) { auto schema_it = nodes_.find(schema_name); if (schema_it == nodes_.end()) { return arrow::Status::KeyError("Schema not found: ", schema_name); } - + auto node_it = schema_it->second.find(id); if (node_it == schema_it->second.end()) { return arrow::Status::KeyError("Node not found: ", schema_name, ":", id); } - + return node_it->second; } - bool remove_node(const std::string& schema_name, const int64_t id) { + bool remove_node(const std::string &schema_name, const int64_t id) { auto schema_it = nodes_.find(schema_name); if (schema_it == nodes_.end()) { return false; @@ -296,11 +296,11 @@ class NodeManager { } } - void set_id_counter(const std::string& schema_name, const int64_t value) { + void set_id_counter(const std::string &schema_name, const int64_t value) { id_counters_[schema_name].store(value); } - - int64_t get_id_counter(const std::string& schema_name) const { + + int64_t get_id_counter(const std::string &schema_name) const { auto it = id_counters_.find(schema_name); if (it == id_counters_.end()) { return 0; @@ -311,15 +311,16 @@ class NodeManager { // Get all schema ID counters (for snapshot/manifest) std::unordered_map get_all_id_counters() const { std::unordered_map result; - for (const auto& [schema_name, counter] : id_counters_) { + for (const auto &[schema_name, counter] : id_counters_) { result[schema_name] = counter.load(); } return result; } // Set all schema ID counters (for snapshot/manifest restore) - void set_all_id_counters(const std::unordered_map& counters) { - for (const auto& [schema_name, value] : counters) { + void set_all_id_counters( + const std::unordered_map &counters) { + for (const auto &[schema_name, value] : counters) { id_counters_[schema_name].store(value); } } @@ -327,10 +328,12 @@ class NodeManager { private: // Per-schema ID counters (schema_name -> counter) std::unordered_map> id_counters_; - + // Per-schema node storage (schema_name -> (node_id -> Node)) - std::unordered_map>> nodes_; - + std::unordered_map>> + nodes_; + std::shared_ptr schema_registry_; std::shared_ptr layout_registry_; std::shared_ptr node_arena_; diff --git a/src/core.cpp b/src/core.cpp index 65d0f3b..2c1d9a3 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -1312,9 +1312,10 @@ populate_rows_bfs(int64_t node_id, const SchemaRef& start_schema, auto item = queue.front(); queue.pop(); auto item_schema = item.schema_ref.is_declaration() - ? item.schema_ref.schema() - : query_state.aliases.at(item.schema_ref.value()); - auto node = query_state.node_manager->get_node(item_schema, item.node_id).ValueOrDie(); + ? item.schema_ref.schema() + : query_state.aliases.at(item.schema_ref.value()); + auto node = query_state.node_manager->get_node(item_schema, item.node_id) + .ValueOrDie(); const auto& it_fq = query_state.schema_field_indices.find(item.schema_ref.value()); if (it_fq == query_state.schema_field_indices.end()) { @@ -2172,7 +2173,8 @@ arrow::Result> Database::query( .contains(target_id)) { continue; } - auto node_result = node_manager_->get_node(target_schema, target_id); + auto node_result = + node_manager_->get_node(target_schema, target_id); if (node_result.ok()) { const auto target_node = node_result.ValueOrDie(); if (target_node->schema_name == target_schema) { @@ -2283,21 +2285,21 @@ arrow::Result> Database::query( get_table(target_schema, query_state.temporal_context.get()) .ValueOrDie()) .ValueOrDie(); - + llvm::DenseSet result; - + // Check if this is a self-join (same schema for source and target) if (source_schema == target_schema) { // Self-join: Exclude nodes that were sources with matches - // (prevents same node appearing as both source and unmatched target) + // (prevents same node appearing as both source and unmatched + // target) dense_difference(target_ids, matched_source_ids, result); IF_DEBUG_ENABLED { log_debug( "traverse type: '{}' (Right/Full, self-join), " "matched_source_ids=[{}], unmatched_targets=[{}], total={}", traverse->target().value(), - join_container(matched_source_ids), - join_container(result), + join_container(matched_source_ids), join_container(result), result.size()); } } else { @@ -2313,11 +2315,10 @@ arrow::Result> Database::query( "matched_targets=[{}], unmatched_targets=[{}], total={}", traverse->target().value(), join_container(matched_target_ids), - join_container(unmatched_targets), - result.size()); + join_container(unmatched_targets), result.size()); } } - + query_state.ids[traverse->target().value()] = result; } diff --git a/src/snapshot.cpp b/src/snapshot.cpp index 691ae31..203234a 100644 --- a/src/snapshot.cpp +++ b/src/snapshot.cpp @@ -203,14 +203,15 @@ arrow::Result SnapshotManager::commit() { std::stringstream node_counters_str; node_counters_str << "{"; size_t idx = 0; - for (const auto& [schema_name, counter] : new_manifest.node_id_seq_per_schema) { + for (const auto &[schema_name, counter] : + new_manifest.node_id_seq_per_schema) { node_counters_str << schema_name << ":" << counter; if (idx++ < new_manifest.node_id_seq_per_schema.size() - 1) { node_counters_str << ", "; } } node_counters_str << "}"; - + log_info("Saving counters: edge_id_seq=" + std::to_string(new_manifest.edge_id_seq) + ", node_id_seq_per_schema=" + node_counters_str.str() + diff --git a/tests/database_test.cpp b/tests/database_test.cpp index 45f31c3..bfe5205 100644 --- a/tests/database_test.cpp +++ b/tests/database_test.cpp @@ -235,7 +235,8 @@ TEST_F(DatabaseTest, CreateDbAndSnapshot) { EXPECT_EQ(manifest_json["edge_id_seq"], 0); // node_id_seq is now per-schema (a map) EXPECT_TRUE(manifest_json["node_id_seq_per_schema"].is_object()); - EXPECT_TRUE(manifest_json["node_id_seq_per_schema"].empty()); // No schemas yet + EXPECT_TRUE( + manifest_json["node_id_seq_per_schema"].empty()); // No schemas yet EXPECT_EQ(manifest_json["shard_id_seq"], 0); EXPECT_TRUE(manifest_json["edges"].empty()); EXPECT_TRUE(manifest_json["shards"].empty()); diff --git a/tests/join_test.cpp b/tests/join_test.cpp index 4760780..a60f15e 100644 --- a/tests/join_test.cpp +++ b/tests/join_test.cpp @@ -470,8 +470,9 @@ TEST(JoinTest, MultiPathToSameTarget) { .traverse("f", "works-at", "c2:companies", TraverseType::Inner) .where("c1.id", CompareOp::Eq, Value((int64_t)0)) // Filter for alex's company (IBM ID 0) - .where("c2.id", CompareOp::Eq, - Value((int64_t)0)) // Filter for friend's company (also IBM ID 0) + .where( + "c2.id", CompareOp::Eq, + Value((int64_t)0)) // Filter for friend's company (also IBM ID 0) .build(); auto query_result = db->query(query); @@ -1521,10 +1522,12 @@ TEST(JoinTest, MultiPatternPathThroughFriends) { } // Create relationships - db_custom->connect(0, "FRIEND", 1).ValueOrDie(); // Alex -> Bob - db_custom->connect(1, "FRIEND", 0).ValueOrDie(); // Bob -> Alex - db_custom->connect(0, "WORKS_AT", 0).ValueOrDie(); // Alex -> Google (Company ID 0) - db_custom->connect(1, "WORKS_AT", 1).ValueOrDie(); // Bob -> IBM (Company ID 1) + db_custom->connect(0, "FRIEND", 1).ValueOrDie(); // Alex -> Bob + db_custom->connect(1, "FRIEND", 0).ValueOrDie(); // Bob -> Alex + db_custom->connect(0, "WORKS_AT", 0) + .ValueOrDie(); // Alex -> Google (Company ID 0) + db_custom->connect(1, "WORKS_AT", 1) + .ValueOrDie(); // Bob -> IBM (Company ID 1) // Run the query: MATCH (u:User)-[:FRIEND INNER]->(f:User), (f)-[:WORKS_AT // INNER]->(c:Company) diff --git a/tests/node_test.cpp b/tests/node_test.cpp index bae973e..b069821 100644 --- a/tests/node_test.cpp +++ b/tests/node_test.cpp @@ -88,7 +88,7 @@ TEST_F(NodeTest, NodeManagerGetNode) { int64_t node_id = original_node->id; // Retrieve node - auto retrieved_result = node_manager_->get_node("User",node_id); + auto retrieved_result = node_manager_->get_node("User", node_id); ASSERT_TRUE(retrieved_result.ok()); auto retrieved_node = retrieved_result.ValueOrDie(); @@ -112,15 +112,15 @@ TEST_F(NodeTest, NodeManagerRemoveNode) { int64_t node_id = node->id; // Verify node exists - auto get_result = node_manager_->get_node("User",node_id); + auto get_result = node_manager_->get_node("User", node_id); ASSERT_TRUE(get_result.ok()); // Remove node - bool removed = node_manager_->remove_node("User",node_id); + bool removed = node_manager_->remove_node("User", node_id); EXPECT_TRUE(removed); // Verify node no longer exists - auto get_result_after = node_manager_->get_node("User",node_id); + auto get_result_after = node_manager_->get_node("User", node_id); // EXPECT_FALSE(get_result_after.ok()) << "Node should not exist after // removal"; } @@ -417,8 +417,8 @@ TEST_F(NodeTest, MultipleNodesAndIdCounter) { EXPECT_EQ(node_manager_->get_id_counter("User"), 102); // Verify both nodes exist and are different - auto get1_result = node_manager_->get_node("User",100); - auto get2_result = node_manager_->get_node("User",101); + auto get1_result = node_manager_->get_node("User", 100); + auto get2_result = node_manager_->get_node("User", 101); ASSERT_TRUE(get1_result.ok()); ASSERT_TRUE(get2_result.ok()); @@ -453,7 +453,7 @@ TEST_F(NodeTest, PerformanceTest) { // Access all nodes and verify data for (int i = 0; i < num_nodes; ++i) { - auto node_result = node_manager_->get_node("User",node_ids[i]); + auto node_result = node_manager_->get_node("User", node_ids[i]); ASSERT_TRUE(node_result.ok()) << "Failed to get node " << i; auto node = node_result.ValueOrDie(); diff --git a/tests/sharding_test.cpp b/tests/sharding_test.cpp index d19da94..b19b4ff 100644 --- a/tests/sharding_test.cpp +++ b/tests/sharding_test.cpp @@ -91,7 +91,8 @@ TEST_F(ShardingTest, AddAndRetrieveNodes) { TEST_F(ShardingTest, UpdateNodes) { // Create a node auto node = create_test_node("Original", 0); - auto update_result = db->update_node("test-schema", node->id, "name", Value{"Updated"}, SET); + auto update_result = + db->update_node("test-schema", node->id, "name", Value{"Updated"}, SET); ASSERT_TRUE(update_result.ok()) << "Failed to update node: " << update_result.status().ToString(); @@ -115,7 +116,8 @@ TEST_F(ShardingTest, Compaction) { } // Now remove some nodes to create gaps - auto update_result = db->update_node("test-schema", 1, "name", Value{"Removed"}, SET); + auto update_result = + db->update_node("test-schema", 1, "name", Value{"Removed"}, SET); ASSERT_TRUE(update_result.ok()); // Compact the schema diff --git a/tests/temporal_query_test.cpp b/tests/temporal_query_test.cpp index 33a0cf3..4f43a33 100644 --- a/tests/temporal_query_test.cpp +++ b/tests/temporal_query_test.cpp @@ -187,11 +187,12 @@ TEST_F(TemporalQueryTest, MultipleFieldUpdateAtSameTime) { // Update both age and active at t1 mock_clock_.set_time(t1_); - auto update1 = db_->update_node("User", user_id, "age", Value(31), UpdateType::SET); + auto update1 = + db_->update_node("User", user_id, "age", Value(31), UpdateType::SET); ASSERT_TRUE(update1.ok()); - auto update2 = - db_->update_node("User", user_id, "active", Value(false), UpdateType::SET); + auto update2 = db_->update_node("User", user_id, "active", Value(false), + UpdateType::SET); ASSERT_TRUE(update2.ok()); // Query at current time: should see age=31, active=false @@ -224,13 +225,15 @@ TEST_F(TemporalQueryTest, ClockAdvanceAndQuery) { // Advance time and update mock_clock_.advance_seconds(1); // 1 second after t0 uint64_t update1_time = mock_clock_.now_nanos(); - auto update1 = db_->update_node("User", user_id, "age", Value(36), UpdateType::SET); + auto update1 = + db_->update_node("User", user_id, "age", Value(36), UpdateType::SET); ASSERT_TRUE(update1.ok()); // Advance time and update again mock_clock_.advance_seconds(1); // 2 seconds after t0 uint64_t update2_time = mock_clock_.now_nanos(); - auto update2 = db_->update_node("User", user_id, "age", Value(37), UpdateType::SET); + auto update2 = + db_->update_node("User", user_id, "age", Value(37), UpdateType::SET); ASSERT_TRUE(update2.ok()); // Query current: should see age=37 @@ -299,12 +302,14 @@ TEST_F(TemporalQueryTest, BitemporalQueryWithUpdates) { // Update at t1 mock_clock_.set_time(t1_); - auto update1 = db_->update_node("User", user_id, "age", Value(41), UpdateType::SET); + auto update1 = + db_->update_node("User", user_id, "age", Value(41), UpdateType::SET); ASSERT_TRUE(update1.ok()); // Update at t2 mock_clock_.set_time(t2_); - auto update2 = db_->update_node("User", user_id, "age", Value(42), UpdateType::SET); + auto update2 = + db_->update_node("User", user_id, "age", Value(42), UpdateType::SET); ASSERT_TRUE(update2.ok()); // ======================================================================== @@ -369,7 +374,8 @@ TEST_F(TemporalQueryTest, TemporalQueryBetweenUpdateTimes) { // Update at t1 mock_clock_.set_time(t1_); - auto update1 = db_->update_node("User", user_id, "age", Value(51), UpdateType::SET); + auto update1 = + db_->update_node("User", user_id, "age", Value(51), UpdateType::SET); ASSERT_TRUE(update1.ok()); // Calculate midpoint between t0 and t1 @@ -583,12 +589,14 @@ TEST_F(TemporalQueryTest, NullFieldInVersionChain) { // Update age to 30 at t1 mock_clock_.set_time(t1_); - auto update1 = db_->update_node("User", user_id, "age", Value(30), UpdateType::SET); + auto update1 = + db_->update_node("User", user_id, "age", Value(30), UpdateType::SET); ASSERT_TRUE(update1.ok()); // Update age to NULL at t2 mock_clock_.set_time(t2_); - auto update2 = db_->update_node("User", user_id, "age", Value(), UpdateType::SET); + auto update2 = + db_->update_node("User", user_id, "age", Value(), UpdateType::SET); ASSERT_TRUE(update2.ok()); // Query at t0: should see age=25 @@ -697,7 +705,8 @@ TEST_F(TemporalQueryTest, MultipleNodesIndependentVersions) { int64_t bob_id = create_simple_user("Bob", 30); mock_clock_.set_time(t2_); - auto bob_update = db_->update_node("User", bob_id, "age", Value(31), UpdateType::SET); + auto bob_update = + db_->update_node("User", bob_id, "age", Value(31), UpdateType::SET); ASSERT_TRUE(bob_update.ok()); // Query at t0: should see only Alice (age=25) @@ -774,8 +783,8 @@ TEST_F(TemporalQueryTest, VersioningDisabledFallback) { // Update to age=26 mock_clock_.set_time(t1_); - auto update_result = - db_no_version->update_node("User", user_id, "age", Value(26), UpdateType::SET); + auto update_result = db_no_version->update_node("User", user_id, "age", + Value(26), UpdateType::SET); ASSERT_TRUE(update_result.ok()); // Temporal query at t0 (should return CURRENT version, not historical)