From 0b348365463b28ef7d0e6ae1809d489938ec75d6 Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Tue, 10 Feb 2026 10:11:42 -0500 Subject: [PATCH 1/7] temporal context + node view --- docs/NODE_VIEW_DESIGN.md | 379 ++++++++++++++++++++++ docs/README_temporal_versioning.md | 498 +++++++++++++++++++++++++++++ include/node.hpp | 81 +++++ include/node_arena.hpp | 105 +++++- include/node_view.hpp | 113 +++++++ include/schema_layout.hpp | 24 +- include/temporal_context.hpp | 139 ++++++++ tests/CMakeLists.txt | 17 + tests/node_view_test.cpp | 292 +++++++++++++++++ 9 files changed, 1638 insertions(+), 10 deletions(-) create mode 100644 docs/NODE_VIEW_DESIGN.md create mode 100644 docs/README_temporal_versioning.md create mode 100644 include/node_view.hpp create mode 100644 include/temporal_context.hpp create mode 100644 tests/node_view_test.cpp diff --git a/docs/NODE_VIEW_DESIGN.md b/docs/NODE_VIEW_DESIGN.md new file mode 100644 index 0000000..25b565a --- /dev/null +++ b/docs/NODE_VIEW_DESIGN.md @@ -0,0 +1,379 @@ +# NodeView Design - Complete Implementation + +## Overview + +This document describes the complete `NodeView` design for time-travel queries in TundraDB. + +## Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Query Execution │ +├──────────────────────────────────────────────────────────────────┤ +│ 1. Create TemporalContext with snapshot (valid_time, tx_time) │ +│ 2. For each node, call node.view(&ctx) │ +│ 3. NodeView resolves version once (cached in TemporalContext) │ +│ 4. Read fields from NodeView (no per-field lookups) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +## Components + +### 1. TemporalSnapshot (temporal_context.hpp) + +Represents a point in bitemporal space: + +```cpp +struct TemporalSnapshot { + uint64_t valid_time; // VALIDTIME: when fact was true in domain + uint64_t tx_time; // TXNTIME: when DB knew about the fact + + // UINT64_MAX means "current time" + static TemporalSnapshot as_of_valid(uint64_t vt); // VALIDTIME only + static TemporalSnapshot as_of_tx(uint64_t tt); // TXNTIME only + bool is_current() const; // Both times = UINT64_MAX +}; +``` + +**Usage:** +```cpp +// Time-travel to specific VALIDTIME +auto snapshot = TemporalSnapshot::as_of_valid(1704067200000); + +// Bitemporal: specific valid & tx times +auto snapshot = TemporalSnapshot(1704067200000, 1706745600000); +``` + +--- + +### 2. TemporalContext (temporal_context.hpp) + +Per-query state for temporal queries: + +```cpp +class TemporalContext { + TemporalSnapshot snapshot_; + std::unordered_map version_cache_; // node_id -> version + +public: + explicit TemporalContext(TemporalSnapshot snapshot); + + // Resolve visible version for a node (with caching) + VersionInfo* resolve_version(int64_t node_id, const NodeHandle& handle); + + void clear_cache(); +}; +``` + +**Key Features:** +- **Version resolution**: Finds the visible version at the snapshot +- **Caching**: Avoids repeated version chain traversals +- **Bitemporal visibility**: Checks both VALIDTIME and TXNTIME + +**Visibility Rule:** +```cpp +A version is visible if: + (version.valid_from <= snapshot.valid_time < version.valid_to) + AND + (version.tx_from <= snapshot.tx_time < version.tx_to) +``` + +--- + +### 3. NodeView (node_view.hpp) + +Lightweight view of a Node at a specific temporal snapshot: + +```cpp +class NodeView { + Node* node_; + VersionInfo* resolved_version_; // Resolved once at construction + NodeArena* arena_; + SchemaLayout* layout_; + +public: + NodeView(Node* node, VersionInfo* resolved_version, + NodeArena* arena, SchemaLayout* layout); + + // Same interface as Node + arrow::Result get_value_ptr(const Field& field) const; + arrow::Result get_value_ref(const Field& field) const; + arrow::Result get_value(const Field& field) const; + + bool is_visible() const; // false if node didn't exist at snapshot + const VersionInfo* get_resolved_version() const; + Node* get_node() const; +}; +``` + +**Key Features:** +- **Lightweight**: Only 24 bytes (3 pointers) +- **No per-field lookups**: Version resolved once at construction +- **Same interface as Node**: Drop-in replacement for field access +- **Handles non-existence**: `is_visible() == false` if node didn't exist at snapshot + +--- + +### 4. Node::view() Method (node.hpp) + +Factory method on Node to create NodeView: + +```cpp +class Node { +public: + NodeView view(TemporalContext* ctx = nullptr) { + if (!ctx || !handle_) { + // No temporal context -> use current version + return NodeView(this, handle_->version_info_, arena_.get(), layout_.get()); + } + + // Resolve version using TemporalContext (cached) + VersionInfo* resolved = ctx->resolve_version(id, *handle_); + return NodeView(this, resolved, arena_.get(), layout_.get()); + } +}; +``` + +--- + +### 5. VersionInfo Extension (node_arena.hpp) + +Extended with TXNTIME support: + +```cpp +struct VersionInfo { + uint64_t version_id; + + // VALIDTIME (domain) + uint64_t valid_from; + uint64_t valid_to; // UINT64_MAX = INF + + // TXNTIME (system knowledge) - NEW + uint64_t tx_from; + uint64_t tx_to; // UINT64_MAX = INF + + VersionInfo* prev; + llvm::SmallDenseMap updated_fields; + + // Bitemporal visibility check + bool is_visible_at(uint64_t valid_time, uint64_t tx_time) const; + + // Find version visible at snapshot + const VersionInfo* find_version_at_snapshot(uint64_t vt, uint64_t tt) const; +}; +``` + +**Constructor:** +```cpp +VersionInfo(uint64_t vid, uint64_t ts_from, VersionInfo* prev_ver = nullptr) + : version_id(vid), + valid_from(ts_from), + tx_from(ts_from), // Initially tx_from = valid_from + prev(prev_ver) {} +``` + +--- + +### 6. NodeArena Helper Methods (node_arena.hpp) + +Added methods for NodeView to resolve fields: + +```cpp +class NodeArena { +public: + // Get field value pointer starting from a specific version + const char* get_field_value_ptr_from_version( + const VersionInfo* version, + const SchemaLayout& layout, + const Field& field) const; + + // Get field value starting from a specific version + arrow::Result get_field_value_from_version( + const VersionInfo* version, + const SchemaLayout& layout, + const Field& field) const; +}; +``` + +--- + +## Usage Examples + +### Example 1: Current Version (No Time-Travel) + +```cpp +Node node = ...; + +// Create view without temporal context (current version) +NodeView view = node.view(nullptr); + +// Read fields +auto age = view.get_value(age_field); +auto dept = view.get_value(department_field); +``` + +--- + +### Example 2: Time-Travel Query (VALIDTIME) + +```cpp +// Scenario: Node has 3 versions at times t0, t1, t2 +// v0 (t0): age=25, dept="Engineering" +// v1 (t1): age=26, dept="Engineering" +// v2 (t2): age=26, dept="Sales" + +// Query at t1 (between v1 and v2) +TemporalContext ctx(TemporalSnapshot::as_of_valid(t1)); +NodeView view = node.view(&ctx); + +auto age = view.get_value(age_field); // Returns 26 (from v1) +auto dept = view.get_value(dept_field); // Returns "Engineering" (from v1) +``` + +--- + +### Example 3: Bitemporal Query (VALIDTIME + TXNTIME) + +```cpp +// Query what the DB knew at tx_time=100 about valid_time=90 +TemporalContext ctx(TemporalSnapshot(90, 100)); +NodeView view = node.view(&ctx); + +// Returns values from the version visible at (vt=90, tt=100) +auto age = view.get_value(age_field); +``` + +--- + +### Example 4: Multiple Field Reads (No Repeated Lookups) + +```cpp +TemporalContext ctx(TemporalSnapshot::as_of_valid(timestamp)); +NodeView view = node.view(&ctx); // Version resolved once here + +// All subsequent field reads use the cached resolved version +auto name = view.get_value(name_field); +auto age = view.get_value(age_field); +auto dept = view.get_value(department_field); +// No map lookups, no repeated version chain traversals! +``` + +--- + +### Example 5: Node Doesn't Exist at Snapshot + +```cpp +uint64_t before_creation = node_creation_time - 100000; + +TemporalContext ctx(TemporalSnapshot::as_of_valid(before_creation)); +NodeView view = node.view(&ctx); + +if (!view.is_visible()) { + // Node didn't exist at this time + std::cout << "Node not visible\n"; +} + +// Accessing fields returns KeyError +auto age_result = view.get_value(age_field); +assert(!age_result.ok() && age_result.status().IsKeyError()); +``` + +--- + +## Performance Characteristics + +### Memory + +| Component | Size | Notes | +|-----------|------|-------| +| `TemporalSnapshot` | 16 bytes | 2 × uint64_t | +| `TemporalContext` | 40+ bytes | Snapshot + version cache map | +| `NodeView` | 24 bytes | 3 pointers | +| `VersionInfo` extension | +16 bytes | Added tx_from/tx_to | + +### Time Complexity + +| Operation | Complexity | Notes | +|-----------|------------|-------| +| `TemporalContext::resolve_version` | O(N) first time, O(1) cached | N = version chain length | +| `NodeView` construction | O(1) | Just pointer assignment | +| `NodeView::get_value` | O(M) | M = field chain depth (usually 1-2) | + +**Key Optimization**: Version resolution happens **once per node per query**, not per field access. + +--- + +## Benefits + +1. **No Per-Field Lookups**: Version resolved once, cached in `TemporalContext` +2. **Unified Interface**: `NodeView` has same API as `Node` (drop-in replacement) +3. **Bitemporal Support**: Ready for VALIDTIME + TXNTIME queries +4. **Efficient**: Minimal overhead (24 bytes per view) +5. **Handles Non-Existence**: Gracefully handles nodes that didn't exist at snapshot +6. **Thread-Safe Reads**: Multiple queries can use separate `TemporalContext` instances + +--- + +## Tests + +See `tests/node_view_test.cpp` for comprehensive tests: + +- ✅ `CurrentVersionView`: No temporal context (current version) +- ✅ `TimeTravelValidTime`: Query at different points in VALIDTIME +- ✅ `TemporalContextCaching`: Version resolution caching +- ✅ `NodeNotVisibleAtSnapshot`: Node didn't exist yet +- ✅ `MultipleFieldReadsFromSameView`: No repeated lookups + +Run tests: +```bash +cd build && ctest -R NodeViewTest -V +``` + +--- + +## Next Steps + +### Phase 2: Query Syntax (Pending) + +Add temporal query syntax to TundraQL grammar: + +```sql +-- Time-travel query +AS OF VALIDTIME 1704067200000 +AS OF TXNTIME 1706745600000 +MATCH (u:User) WHERE u.name = "Alice" +SELECT u.department; + +-- Retroactive correction +MATCH (u:User) WHERE u.name = "Alice" +FOR VALIDTIME [1704067200000, INF) +SET u.department = "Sales"; +``` + +### Phase 3: Interval Surgery (Pending) + +Implement retroactive corrections with interval splitting/coalescing. + +### Phase 4: DIFF Operator (Pending) + +Implement audit trail queries: + +```sql +DIFF FROM (VALIDTIME 1704067200000, TXNTIME 1706745600000) + TO (VALIDTIME 1706745600000, TXNTIME now) +MATCH (u:User) WHERE u.name = "Alice"; +``` + +--- + +## Summary + +The NodeView design provides: +- ✅ Efficient time-travel queries (version resolved once per node) +- ✅ Bitemporal support (VALIDTIME + TXNTIME) +- ✅ Clean API (same as Node, drop-in replacement) +- ✅ Comprehensive tests +- ✅ Ready for query syntax integration + +**Current Status**: Phase 1 Complete (Infrastructure + Tests) ✅ + diff --git a/docs/README_temporal_versioning.md b/docs/README_temporal_versioning.md new file mode 100644 index 0000000..606b4c5 --- /dev/null +++ b/docs/README_temporal_versioning.md @@ -0,0 +1,498 @@ +# TundraDB Temporal Versioning Spec (VALIDTIME + TXNTIME) + +This README is a developer-focused spec for implementing temporal queries and temporal updates (including retroactive corrections) using a `VersionInfo` chain per logical record (node or edge). + +The goal is to make the model *click*: +- **VALIDTIME** = when a fact is true in the domain/world +- **TXNTIME** = when the database recorded/“knew” that fact (system time) + +> **TL;DR:** You do not rewrite history by editing old versions. You create new versions with new (valid/txn) intervals. The only safe in-place mutation is closing `tx_to` on previously-current versions. + +--- + +## 1) Data Model + +### 1.1 Version record + +```cpp +struct VersionInfo { + uint64_t version_id; + + // Domain validity (world time): [valid_from, valid_to) + uint64_t valid_from; + uint64_t valid_to; // default = INF + + // System validity (knowledge/commit time): [tx_from, tx_to) + uint64_t tx_from; // commit timestamp or monotonic commit id + uint64_t tx_to; // default = INF + + // Sparse deltas: field_idx -> value pointer (nullptr means explicit NULL) + map updated_fields; + + // Newest -> older + VersionInfo* prev; +}; +``` + +### 1.2 Logical record vs versions + +A **logical record** (a node id or edge id) is stable. It has a chain of versions: + +``` +logical_id: 42 + head -> v9 -> v8 -> v7 -> ... +``` + +Each `VersionInfo` represents a set of *field deltas* and an interval in **valid-time** and **txn-time**. + +### 1.3 Optional: edge identity choices + +You must choose how to identify edges: + +**A) Stable `edge_id` (recommended)** +- An edge has a stable id, and versions can change edge properties (including dst) over time. +- DIFF can show `UPDATED`. + +**B) Tuple identity** +- Edge identity is `(src, label, dst, key?)`. +- Changing `dst` is modeled as `REMOVED+ADDED`. + +Both are acceptable; A is better for bitemporal editing and auditing. + +--- + +## 2) Time Semantics + +### 2.1 VALIDTIME (domain time) + +- Answers: **“When was it true in the world?”** +- Used for: temporal graphs, historical queries (friendship at date), truth as-of time. + +### 2.2 TXNTIME (system/knowledge time) + +- Answers: **“When did the DB record/believe it?”** +- Used for: audit/replay (“what did we know then?”), backtesting, preventing future leakage, late-arriving data, corrections. + +### 2.3 Visibility rule (bitemporal) + +Given query snapshot `(vtime, txtime)`, a version `v` is visible if: + +``` +v.valid_from <= vtime < v.valid_to +AND +v.tx_from <= txtime < v.tx_to +``` + +If you only implement VALIDTIME initially, treat `tx_from=0, tx_to=INF` for all versions (or ignore tx constraints). + +--- + +## 3) Immutability / Mutability Rules (Critical) + +**After commit:** + +- `valid_from`, `valid_to` are **immutable** (do not rewrite domain history). +- `tx_from` is **immutable** (it is commit time). +- `tx_to` is **mutable only to close old versions** when a newer belief is written. + +> The only allowed in-place mutation is: +> +> **`old_version.tx_to = tx_now`** +> +> This preserves reproducible snapshots: old versions remain visible for earlier `txtime`. + +--- + +## 4) Field Materialization with `updated_fields` + +`updated_fields` is a sparse delta map. Resolving a field `f` at a visible version: + +``` +value resolve(logical_id, field_idx, snapshot): + v = visible_version(logical_id, snapshot) + while v != null: + if v.updated_fields contains field_idx: + return v.updated_fields[field_idx] // may be NULL explicitly + v = v.prev + return DEFAULT/ABSENT (schema default) +``` + +### 4.1 Performance note + +Avoid full materialization: +- resolve only fields referenced by the query +- cache `(logical_id, snapshot, field_idx)` during query execution + +--- + +## 5) Snapshot Selection in Queries + +### 5.1 Read queries + +A query can specify one or two time axes: + +```sql +AS OF VALIDTIME @v +AS OF TXNTIME @t +MATCH ... +SELECT ... +``` + +If `AS OF TXNTIME` is omitted, treat it as `now` (or latest commit) for “current belief”. + +### 5.2 Pattern matching + +`AS OF` applies to: +- nodes (their visible versions) +- edges (their visible versions) +- edge existence (edges can appear/disappear by valid/txn windows) + +--- + +## 6) Updates: Concepts + +There are two distinct intents: + +### 6.1 “Current update” (world changes now) + +Example: “Alice moved departments today.” +- `valid_from = now` +- this is not a retroactive correction; it is a new fact from now onward. + +### 6.2 “Correction” (retroactive fix) + +Example: “We learned Alice actually moved on Jan 14.” +- `valid_from = Jan14` (back in the past) +- `tx_from = commit_time` (when we learned and recorded it) + +This is why VALIDTIME != TXNTIME. + +--- + +## 7) Recommended Update Syntax (Minimal Extensions) + +Your current syntax: + +```sql +MATCH (u:User) WHERE u.name = "Alice" +SET u.age = 26, u.department = "Sales"; +``` + +### 7.1 Add a valid-time target interval + +To make corrections explicit: + +```sql +MATCH (u:User) WHERE u.name = "Alice" +FOR VALIDTIME [@vf, @vt) +SET u.department = "Sales"; +``` + +- `FOR VALIDTIME [...]` defines the **domain interval** to patch. +- `tx_from` is automatically set to `tx_now` at commit. + +### 7.2 Optional: `AS OF` for target selection + +This controls how MATCH is evaluated: + +```sql +MATCH (u:User)-[:MEMBER_OF]->(t:Team) +AS OF VALIDTIME @v_sel +AS OF TXNTIME @t_sel +WHERE t.name = "Risk" +FOR VALIDTIME [@vf, INF) +SET u.department = "Sales"; +``` + +This helps avoid “future knowledge” when running repair jobs. + +--- + +## 8) Core Algorithms + +### 8.1 Helper: interval overlap + +Intervals are half-open: `[from, to)`. + +``` +overlaps([a_from, a_to), [b_from, b_to)) = (a_from < b_to) && (b_from < a_to) +``` + +### 8.2 Finding currently-believed versions at commit time + +When writing at `tx_now`, find versions visible in TX at `tx_now`: + +``` +tx_from <= tx_now < tx_to +``` + +(Optionally also filtered by logical id and other constraints.) + +### 8.3 Interval surgery for corrections + +Operation: +- Patch fields for valid interval `[corr_from, corr_to)` (corr_to may be INF) +- Assign `tx_from = tx_now` + +**High-level:** +1. Find versions currently-believed at `tx_now` +2. For each version overlapping `[corr_from, corr_to)`: + - close old belief: `old.tx_to = tx_now` + - insert up to 3 new versions at `tx_now`: + - left remainder + - corrected middle + - right remainder +3. Coalesce adjacent segments with identical values + +--- + +## 9) Worked Examples (Version Chains) + +All times below are simplified integers. + +### 9.1 Example A: current update (not correction) + +Alice department becomes Sales at time 100, recorded at tx=100. + +Existing belief: +- dept=Eng for `[1, INF)` at tx `[1, INF)` + +Write: `SET dept=Sales` for `[100, INF)` at `tx_now=100`. + +Result versions: + +``` +v1: dept=Eng + valid [1, INF) + tx [1, 100) // closed at tx=100 + +v2: dept=Eng + valid [1, 100) + tx [100, INF) // reassert unchanged left remainder (optional; see notes) + +v3: dept=Sales + valid [100, INF) + tx [100, INF) +``` + +**Optimization:** You can avoid writing `v2` by keeping an unchanged segment represented implicitly, +but many implementations keep explicit segments for simplicity and coalescing. + +### 9.2 Example B: retroactive correction + +Reality: Alice moved to Sales at valid time 80, but the DB learned this only at tx=120. + +Initial belief (at tx=1): +- dept=Eng valid `[1, INF)` tx `[1, INF)` + +Correction write at `tx_now=120`: +- patch dept=Sales for valid `[80, INF)` + +Result: + +``` +v1: dept=Eng + valid [1, INF) + tx [1, 120) // old belief still visible for txtime<120 + +v2: dept=Eng + valid [1, 80) + tx [120, INF) + +v3: dept=Sales + valid [80, INF) + tx [120, INF) +``` + +Now: +- Query `(vtime=90, txtime=100)` -> sees **Eng** (what we believed then) +- Query `(vtime=90, txtime=130)` -> sees **Sales** (corrected belief) + +--- + +## 10) Corner Cases (Must Handle) + +### 10.1 No-op update + +If the patch does not change any field values over `[vf, vt)`: +- Do **not** create new versions +- Return “0 changed” (and DIFF empty) + +### 10.2 Boundary already exists + +If the chain already has a segment boundary at `vf`: +- only patch the overlapping segments +- avoid extra splitting + +### 10.3 Patch overlaps multiple segments + +Example segments: +- `[1,50) Eng`, `[50,120) Ops`, `[120, INF) Eng` +Patch `[100, INF) -> Sales` + +You must: +- split `[50,120)` into `[50,100) Ops` + `[100,120) Sales` +- patch `[120, INF) Eng -> Sales` +- coalesce adjacent Sales segments if produced + +### 10.4 Gaps in valid-time + +If you allow gaps (no version covers some `[t1,t2)`), define behavior: +- treat as “unknown / absent” (no visible row) +- `FOR VALIDTIME` may *create* versions in gaps (new facts) + +### 10.5 Explicit NULL vs inherited + +In `updated_fields`, a `NULL` value means “explicitly NULL”. +Absence of field in map means “inherit from prev”. + +DIFF must distinguish: +- field becomes NULL +- field not present / unchanged + +### 10.6 Deleting a property vs deleting an edge + +Two approaches: +- **tombstone**: an edge version with a deleted flag +- **interval end**: set valid_to to deletion time (via new versions; do not mutate old valid_to) + +Recommended: represent deletion as versioning: +- close old belief in TX (`tx_to = tx_now`) +- write new version(s) that remove existence for a valid interval + - for edges, this may mean the edge simply has no visible version for that interval + - or a tombstone version that suppresses older versions in that interval + +### 10.7 Multiple corrections over time + +If you correct the same logical record multiple times: +- you create multiple tx-time “belief layers” +- queries at different `txtime` see different beliefs +- coalesce within the same tx_from layer if desired + +### 10.8 Concurrent writers + +If concurrent commits can write the same logical record: +- you must serialize updates per logical id, or +- ensure deterministic conflict resolution (e.g., commit id ordering) + +--- + +## 11) DIFF Operator (Audit-Ready) + +DIFF compares two snapshots: + +Snapshot A: `(vA, tA)` +Snapshot B: `(vB, tB)` + +### 11.1 Entity diff (nodes/edges) + +```sql +DIFF +FROM (VALIDTIME @vA, TXNTIME @tA) +TO (VALIDTIME @vB, TXNTIME @tB) +MATCH (u:User {id:"Alice"})-[e:WORKS_AT]->(org) +RETURN u, e, org; +``` + +Output shape (Arrow-friendly): +- `diff_type` in {`ADDED`, `REMOVED`, `UPDATED`} +- `entity_kind` in {`node`, `edge`} +- identifiers (`label`, `id` / `src,dst,type`) +- optional field-level details (`field`, `old_value`, `new_value`) + +### 11.2 Result diff (diff a query’s output) + +```sql +DIFF RESULT +FROM (...) TO (...) +MATCH ... +SELECT ... +KEY BY ; +``` + +This is ideal for “why did this decision change?” audits. + +--- + +## 12) Implementation Checklist + +### 12.1 Required primitives + +- `visible_version(logical_id, vtime, txtime)` +- `resolve_field(logical_id, field_idx, snapshot)` +- `find_versions_visible_in_tx(logical_id, tx_now)` (for writers) +- `insert_version(logical_id, VersionInfo*)` +- `close_version_tx(VersionInfo*, tx_now)` (mutate `tx_to`) +- `coalesce_adjacent_versions(logical_id, tx_from_layer)` (optional but recommended) + +### 12.2 Writer invariants to maintain + +- For a fixed `(logical_id, tx_layer)` the valid-time segments should not overlap. +- For a fixed `(logical_id, snapshot)` there should be at most one visible version per valid-time point. + +--- + +## 13) Recommended Defaults + +- Use half-open intervals `[from, to)`. +- Represent INF as `UINT64_MAX`. +- Use commit id or monotonic clock for `tx_from`. +- Default read behavior: + - If `AS OF TXNTIME` is not specified, treat as “latest” (now). + - If `AS OF VALIDTIME` is not specified, treat as “now” (or required). + +--- + +## 14) Appendix: ASCII Diagram Cheat Sheet + +### 14.1 Single correction (retroactive) + +``` +Time axis (VALIDTIME): 1 -------- 80 --------------- INF +Old belief (tx<120): Eng===============================> +New belief (tx>=120): Eng==========| Sales==============> + ^ split at 80 +TX axis: tx<120 sees old; tx>=120 sees new +``` + +--- + +## 15) Notes on Storage Minimization (Optional) + +You can reduce version explosion by: +- writing only the patched middle versions +- leaving unchanged parts represented by older versions plus careful tx windows +…but this complicates invariants. + +For a first implementation, prefer: +- explicit left/mid/right segments +- coalescing adjacent identical segments + +--- + +## 16) Examples you can paste into docs + +### Simple correction update + +```sql +MATCH (u:User) WHERE u.name = "Alice" +FOR VALIDTIME [80, INF) +SET u.department = "Sales"; +``` + +### Safe replay query + +```sql +AS OF VALIDTIME 90 +AS OF TXNTIME 100 +MATCH (u:User) WHERE u.name="Alice" +SELECT u.department; +``` + +--- + +If you want, I can also generate a short `spec_tests.md` with: +- input version chains +- update operations +- expected resulting chains +- expected query outputs at various snapshots diff --git a/include/node.hpp b/include/node.hpp index db660c7..ac67351 100644 --- a/include/node.hpp +++ b/include/node.hpp @@ -8,7 +8,9 @@ #include "logger.hpp" #include "node_arena.hpp" +#include "node_view.hpp" #include "schema.hpp" +#include "temporal_context.hpp" #include "types.hpp" namespace tundradb { @@ -84,6 +86,9 @@ class Node { [[nodiscard]] std::shared_ptr get_schema() const { return schema_; } + // Get node handle (for testing and internal use) + [[nodiscard]] NodeHandle *get_handle() const { return handle_.get(); } + [[deprecated]] arrow::Result update(const std::string &field, Value value, UpdateType update_type) { @@ -119,6 +124,29 @@ class Node { const Value &value) { return update(field, value, SET); } + + /** + * Create a temporal view of this node. + * + * @param ctx TemporalContext with snapshot (valid_time, tx_time). + * If nullptr, returns view of current version (no time-travel). + * @return NodeView that resolves version once and caches it. + * + * Usage: + * TemporalContext ctx(TemporalSnapshot::as_of_valid(timestamp)); + * auto view = node.view(&ctx); + * auto age = view.get_value_ptr(age_field); + */ + NodeView view(TemporalContext *ctx = nullptr) { + if (!ctx) { + // No temporal context or no handle -> use current version + return {this, handle_->version_info_, arena_.get(), layout_}; + } + + // Resolve version using TemporalContext + VersionInfo *resolved = ctx->resolve_version(id, *handle_); + return {this, resolved, arena_.get(), layout_}; + } }; class NodeManager { @@ -280,6 +308,59 @@ class NodeManager { } }; +// ============================================================================ +// NodeView inline implementations (after Node is fully defined) +// ============================================================================ + +inline arrow::Result NodeView::get_value_ptr( + const std::shared_ptr &field) const { + assert(arena_ != nullptr && "NodeView created with null arena"); + assert(node_ != nullptr && "NodeView created with null node"); + + if (resolved_version_ == nullptr) { + // Non-versioned node -> delegate to Node + return node_->get_value_ptr(field); + } + + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Versioned node must have a handle"); + + return arena_->get_field_value_ptr_from_version(*handle, resolved_version_, + layout_, field); +} + +inline arrow::Result NodeView::get_value( + const std::shared_ptr &field) const { + assert(arena_ != nullptr && "NodeView created with null arena"); + assert(node_ != nullptr && "NodeView created with null node"); + + if (resolved_version_ == nullptr) { + // Non-versioned node -> delegate to Node + return node_->get_value(field); + } + + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Versioned node must have a handle"); + + return arena_->get_field_value_from_version(*handle, resolved_version_, + layout_, field); +} + +inline bool NodeView::is_visible() const { + assert(arena_ != nullptr && "NodeView created with null arena"); + assert(node_ != nullptr && "NodeView created with null node"); + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Node must have a handle"); + + // Non-versioned nodes are always visible + if (!handle->is_versioned()) { + return true; + } + + // For versioned nodes, check if we found a visible version at the snapshot + return resolved_version_ != nullptr; +} + } // namespace tundradb #endif // NODE_HPP \ No newline at end of file diff --git a/include/node_arena.hpp b/include/node_arena.hpp index 7b54022..7c5c5c9 100644 --- a/include/node_arena.hpp +++ b/include/node_arena.hpp @@ -26,13 +26,24 @@ struct NodeHandle; * * Stores only changed fields; forms a linked list via prev pointer. * All versions share the same base node data. + * + * Bitemporal support: + * - valid_from/valid_to: VALIDTIME (when the fact was true in the domain) + * - tx_from/tx_to: TXNTIME (when the database knew about the fact) */ struct VersionInfo { - // Temporal validity interval: [valid_from, valid_to) + // Version identifier uint64_t version_id = 0; + + // VALIDTIME: domain validity interval [valid_from, valid_to) uint64_t valid_from = 0; uint64_t valid_to = std::numeric_limits::max(); + // TXNTIME: system knowledge interval [tx_from, tx_to) + // When the database recorded/believed this version + uint64_t tx_from = 0; + uint64_t tx_to = std::numeric_limits::max(); + // Linked list to previous version VersionInfo* prev = nullptr; @@ -45,14 +56,38 @@ struct VersionInfo { VersionInfo() = default; + // Constructor: initializes both valid and tx times to the same value VersionInfo(uint64_t vid, uint64_t ts_from, VersionInfo* prev_ver = nullptr) - : version_id(vid), valid_from(ts_from), prev(prev_ver) {} + : version_id(vid), + valid_from(ts_from), + tx_from(ts_from), // Initially tx_from = valid_from + prev(prev_ver) {} + // Check if valid at a specific VALIDTIME bool is_valid_at(uint64_t ts) const { return valid_from <= ts && ts < valid_to; } - // O(N) + // Check if visible at a bitemporal snapshot (valid_time, tx_time) + bool is_visible_at(uint64_t valid_time, uint64_t tx_time) const { + return (valid_from <= valid_time && valid_time < valid_to) && + (tx_from <= tx_time && tx_time < tx_to); + } + + // Find version visible at bitemporal snapshot + const VersionInfo* find_version_at_snapshot(uint64_t valid_time, + uint64_t tx_time) const { + const VersionInfo* current = this; + while (current != nullptr) { + if (current->is_visible_at(valid_time, tx_time)) { + return current; + } + current = current->prev; + } + return nullptr; + } + + // Legacy: find version at VALIDTIME only (ignores TXNTIME) const VersionInfo* find_version_at_time(uint64_t ts) const { const VersionInfo* current = this; while (current != nullptr) { @@ -655,6 +690,65 @@ class NodeArena { return version_counter_.load(std::memory_order_relaxed); } + /** + * Get the field value pointer starting from a specific version. + * Used by NodeView for temporal queries. + * + * @param handle NodeHandle (for accessing base node if needed) + * @param version Starting version (pre-resolved by TemporalContext) + * @param layout Schema layout + * @param field Field to read + * @return Pointer to field data or error if not found + */ + static const char* get_field_value_ptr_from_version( + const NodeHandle& handle, const VersionInfo* version, + const std::shared_ptr& layout, + const std::shared_ptr& field) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return nullptr; + } + + // Try to find in version chain first + const char* field_ptr = + get_field_ptr_from_version_chain(version, field_layout->index); + + if (field_ptr != nullptr) { + return field_ptr; + } + + // Not in version chain, read from base node + return layout->get_field_value_ptr(static_cast(handle.ptr), + field_layout->index); + } + + /** + * Get field value starting from a specific version. + * Used by NodeView for temporal queries. + */ + static arrow::Result get_field_value_from_version( + const NodeHandle& handle, const VersionInfo* version, + const std::shared_ptr& layout, + const std::shared_ptr& field) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return arrow::Status::KeyError("Field not found in layout"); + } + + // Try to find in version chain first + const char* field_ptr = + get_field_ptr_from_version_chain(version, field_layout->index); + + if (field_ptr != nullptr) { + // Found in version chain, read directly from the field pointer + return layout->get_field_value_from_ptr(field_ptr, *field_layout); + } + + // Not in version chain, read from base node + return layout->get_field_value(static_cast(handle.ptr), + *field_layout); + } + private: static uint64_t get_current_timestamp_ns() { auto now = std::chrono::system_clock::now(); @@ -699,8 +793,7 @@ class NodeArena { /** Traverse the version chain to find field pointer. */ static const char* get_field_ptr_from_version_chain( - const VersionInfo* version_info, uint16_t field_idx, - const SchemaLayout* layout) { + const VersionInfo* version_info, uint16_t field_idx) { const VersionInfo* current = version_info; while (current != nullptr) { // Check if this version has an override for this field @@ -711,7 +804,7 @@ class NodeArena { current = current->prev; } - // Not found in any version, would need to read from base node + // not found in any version, would need to read from the base node // (caller should handle this case) return nullptr; } diff --git a/include/node_view.hpp b/include/node_view.hpp new file mode 100644 index 0000000..23eee3b --- /dev/null +++ b/include/node_view.hpp @@ -0,0 +1,113 @@ +#ifndef NODE_VIEW_HPP +#define NODE_VIEW_HPP + +#include + +#include "node_arena.hpp" +#include "schema.hpp" +#include "temporal_context.hpp" +#include "types.hpp" + +namespace tundradb { + +// Forward declarations +class Node; + +/** + * NodeView: lightweight view of a Node at a specific temporal snapshot. + * + * Purpose: + * - Provides the same field access interface as Node + * - Resolves version once (at construction), then uses it for all field reads + * - Avoids per-field map lookups in TemporalContext + * + * Lifecycle: + * - Created by Node::view(TemporalContext*) + * - Should be short-lived (lifetime tied to query execution) + * + * Usage: + * TemporalContext ctx(TemporalSnapshot::as_of_valid(timestamp)); + * auto view = node->view(&ctx); + * auto age = view.get_value_ptr(age_field); // Uses resolved version + */ +class NodeView { + private: + Node* node_; // Back-reference to Node (for node_id, etc.) + VersionInfo* resolved_version_; // Resolved once at construction + NodeArena* arena_; // For field resolution + std::shared_ptr + layout_; // Schema layout (shared_ptr for proper lifetime) + + public: + /** + * Constructor: resolves version immediately. + * + * If ctx is nullptr, uses current version (no time-travel). + * If no version is visible at snapshot, resolved_version_ will be nullptr. + */ + NodeView(Node* node, VersionInfo* resolved_version, NodeArena* arena, + std::shared_ptr layout) + : node_(node), + resolved_version_(resolved_version), + arena_(arena), + layout_(std::move(layout)) {} + + /** + * Get field value pointer (same interface as Node). + * + * If resolved_version_ is nullptr: + * - Node is non-versioned -> read from base node via Node::get_value_ptr + * + * Otherwise: + * - Uses arena to resolve field from version chain + * - Starts from resolved_version_ (already filtered by time) + */ + arrow::Result get_value_ptr( + const std::shared_ptr& field) const; + + /** + * Get field value as ValueRef (lightweight reference to data). + */ + arrow::Result get_value_ref( + const std::shared_ptr& field) const { + auto ptr_result = get_value_ptr(field); + if (!ptr_result.ok()) { + return ptr_result.status(); + } + + return ValueRef{ptr_result.ValueOrDie(), field->type()}; + } + + /** + * Get field value (copies data into Value). + */ + arrow::Result get_value(const std::shared_ptr& field) const; + + /** + * Check if this view represents a visible node. + * + * Returns true if: + * - Node is non-versioned (resolved_version_ == nullptr && arena exists) + * - Node is versioned and has a visible version at the snapshot + * + * Returns false if: + * - Node is versioned but didn't exist at the temporal snapshot + */ + [[nodiscard]] bool is_visible() const; + + /** + * Get the resolved version info. + */ + [[nodiscard]] const VersionInfo* get_resolved_version() const { + return resolved_version_; + } + + /** + * Get the underlying node. + */ + [[nodiscard]] Node* get_node() const { return node_; } +}; + +} // namespace tundradb + +#endif // NODE_VIEW_HPP diff --git a/include/schema_layout.hpp b/include/schema_layout.hpp index 11ec15b..e40876d 100644 --- a/include/schema_layout.hpp +++ b/include/schema_layout.hpp @@ -84,14 +84,14 @@ class SchemaLayout { /** * Get the size of the bit set in bytes */ - size_t get_bitset_size() const { + [[nodiscard]] size_t get_bitset_size() const { return get_bitset_size_bytes(fields_.size()); } /** * Get the offset where actual field data starts (after bit set + alignment) */ - size_t get_data_offset() const { return data_offset_; } + [[nodiscard]] size_t get_data_offset() const { return data_offset_; } /** * Finalize the layout - adds padding to ensure proper alignment @@ -107,7 +107,7 @@ class SchemaLayout { /** * Get the total size including bit set and data */ - size_t get_total_size_with_bitset() const { + [[nodiscard]] size_t get_total_size_with_bitset() const { return data_offset_ + total_size_; } @@ -148,6 +148,22 @@ class SchemaLayout { return get_field_value(node_data, field->index_); } + /** + * Get field value directly from field pointer (no address math). + * Used by versioning when we already have the exact field data pointer. + * + * @param field_ptr Direct pointer to field data (from updated_fields) + * @param field_layout Field layout for type information + * @return Value read from field_ptr + */ + Value get_field_value_from_ptr(const char* field_ptr, + const FieldLayout& field_layout) const { + if (field_ptr == nullptr) { + return Value{}; // Explicit NULL + } + return Value::read_value_from_memory(field_ptr, field_layout.type); + } + /** * Set field value in node data */ @@ -331,7 +347,7 @@ class LayoutRegistry { return layouts_.erase(schema_name) > 0; } - std::vector get_schema_names() const { + [[nodiscard]] std::vector get_schema_names() const { std::vector names; names.reserve(layouts_.size()); for (auto const& entry : layouts_) { diff --git a/include/temporal_context.hpp b/include/temporal_context.hpp new file mode 100644 index 0000000..531bc4e --- /dev/null +++ b/include/temporal_context.hpp @@ -0,0 +1,139 @@ +#ifndef TEMPORAL_CONTEXT_HPP +#define TEMPORAL_CONTEXT_HPP + +#include +#include + +#include "node_arena.hpp" + +namespace tundradb { + +// Forward declarations +class Node; +class NodeArena; +struct NodeHandle; +struct VersionInfo; + +/** + * Temporal snapshot: specifies a point in VALIDTIME and TXNTIME. + */ +struct TemporalSnapshot { + uint64_t valid_time; // VALIDTIME: when the fact was true in the domain + uint64_t tx_time; // TXNTIME: when the database knew about the fact + + // Default: current time for both axes + TemporalSnapshot() : valid_time(UINT64_MAX), tx_time(UINT64_MAX) {} + + TemporalSnapshot(uint64_t vt, uint64_t tt) : valid_time(vt), tx_time(tt) {} + + // Helper: create snapshot with only VALIDTIME (TXNTIME = current) + static TemporalSnapshot as_of_valid(uint64_t vt) { return {vt, UINT64_MAX}; } + + // Helper: create snapshot with only TXNTIME (VALIDTIME = current) + static TemporalSnapshot as_of_tx(uint64_t tt) { return {UINT64_MAX, tt}; } + + // Check if this is a "current" snapshot (no time-travel) + [[nodiscard]] bool is_current() const { + return valid_time == UINT64_MAX && tx_time == UINT64_MAX; + } +}; + +/** + * TemporalContext: per-query state for time-travel queries. + * + * Responsibilities: + * - Store the temporal snapshot (valid_time, tx_time) + * - Cache resolved versions per node to avoid repeated traversals + * - Provide version resolution for nodes + * + * Usage: + * TemporalContext ctx(snapshot); + * auto view = node->view(&ctx); + * view.get_value_ptr(field); // Uses resolved version + */ +class TemporalContext { + private: + TemporalSnapshot snapshot_; + + // Cache: node_id -> resolved VersionInfo* + // This avoids re-traversing version chains for the same node + std::unordered_map version_cache_; + + public: + explicit TemporalContext(TemporalSnapshot snapshot) : snapshot_(snapshot) {} + + // Get the snapshot + [[nodiscard]] const TemporalSnapshot& snapshot() const { return snapshot_; } + + /** + * Resolve the visible version for a node at this snapshot. + * Returns nullptr if no version is visible (e.g., node didn't exist yet). + * + * Note: naive O(n) implementation, we should use binary search + */ + VersionInfo* resolve_version(int64_t node_id, const NodeHandle& handle) { + if (const auto it = version_cache_.find(node_id); + it != version_cache_.end()) { + return it->second; + } + + // Find a visible version by traversing the chain + VersionInfo* resolved = find_visible_version(handle, snapshot_); + + // Cache result (even if nullptr) + version_cache_[node_id] = resolved; + + return resolved; + } + + // Clear cache (useful between query stages or for testing) + void clear_cache() { version_cache_.clear(); } + + private: + /** + * Find the visible version in the chain at the given snapshot. + * + * A version is visible if: + * valid_from <= snapshot.valid_time < valid_to + * AND + * tx_from <= snapshot.tx_time < tx_to + * + * returns nullptr if no version is visible + */ + static VersionInfo* find_visible_version(const NodeHandle& handle, + const TemporalSnapshot& snapshot) { + VersionInfo* version = handle.version_info_; + + // If snapshot is "current", just return head version + if (snapshot.is_current()) { + return version; + } + + // Traverse chain looking for a visible version + while (version != nullptr) { + // Handle UINT64_MAX as "now" (current time) + const uint64_t vt = snapshot.valid_time; + const uint64_t tt = snapshot.tx_time; + + // If time is UINT64_MAX, treat as current (always visible) + const bool valid_match = + (vt == UINT64_MAX) || + (version->valid_from <= vt && vt < version->valid_to); + + const bool tx_match = + (tt == UINT64_MAX) || (version->tx_from <= tt && tt < version->tx_to); + + if (valid_match && tx_match) { + return version; // Found visible version + } + + version = version->prev; + } + + return nullptr; // No visible version at this snapshot + } +}; + +} // namespace tundradb + +#endif // TEMPORAL_CONTEXT_HPP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b9cc6d2..efc1350 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -138,6 +138,10 @@ add_executable(string_ref_concurrent_test add_executable(node_version_test node_version_test.cpp) +# Node view temporal query test +add_executable(node_view_test + node_view_test.cpp) + # Link against Arrow and GTest target_link_libraries(sharding_test PRIVATE @@ -339,6 +343,15 @@ target_link_libraries(node_version_test LLVMSupport LLVMCore ) +target_link_libraries(node_view_test + PRIVATE + core + GTest::GTest + GTest::Main + spdlog::spdlog + LLVMSupport LLVMCore +) + # Apply sanitizer flags to all test targets if enabled if(ENABLE_SANITIZERS) target_compile_options(sharding_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) @@ -373,6 +386,9 @@ if(ENABLE_SANITIZERS) target_compile_options(node_version_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) target_link_options(node_version_test PRIVATE ${SANITIZER_LINK_FLAGS}) + + target_compile_options(node_view_test PRIVATE ${SANITIZER_COMPILE_FLAGS}) + target_link_options(node_view_test PRIVATE ${SANITIZER_LINK_FLAGS}) endif() # Simple test registration @@ -392,6 +408,7 @@ add_test(NAME NodeArenaTest COMMAND node_arena_test) add_test(NAME NodeTest COMMAND node_test) add_test(NAME StringRefConcurrentTest COMMAND string_ref_concurrent_test) add_test(NAME NodeVersionTest COMMAND node_version_test) +add_test(NAME NodeViewTest COMMAND node_view_test) # Set TSan options for tests after they've been registered if(ENABLE_SANITIZERS AND SANITIZER_TYPE STREQUAL "thread" AND EXISTS ${TSAN_SUPPRESSIONS_FILE}) diff --git a/tests/node_view_test.cpp b/tests/node_view_test.cpp new file mode 100644 index 0000000..d153af9 --- /dev/null +++ b/tests/node_view_test.cpp @@ -0,0 +1,292 @@ +#include "../include/node_view.hpp" + +#include + +#include +#include + +#include "../include/node.hpp" +#include "../include/node_arena.hpp" +#include "../include/schema.hpp" +#include "../include/schema_layout.hpp" +#include "../include/temporal_context.hpp" + +using namespace tundradb; + +class NodeViewTest : public ::testing::Test { + protected: + void SetUp() override { + layout_registry_ = std::make_shared(); + + // Create fields + llvm::SmallVector, 4> fields; + fields.push_back(std::make_shared("id", ValueType::INT64)); + fields.push_back(std::make_shared("name", ValueType::STRING)); + fields.push_back(std::make_shared("age", ValueType::INT32)); + fields.push_back(std::make_shared("department", ValueType::STRING)); + + // Create schema + schema_ = + std::make_shared(std::string("User"), 1u, std::move(fields)); + + // Get field pointers for tests + id_field_ = schema_->get_field("id"); + name_field_ = schema_->get_field("name"); + age_field_ = schema_->get_field("age"); + department_field_ = schema_->get_field("department"); + + // Create layout from schema + auto layout = std::make_unique(schema_); + layout_registry_->register_layout(std::move(layout)); + + // Get layout pointer + layout_ = layout_registry_->get_layout("User"); + + // Create NodeArena WITH versioning enabled + node_arena_versioned_ = node_arena_factory::create_free_list_arena( + layout_registry_, 2 * 1024 * 1024, 64, true); + + // Create NodeArena WITHOUT versioning for comparison + node_arena_non_versioned_ = node_arena_factory::create_free_list_arena( + layout_registry_, 2 * 1024 * 1024, 64, false); + } + + std::shared_ptr layout_registry_; + std::shared_ptr schema_; + std::shared_ptr layout_; + std::shared_ptr node_arena_versioned_; + std::shared_ptr node_arena_non_versioned_; + + // Field pointers for convenience + std::shared_ptr id_field_; + std::shared_ptr name_field_; + std::shared_ptr age_field_; + std::shared_ptr department_field_; +}; + +/** + * Test: Basic NodeView without temporal context (current version) + */ +TEST_F(NodeViewTest, CurrentVersionView) { + // Allocate versioned node + NodeHandle handle = node_arena_versioned_->allocate_node("User"); + + // Create Node wrapper + Node node(0, "User", {}, std::make_unique(std::move(handle)), + node_arena_versioned_, schema_, layout_); + + // Set initial values + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + id_field_, Value(int64_t(100))); + node_arena_versioned_->set_field_value_v0( + *node.get_handle(), layout_, name_field_, Value(std::string("Alice"))); + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + age_field_, Value(int32_t(25))); + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + department_field_, + Value(std::string("Engineering"))); + + // Create view without temporal context (current version) + NodeView view = node.view(nullptr); + + ASSERT_TRUE(view.is_visible()); + + auto age_result = view.get_value(age_field_); + ASSERT_TRUE(age_result.ok()); + EXPECT_EQ(age_result.ValueOrDie().as_int32(), 25); + + auto dept_result = view.get_value(department_field_); + ASSERT_TRUE(dept_result.ok()); + EXPECT_EQ(dept_result.ValueOrDie().as_string(), "Engineering"); +} + +/** + * Test: Time-travel query using VALIDTIME + */ +TEST_F(NodeViewTest, TimeTravelValidTime) { + // Allocate versioned node + NodeHandle handle = node_arena_versioned_->allocate_node("User"); + + // Create Node wrapper + Node node(0, "User", {}, std::make_unique(std::move(handle)), + node_arena_versioned_, schema_, layout_); + + // v0: Alice, age=25, dept=Engineering + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + id_field_, Value(int64_t(100))); + node_arena_versioned_->set_field_value_v0( + *node.get_handle(), layout_, name_field_, Value(std::string("Alice"))); + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + age_field_, Value(int32_t(25))); + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + department_field_, + Value(std::string("Engineering"))); + + uint64_t t0 = node.get_handle()->version_info_->valid_from; + + // v1: Update age to 26 at time t1 + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + + std::vector, Value>> updates1 = { + {age_field_, Value(int32_t(26))}}; + auto update1_result = node_arena_versioned_->update_fields(*node.get_handle(), + layout_, updates1); + ASSERT_TRUE(update1_result.ok()); + + uint64_t t1 = node.get_handle()->version_info_->valid_from; + + // v2: Update department to Sales at time t2 + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + + std::vector, Value>> updates2 = { + {department_field_, Value(std::string("Sales"))}}; + auto update2_result = node_arena_versioned_->update_fields(*node.get_handle(), + layout_, updates2); + ASSERT_TRUE(update2_result.ok()); + + uint64_t t2 = node.get_handle()->version_info_->valid_from; + + // Query at t0 (before any updates) + TemporalContext ctx_t0(TemporalSnapshot::as_of_valid(t0)); + NodeView view_t0 = node.view(&ctx_t0); + + ASSERT_TRUE(view_t0.is_visible()); + auto age_t0 = view_t0.get_value(age_field_); + ASSERT_TRUE(age_t0.ok()); + EXPECT_EQ(age_t0.ValueOrDie().as_int32(), 25); + + auto dept_t0 = view_t0.get_value(department_field_); + ASSERT_TRUE(dept_t0.ok()); + EXPECT_EQ(dept_t0.ValueOrDie().as_string(), "Engineering"); + + // Query at t1 (after age update, before dept update) + TemporalContext ctx_t1(TemporalSnapshot::as_of_valid(t1)); + NodeView view_t1 = node.view(&ctx_t1); + + ASSERT_TRUE(view_t1.is_visible()); + auto age_t1 = view_t1.get_value(age_field_); + ASSERT_TRUE(age_t1.ok()); + EXPECT_EQ(age_t1.ValueOrDie().as_int32(), 26); // Updated + + auto dept_t1 = view_t1.get_value(department_field_); + ASSERT_TRUE(dept_t1.ok()); + EXPECT_EQ(dept_t1.ValueOrDie().as_string(), + "Engineering"); // Still old value + + // Query at t2 (after both updates) + TemporalContext ctx_t2(TemporalSnapshot::as_of_valid(t2)); + NodeView view_t2 = node.view(&ctx_t2); + + ASSERT_TRUE(view_t2.is_visible()); + auto age_t2 = view_t2.get_value(age_field_); + ASSERT_TRUE(age_t2.ok()); + EXPECT_EQ(age_t2.ValueOrDie().as_int32(), 26); + + auto dept_t2 = view_t2.get_value(department_field_); + ASSERT_TRUE(dept_t2.ok()); + EXPECT_EQ(dept_t2.ValueOrDie().as_string(), "Sales"); // Updated +} + +/** + * Test: Temporal context caches resolved versions + */ +TEST_F(NodeViewTest, TemporalContextCaching) { + // Allocate versioned node + NodeHandle handle = node_arena_versioned_->allocate_node("User"); + + // Create Node wrapper + Node node(0, "User", {}, std::make_unique(std::move(handle)), + node_arena_versioned_, schema_, layout_); + + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + age_field_, Value(int32_t(25))); + + uint64_t t0 = node.get_handle()->version_info_->valid_from; + + // Create temporal context + TemporalContext ctx(TemporalSnapshot::as_of_valid(t0)); + + // First view: should resolve and cache version + NodeView view1 = node.view(&ctx); + ASSERT_TRUE(view1.is_visible()); + + // Second view: should use cached version + NodeView view2 = node.view(&ctx); + ASSERT_TRUE(view2.is_visible()); + + // Both views should resolve to the same version + EXPECT_EQ(view1.get_resolved_version(), view2.get_resolved_version()); +} + +/** + * Test: NodeView works with non-versioned nodes + */ +TEST_F(NodeViewTest, NonVersionedNodeView) { + // Allocate NON-versioned node + NodeHandle handle = node_arena_non_versioned_->allocate_node("User"); + + // Create Node wrapper + Node node(0, "User", {}, std::make_unique(std::move(handle)), + node_arena_non_versioned_, schema_, layout_); + + // Set values (non-versioned, direct writes) + node_arena_non_versioned_->set_field_value_v0( + *node.get_handle(), layout_, name_field_, Value(std::string("Bob"))); + node_arena_non_versioned_->set_field_value_v0(*node.get_handle(), layout_, + age_field_, Value(int32_t(30))); + + // Create view without temporal context (should work for non-versioned) + NodeView view = node.view(nullptr); + + ASSERT_TRUE(view.is_visible()); + + auto name = view.get_value(name_field_); + auto age = view.get_value(age_field_); + + ASSERT_TRUE(name.ok()); + ASSERT_TRUE(age.ok()); + + EXPECT_EQ(name.ValueOrDie().as_string(), "Bob"); + EXPECT_EQ(age.ValueOrDie().as_int32(), 30); +} + +/** + * Test: Multiple field reads from same view (no repeated lookups) + */ +TEST_F(NodeViewTest, MultipleFieldReadsFromSameView) { + // Allocate versioned node + NodeHandle handle = node_arena_versioned_->allocate_node("User"); + + // Create Node wrapper + Node node(0, "User", {}, std::make_unique(std::move(handle)), + node_arena_versioned_, schema_, layout_); + + node_arena_versioned_->set_field_value_v0( + *node.get_handle(), layout_, name_field_, Value(std::string("Alice"))); + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + age_field_, Value(int32_t(25))); + node_arena_versioned_->set_field_value_v0(*node.get_handle(), layout_, + department_field_, + Value(std::string("Engineering"))); + + uint64_t t0 = node.get_handle()->version_info_->valid_from; + + // Create view once + TemporalContext ctx(TemporalSnapshot::as_of_valid(t0)); + NodeView view = node.view(&ctx); + + // Read multiple fields from same view + auto name = view.get_value(name_field_); + auto age = view.get_value(age_field_); + auto dept = view.get_value(department_field_); + + ASSERT_TRUE(name.ok()); + ASSERT_TRUE(age.ok()); + ASSERT_TRUE(dept.ok()); + + EXPECT_EQ(name.ValueOrDie().as_string(), "Alice"); + EXPECT_EQ(age.ValueOrDie().as_int32(), 25); + EXPECT_EQ(dept.ValueOrDie().as_string(), "Engineering"); +} + +// namespace tundradb From 065a863e8ec4ee5259740d198fb066815d5ab34c Mon Sep 17 00:00:00 2001 From: dmgcodevil Date: Tue, 10 Feb 2026 22:21:18 -0500 Subject: [PATCH 2/7] tests --- CMakeLists.txt | 1 + include/clock.hpp | 115 ++++++++++ include/node_arena.hpp | 6 +- include/query.hpp | 55 ++++- src/core.cpp | 35 +++- tests/CMakeLists.txt | 21 ++ tests/temporal_query_test.cpp | 381 ++++++++++++++++++++++++++++++++++ 7 files changed, 602 insertions(+), 12 deletions(-) create mode 100644 include/clock.hpp create mode 100644 tests/temporal_query_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 969d5a7..a494e03 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,6 +222,7 @@ add_library(core src/schema.cpp src/arrow_utils.cpp src/types.cpp + src/clock.cpp ) target_include_directories(core diff --git a/include/clock.hpp b/include/clock.hpp new file mode 100644 index 0000000..433af64 --- /dev/null +++ b/include/clock.hpp @@ -0,0 +1,115 @@ +#ifndef CLOCK_HPP +#define CLOCK_HPP + +#include +#include +#include + +namespace tundradb { + +/** + * Abstract clock interface for time management. + * Allows injection of mock clocks for testing temporal queries. + * + * Usage in production: + * Clock::instance().now_nanos() // Uses SystemClock by default + * + * Usage in tests: + * MockClock mock; + * Clock::set_instance(&mock); + * mock.set_time(timestamp); + * // ... run tests ... + * Clock::reset(); // Restore SystemClock + */ +class Clock { + public: + virtual ~Clock() = default; + + /** + * Get current timestamp in nanoseconds since Unix epoch. + */ + virtual uint64_t now_nanos() const = 0; + + /** + * Get the global Clock instance. + */ + static Clock& instance() { + Clock* inst = instance_.load(std::memory_order_acquire); + return inst ? *inst : default_instance(); + } + + /** + * Set the global Clock instance (for testing). + * Pass nullptr to restore the default SystemClock. + */ + static void set_instance(Clock* clock) { + instance_.store(clock, std::memory_order_release); + } + + /** + * Reset to default SystemClock. + */ + static void reset() { set_instance(nullptr); } + + private: + static Clock& default_instance(); + static std::atomic instance_; +}; + +/** + * System clock using std::chrono (production). + */ +class SystemClock : public Clock { + public: + uint64_t now_nanos() const override { + auto now = std::chrono::system_clock::now(); + auto duration = now.time_since_epoch(); + return std::chrono::duration_cast(duration) + .count(); + } +}; + +/** + * Mock clock for testing (controllable time). + */ +class MockClock : public Clock { + public: + explicit MockClock(uint64_t initial_time = 0) : current_time_(initial_time) {} + + uint64_t now_nanos() const override { + return current_time_.load(std::memory_order_relaxed); + } + + /** + * Set the current time. + */ + void set_time(uint64_t nanos) { + current_time_.store(nanos, std::memory_order_relaxed); + } + + /** + * Advance time by delta nanoseconds. + */ + void advance(uint64_t delta_nanos) { + current_time_.fetch_add(delta_nanos, std::memory_order_relaxed); + } + + /** + * Advance time by seconds (convenience method). + */ + void advance_seconds(uint64_t seconds) { + advance(seconds * 1'000'000'000ULL); + } + + /** + * Advance time by milliseconds (convenience method). + */ + void advance_millis(uint64_t millis) { advance(millis * 1'000'000ULL); } + + private: + mutable std::atomic current_time_; +}; + +} // namespace tundradb + +#endif // CLOCK_HPP diff --git a/include/node_arena.hpp b/include/node_arena.hpp index 7c5c5c9..138f125 100644 --- a/include/node_arena.hpp +++ b/include/node_arena.hpp @@ -10,6 +10,7 @@ #include #include +#include "clock.hpp" #include "free_list_arena.hpp" #include "mem_arena.hpp" #include "memory_arena.hpp" @@ -751,10 +752,7 @@ class NodeArena { private: static uint64_t get_current_timestamp_ns() { - auto now = std::chrono::system_clock::now(); - auto duration = now.time_since_epoch(); - return std::chrono::duration_cast(duration) - .count(); + return Clock::instance().now_nanos(); } /** Write field directly to node memory (handles strings). */ diff --git a/include/query.hpp b/include/query.hpp index a84e462..709c932 100644 --- a/include/query.hpp +++ b/include/query.hpp @@ -14,6 +14,7 @@ #include "node.hpp" #include "schema.hpp" +#include "temporal_context.hpp" #include "types.hpp" namespace tundradb { @@ -809,16 +810,19 @@ class Query { std::shared_ptr select, bool optimize_where, - ExecutionConfig execution_config) + ExecutionConfig execution_config, + std::optional temporal_snapshot = std::nullopt) : from_(std::move(from)), clauses_(std::move(clauses)), select_(std::move(select)), inline_where_(optimize_where), - execution_config_(execution_config) {} + execution_config_(execution_config), + temporal_snapshot_(std::move(temporal_snapshot)) {} class Builder; [[nodiscard]] const SchemaRef& from() const { return from_; } @@ -834,6 +838,11 @@ class Query { return execution_config_; } + [[nodiscard]] const std::optional& temporal_snapshot() + const { + return temporal_snapshot_; + } + static Builder from(const std::string& schema) { return Builder(schema); } class Builder { @@ -843,6 +852,7 @@ class Query { std::shared_ptr