diff --git a/CMakeLists.txt b/CMakeLists.txt index 969d5a7..a494e03 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,6 +222,7 @@ add_library(core src/schema.cpp src/arrow_utils.cpp src/types.cpp + src/clock.cpp ) target_include_directories(core diff --git a/bench/tundra_runner.cpp b/bench/tundra_runner.cpp index aa9fbfa..bb2a495 100644 --- a/bench/tundra_runner.cpp +++ b/bench/tundra_runner.cpp @@ -104,8 +104,8 @@ void load_data(Database& db, const std::string& users_csv, db.connect(wsrc->Value(i), "WORKS_AT", users_count + wdst->Value(i)).ValueOrDie(); } - db.get_table("User").ValueOrDie(); - db.get_table("Company").ValueOrDie(); + db.get_table("User", nullptr).ValueOrDie(); + db.get_table("Company", nullptr).ValueOrDie(); auto load_end = std::chrono::high_resolution_clock::now(); auto load_duration = std::chrono::duration_cast( diff --git a/docs/NODE_VIEW_DESIGN.md b/docs/NODE_VIEW_DESIGN.md new file mode 100644 index 0000000..25b565a --- /dev/null +++ b/docs/NODE_VIEW_DESIGN.md @@ -0,0 +1,379 @@ +# NodeView Design - Complete Implementation + +## Overview + +This document describes the complete `NodeView` design for time-travel queries in TundraDB. + +## Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Query Execution │ +├──────────────────────────────────────────────────────────────────┤ +│ 1. Create TemporalContext with snapshot (valid_time, tx_time) │ +│ 2. For each node, call node.view(&ctx) │ +│ 3. NodeView resolves version once (cached in TemporalContext) │ +│ 4. Read fields from NodeView (no per-field lookups) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +## Components + +### 1. 
TemporalSnapshot (temporal_context.hpp) + +Represents a point in bitemporal space: + +```cpp +struct TemporalSnapshot { + uint64_t valid_time; // VALIDTIME: when fact was true in domain + uint64_t tx_time; // TXNTIME: when DB knew about the fact + + // UINT64_MAX means "current time" + static TemporalSnapshot as_of_valid(uint64_t vt); // VALIDTIME only + static TemporalSnapshot as_of_tx(uint64_t tt); // TXNTIME only + bool is_current() const; // Both times = UINT64_MAX +}; +``` + +**Usage:** +```cpp +// Time-travel to specific VALIDTIME +auto snapshot = TemporalSnapshot::as_of_valid(1704067200000); + +// Bitemporal: specific valid & tx times +auto snapshot = TemporalSnapshot(1704067200000, 1706745600000); +``` + +--- + +### 2. TemporalContext (temporal_context.hpp) + +Per-query state for temporal queries: + +```cpp +class TemporalContext { + TemporalSnapshot snapshot_; + std::unordered_map version_cache_; // node_id -> version + +public: + explicit TemporalContext(TemporalSnapshot snapshot); + + // Resolve visible version for a node (with caching) + VersionInfo* resolve_version(int64_t node_id, const NodeHandle& handle); + + void clear_cache(); +}; +``` + +**Key Features:** +- **Version resolution**: Finds the visible version at the snapshot +- **Caching**: Avoids repeated version chain traversals +- **Bitemporal visibility**: Checks both VALIDTIME and TXNTIME + +**Visibility Rule:** +```cpp +A version is visible if: + (version.valid_from <= snapshot.valid_time < version.valid_to) + AND + (version.tx_from <= snapshot.tx_time < version.tx_to) +``` + +--- + +### 3. 
NodeView (node_view.hpp) + +Lightweight view of a Node at a specific temporal snapshot: + +```cpp +class NodeView { + Node* node_; + VersionInfo* resolved_version_; // Resolved once at construction + NodeArena* arena_; + SchemaLayout* layout_; + +public: + NodeView(Node* node, VersionInfo* resolved_version, + NodeArena* arena, SchemaLayout* layout); + + // Same interface as Node + arrow::Result get_value_ptr(const Field& field) const; + arrow::Result get_value_ref(const Field& field) const; + arrow::Result get_value(const Field& field) const; + + bool is_visible() const; // false if node didn't exist at snapshot + const VersionInfo* get_resolved_version() const; + Node* get_node() const; +}; +``` + +**Key Features:** +- **Lightweight**: Only 32 bytes on a 64-bit platform (the 4 pointers declared above) +- **No per-field lookups**: Version resolved once at construction +- **Same interface as Node**: Drop-in replacement for field access +- **Handles non-existence**: `is_visible() == false` if node didn't exist at snapshot + +--- + +### 4. Node::view() Method (node.hpp) + +Factory method on Node to create NodeView: + +```cpp +class Node { +public: + NodeView view(TemporalContext* ctx = nullptr) { + if (!ctx) { + // No temporal context -> use current version + return NodeView(this, handle_->version_info_, arena_.get(), layout_.get()); + } + + // Resolve version using TemporalContext (cached) + VersionInfo* resolved = ctx->resolve_version(id, *handle_); + return NodeView(this, resolved, arena_.get(), layout_.get()); + } +}; +``` + +--- + +### 5.
VersionInfo Extension (node_arena.hpp) + +Extended with TXNTIME support: + +```cpp +struct VersionInfo { + uint64_t version_id; + + // VALIDTIME (domain) + uint64_t valid_from; + uint64_t valid_to; // UINT64_MAX = INF + + // TXNTIME (system knowledge) - NEW + uint64_t tx_from; + uint64_t tx_to; // UINT64_MAX = INF + + VersionInfo* prev; + llvm::SmallDenseMap updated_fields; + + // Bitemporal visibility check + bool is_visible_at(uint64_t valid_time, uint64_t tx_time) const; + + // Find version visible at snapshot + const VersionInfo* find_version_at_snapshot(uint64_t vt, uint64_t tt) const; +}; +``` + +**Constructor:** +```cpp +VersionInfo(uint64_t vid, uint64_t ts_from, VersionInfo* prev_ver = nullptr) + : version_id(vid), + valid_from(ts_from), + tx_from(ts_from), // Initially tx_from = valid_from + prev(prev_ver) {} +``` + +--- + +### 6. NodeArena Helper Methods (node_arena.hpp) + +Added methods for NodeView to resolve fields: + +```cpp +class NodeArena { +public: + // Get field value pointer starting from a specific version + const char* get_field_value_ptr_from_version( + const VersionInfo* version, + const SchemaLayout& layout, + const Field& field) const; + + // Get field value starting from a specific version + arrow::Result get_field_value_from_version( + const VersionInfo* version, + const SchemaLayout& layout, + const Field& field) const; +}; +``` + +--- + +## Usage Examples + +### Example 1: Current Version (No Time-Travel) + +```cpp +Node node = ...; + +// Create view without temporal context (current version) +NodeView view = node.view(nullptr); + +// Read fields +auto age = view.get_value(age_field); +auto dept = view.get_value(department_field); +``` + +--- + +### Example 2: Time-Travel Query (VALIDTIME) + +```cpp +// Scenario: Node has 3 versions at times t0, t1, t2 +// v0 (t0): age=25, dept="Engineering" +// v1 (t1): age=26, dept="Engineering" +// v2 (t2): age=26, dept="Sales" + +// Query at t1 (between v1 and v2) +TemporalContext 
ctx(TemporalSnapshot::as_of_valid(t1)); +NodeView view = node.view(&ctx); + +auto age = view.get_value(age_field); // Returns 26 (from v1) +auto dept = view.get_value(dept_field); // Returns "Engineering" (from v1) +``` + +--- + +### Example 3: Bitemporal Query (VALIDTIME + TXNTIME) + +```cpp +// Query what the DB knew at tx_time=100 about valid_time=90 +TemporalContext ctx(TemporalSnapshot(90, 100)); +NodeView view = node.view(&ctx); + +// Returns values from the version visible at (vt=90, tt=100) +auto age = view.get_value(age_field); +``` + +--- + +### Example 4: Multiple Field Reads (No Repeated Lookups) + +```cpp +TemporalContext ctx(TemporalSnapshot::as_of_valid(timestamp)); +NodeView view = node.view(&ctx); // Version resolved once here + +// All subsequent field reads use the cached resolved version +auto name = view.get_value(name_field); +auto age = view.get_value(age_field); +auto dept = view.get_value(department_field); +// No map lookups, no repeated version chain traversals! 
+``` + +--- + +### Example 5: Node Doesn't Exist at Snapshot + +```cpp +uint64_t before_creation = node_creation_time - 100000; + +TemporalContext ctx(TemporalSnapshot::as_of_valid(before_creation)); +NodeView view = node.view(&ctx); + +if (!view.is_visible()) { + // Node didn't exist at this time + std::cout << "Node not visible\n"; +} + +// Accessing fields returns KeyError +auto age_result = view.get_value(age_field); +assert(!age_result.ok() && age_result.status().IsKeyError()); +``` + +--- + +## Performance Characteristics + +### Memory + +| Component | Size | Notes | +|-----------|------|-------| +| `TemporalSnapshot` | 16 bytes | 2 × uint64_t | +| `TemporalContext` | 40+ bytes | Snapshot + version cache map | +| `NodeView` | 32 bytes | 4 pointers (64-bit) | +| `VersionInfo` extension | +16 bytes | Added tx_from/tx_to | + +### Time Complexity + +| Operation | Complexity | Notes | +|-----------|------------|-------| +| `TemporalContext::resolve_version` | O(N) first time, O(1) cached | N = version chain length | +| `NodeView` construction | O(1) | Just pointer assignment | +| `NodeView::get_value` | O(M) | M = field chain depth (usually 1-2) | + +**Key Optimization**: Version resolution happens **once per node per query**, not per field access. + +--- + +## Benefits + +1. **No Per-Field Lookups**: Version resolved once, cached in `TemporalContext` +2. **Unified Interface**: `NodeView` has same API as `Node` (drop-in replacement) +3. **Bitemporal Support**: Ready for VALIDTIME + TXNTIME queries +4. **Efficient**: Minimal overhead (32 bytes per view) +5. **Handles Non-Existence**: Gracefully handles nodes that didn't exist at snapshot +6.
**Thread-Safe Reads**: Multiple queries can use separate `TemporalContext` instances + +--- + +## Tests + +See `tests/node_view_test.cpp` for comprehensive tests: + +- ✅ `CurrentVersionView`: No temporal context (current version) +- ✅ `TimeTravelValidTime`: Query at different points in VALIDTIME +- ✅ `TemporalContextCaching`: Version resolution caching +- ✅ `NodeNotVisibleAtSnapshot`: Node didn't exist yet +- ✅ `MultipleFieldReadsFromSameView`: No repeated lookups + +Run tests: +```bash +cd build && ctest -R NodeViewTest -V +``` + +--- + +## Next Steps + +### Phase 2: Query Syntax (Pending) + +Add temporal query syntax to TundraQL grammar: + +```sql +-- Time-travel query +AS OF VALIDTIME 1704067200000 +AS OF TXNTIME 1706745600000 +MATCH (u:User) WHERE u.name = "Alice" +SELECT u.department; + +-- Retroactive correction +MATCH (u:User) WHERE u.name = "Alice" +FOR VALIDTIME [1704067200000, INF) +SET u.department = "Sales"; +``` + +### Phase 3: Interval Surgery (Pending) + +Implement retroactive corrections with interval splitting/coalescing. 
+ +### Phase 4: DIFF Operator (Pending) + +Implement audit trail queries: + +```sql +DIFF FROM (VALIDTIME 1704067200000, TXNTIME 1706745600000) + TO (VALIDTIME 1706745600000, TXNTIME now) +MATCH (u:User) WHERE u.name = "Alice"; +``` + +--- + +## Summary + +The NodeView design provides: +- ✅ Efficient time-travel queries (version resolved once per node) +- ✅ Bitemporal support (VALIDTIME + TXNTIME) +- ✅ Clean API (same as Node, drop-in replacement) +- ✅ Comprehensive tests +- ✅ Ready for query syntax integration + +**Current Status**: Phase 1 Complete (Infrastructure + Tests) ✅ + diff --git a/docs/README_temporal_versioning.md b/docs/README_temporal_versioning.md new file mode 100644 index 0000000..606b4c5 --- /dev/null +++ b/docs/README_temporal_versioning.md @@ -0,0 +1,498 @@ +# TundraDB Temporal Versioning Spec (VALIDTIME + TXNTIME) + +This README is a developer-focused spec for implementing temporal queries and temporal updates (including retroactive corrections) using a `VersionInfo` chain per logical record (node or edge). + +The goal is to make the model *click*: +- **VALIDTIME** = when a fact is true in the domain/world +- **TXNTIME** = when the database recorded/“knew” that fact (system time) + +> **TL;DR:** You do not rewrite history by editing old versions. You create new versions with new (valid/txn) intervals. The only safe in-place mutation is closing `tx_to` on previously-current versions. 
+ +--- + +## 1) Data Model + +### 1.1 Version record + +```cpp +struct VersionInfo { + uint64_t version_id; + + // Domain validity (world time): [valid_from, valid_to) + uint64_t valid_from; + uint64_t valid_to; // default = INF + + // System validity (knowledge/commit time): [tx_from, tx_to) + uint64_t tx_from; // commit timestamp or monotonic commit id + uint64_t tx_to; // default = INF + + // Sparse deltas: field_idx -> value pointer (nullptr means explicit NULL) + map updated_fields; + + // Newest -> older + VersionInfo* prev; +}; +``` + +### 1.2 Logical record vs versions + +A **logical record** (a node id or edge id) is stable. It has a chain of versions: + +``` +logical_id: 42 + head -> v9 -> v8 -> v7 -> ... +``` + +Each `VersionInfo` represents a set of *field deltas* and an interval in **valid-time** and **txn-time**. + +### 1.3 Optional: edge identity choices + +You must choose how to identify edges: + +**A) Stable `edge_id` (recommended)** +- An edge has a stable id, and versions can change edge properties (including dst) over time. +- DIFF can show `UPDATED`. + +**B) Tuple identity** +- Edge identity is `(src, label, dst, key?)`. +- Changing `dst` is modeled as `REMOVED+ADDED`. + +Both are acceptable; A is better for bitemporal editing and auditing. + +--- + +## 2) Time Semantics + +### 2.1 VALIDTIME (domain time) + +- Answers: **“When was it true in the world?”** +- Used for: temporal graphs, historical queries (friendship at date), truth as-of time. + +### 2.2 TXNTIME (system/knowledge time) + +- Answers: **“When did the DB record/believe it?”** +- Used for: audit/replay (“what did we know then?”), backtesting, preventing future leakage, late-arriving data, corrections. 
+ +### 2.3 Visibility rule (bitemporal) + +Given query snapshot `(vtime, txtime)`, a version `v` is visible if: + +``` +v.valid_from <= vtime < v.valid_to +AND +v.tx_from <= txtime < v.tx_to +``` + +If you only implement VALIDTIME initially, treat `tx_from=0, tx_to=INF` for all versions (or ignore tx constraints). + +--- + +## 3) Immutability / Mutability Rules (Critical) + +**After commit:** + +- `valid_from`, `valid_to` are **immutable** (do not rewrite domain history). +- `tx_from` is **immutable** (it is commit time). +- `tx_to` is **mutable only to close old versions** when a newer belief is written. + +> The only allowed in-place mutation is: +> +> **`old_version.tx_to = tx_now`** +> +> This preserves reproducible snapshots: old versions remain visible for earlier `txtime`. + +--- + +## 4) Field Materialization with `updated_fields` + +`updated_fields` is a sparse delta map. Resolving a field `f` at a visible version: + +``` +value resolve(logical_id, field_idx, snapshot): + v = visible_version(logical_id, snapshot) + while v != null: + if v.updated_fields contains field_idx: + return v.updated_fields[field_idx] // may be NULL explicitly + v = v.prev + return DEFAULT/ABSENT (schema default) +``` + +### 4.1 Performance note + +Avoid full materialization: +- resolve only fields referenced by the query +- cache `(logical_id, snapshot, field_idx)` during query execution + +--- + +## 5) Snapshot Selection in Queries + +### 5.1 Read queries + +A query can specify one or two time axes: + +```sql +AS OF VALIDTIME @v +AS OF TXNTIME @t +MATCH ... +SELECT ... +``` + +If `AS OF TXNTIME` is omitted, treat it as `now` (or latest commit) for “current belief”. 
+ +### 5.2 Pattern matching + +`AS OF` applies to: +- nodes (their visible versions) +- edges (their visible versions) +- edge existence (edges can appear/disappear by valid/txn windows) + +--- + +## 6) Updates: Concepts + +There are two distinct intents: + +### 6.1 “Current update” (world changes now) + +Example: “Alice moved departments today.” +- `valid_from = now` +- this is not a retroactive correction; it is a new fact from now onward. + +### 6.2 “Correction” (retroactive fix) + +Example: “We learned Alice actually moved on Jan 14.” +- `valid_from = Jan14` (back in the past) +- `tx_from = commit_time` (when we learned and recorded it) + +This is why VALIDTIME != TXNTIME. + +--- + +## 7) Recommended Update Syntax (Minimal Extensions) + +Your current syntax: + +```sql +MATCH (u:User) WHERE u.name = "Alice" +SET u.age = 26, u.department = "Sales"; +``` + +### 7.1 Add a valid-time target interval + +To make corrections explicit: + +```sql +MATCH (u:User) WHERE u.name = "Alice" +FOR VALIDTIME [@vf, @vt) +SET u.department = "Sales"; +``` + +- `FOR VALIDTIME [...]` defines the **domain interval** to patch. +- `tx_from` is automatically set to `tx_now` at commit. + +### 7.2 Optional: `AS OF` for target selection + +This controls how MATCH is evaluated: + +```sql +MATCH (u:User)-[:MEMBER_OF]->(t:Team) +AS OF VALIDTIME @v_sel +AS OF TXNTIME @t_sel +WHERE t.name = "Risk" +FOR VALIDTIME [@vf, INF) +SET u.department = "Sales"; +``` + +This helps avoid “future knowledge” when running repair jobs. + +--- + +## 8) Core Algorithms + +### 8.1 Helper: interval overlap + +Intervals are half-open: `[from, to)`. + +``` +overlaps([a_from, a_to), [b_from, b_to)) = (a_from < b_to) && (b_from < a_to) +``` + +### 8.2 Finding currently-believed versions at commit time + +When writing at `tx_now`, find versions visible in TX at `tx_now`: + +``` +tx_from <= tx_now < tx_to +``` + +(Optionally also filtered by logical id and other constraints.) 
+ +### 8.3 Interval surgery for corrections + +Operation: +- Patch fields for valid interval `[corr_from, corr_to)` (corr_to may be INF) +- Assign `tx_from = tx_now` + +**High-level:** +1. Find versions currently-believed at `tx_now` +2. For each version overlapping `[corr_from, corr_to)`: + - close old belief: `old.tx_to = tx_now` + - insert up to 3 new versions at `tx_now`: + - left remainder + - corrected middle + - right remainder +3. Coalesce adjacent segments with identical values + +--- + +## 9) Worked Examples (Version Chains) + +All times below are simplified integers. + +### 9.1 Example A: current update (not correction) + +Alice department becomes Sales at time 100, recorded at tx=100. + +Existing belief: +- dept=Eng for `[1, INF)` at tx `[1, INF)` + +Write: `SET dept=Sales` for `[100, INF)` at `tx_now=100`. + +Result versions: + +``` +v1: dept=Eng + valid [1, INF) + tx [1, 100) // closed at tx=100 + +v2: dept=Eng + valid [1, 100) + tx [100, INF) // reassert unchanged left remainder (optional; see notes) + +v3: dept=Sales + valid [100, INF) + tx [100, INF) +``` + +**Optimization:** You can avoid writing `v2` by keeping an unchanged segment represented implicitly, +but many implementations keep explicit segments for simplicity and coalescing. + +### 9.2 Example B: retroactive correction + +Reality: Alice moved to Sales at valid time 80, but the DB learned this only at tx=120. 
+ +Initial belief (at tx=1): +- dept=Eng valid `[1, INF)` tx `[1, INF)` + +Correction write at `tx_now=120`: +- patch dept=Sales for valid `[80, INF)` + +Result: + +``` +v1: dept=Eng + valid [1, INF) + tx [1, 120) // old belief still visible for txtime<120 + +v2: dept=Eng + valid [1, 80) + tx [120, INF) + +v3: dept=Sales + valid [80, INF) + tx [120, INF) +``` + +Now: +- Query `(vtime=90, txtime=100)` -> sees **Eng** (what we believed then) +- Query `(vtime=90, txtime=130)` -> sees **Sales** (corrected belief) + +--- + +## 10) Corner Cases (Must Handle) + +### 10.1 No-op update + +If the patch does not change any field values over `[vf, vt)`: +- Do **not** create new versions +- Return “0 changed” (and DIFF empty) + +### 10.2 Boundary already exists + +If the chain already has a segment boundary at `vf`: +- only patch the overlapping segments +- avoid extra splitting + +### 10.3 Patch overlaps multiple segments + +Example segments: +- `[1,50) Eng`, `[50,120) Ops`, `[120, INF) Eng` +Patch `[100, INF) -> Sales` + +You must: +- split `[50,120)` into `[50,100) Ops` + `[100,120) Sales` +- patch `[120, INF) Eng -> Sales` +- coalesce adjacent Sales segments if produced + +### 10.4 Gaps in valid-time + +If you allow gaps (no version covers some `[t1,t2)`), define behavior: +- treat as “unknown / absent” (no visible row) +- `FOR VALIDTIME` may *create* versions in gaps (new facts) + +### 10.5 Explicit NULL vs inherited + +In `updated_fields`, a `NULL` value means “explicitly NULL”. +Absence of field in map means “inherit from prev”. 
+ +DIFF must distinguish: +- field becomes NULL +- field not present / unchanged + +### 10.6 Deleting a property vs deleting an edge + +Two approaches: +- **tombstone**: an edge version with a deleted flag +- **interval end**: set valid_to to deletion time (via new versions; do not mutate old valid_to) + +Recommended: represent deletion as versioning: +- close old belief in TX (`tx_to = tx_now`) +- write new version(s) that remove existence for a valid interval + - for edges, this may mean the edge simply has no visible version for that interval + - or a tombstone version that suppresses older versions in that interval + +### 10.7 Multiple corrections over time + +If you correct the same logical record multiple times: +- you create multiple tx-time “belief layers” +- queries at different `txtime` see different beliefs +- coalesce within the same tx_from layer if desired + +### 10.8 Concurrent writers + +If concurrent commits can write the same logical record: +- you must serialize updates per logical id, or +- ensure deterministic conflict resolution (e.g., commit id ordering) + +--- + +## 11) DIFF Operator (Audit-Ready) + +DIFF compares two snapshots: + +Snapshot A: `(vA, tA)` +Snapshot B: `(vB, tB)` + +### 11.1 Entity diff (nodes/edges) + +```sql +DIFF +FROM (VALIDTIME @vA, TXNTIME @tA) +TO (VALIDTIME @vB, TXNTIME @tB) +MATCH (u:User {id:"Alice"})-[e:WORKS_AT]->(org) +RETURN u, e, org; +``` + +Output shape (Arrow-friendly): +- `diff_type` in {`ADDED`, `REMOVED`, `UPDATED`} +- `entity_kind` in {`node`, `edge`} +- identifiers (`label`, `id` / `src,dst,type`) +- optional field-level details (`field`, `old_value`, `new_value`) + +### 11.2 Result diff (diff a query’s output) + +```sql +DIFF RESULT +FROM (...) TO (...) +MATCH ... +SELECT ... +KEY BY ; +``` + +This is ideal for “why did this decision change?” audits. 
+ +--- + +## 12) Implementation Checklist + +### 12.1 Required primitives + +- `visible_version(logical_id, vtime, txtime)` +- `resolve_field(logical_id, field_idx, snapshot)` +- `find_versions_visible_in_tx(logical_id, tx_now)` (for writers) +- `insert_version(logical_id, VersionInfo*)` +- `close_version_tx(VersionInfo*, tx_now)` (mutate `tx_to`) +- `coalesce_adjacent_versions(logical_id, tx_from_layer)` (optional but recommended) + +### 12.2 Writer invariants to maintain + +- For a fixed `(logical_id, tx_layer)` the valid-time segments should not overlap. +- For a fixed `(logical_id, snapshot)` there should be at most one visible version per valid-time point. + +--- + +## 13) Recommended Defaults + +- Use half-open intervals `[from, to)`. +- Represent INF as `UINT64_MAX`. +- Use commit id or monotonic clock for `tx_from`. +- Default read behavior: + - If `AS OF TXNTIME` is not specified, treat as “latest” (now). + - If `AS OF VALIDTIME` is not specified, treat as “now” (or required). + +--- + +## 14) Appendix: ASCII Diagram Cheat Sheet + +### 14.1 Single correction (retroactive) + +``` +Time axis (VALIDTIME): 1 -------- 80 --------------- INF +Old belief (tx<120): Eng===============================> +New belief (tx>=120): Eng==========| Sales==============> + ^ split at 80 +TX axis: tx<120 sees old; tx>=120 sees new +``` + +--- + +## 15) Notes on Storage Minimization (Optional) + +You can reduce version explosion by: +- writing only the patched middle versions +- leaving unchanged parts represented by older versions plus careful tx windows +…but this complicates invariants. 
+ +For a first implementation, prefer: +- explicit left/mid/right segments +- coalescing adjacent identical segments + +--- + +## 16) Examples you can paste into docs + +### Simple correction update + +```sql +MATCH (u:User) WHERE u.name = "Alice" +FOR VALIDTIME [80, INF) +SET u.department = "Sales"; +``` + +### Safe replay query + +```sql +AS OF VALIDTIME 90 +AS OF TXNTIME 100 +MATCH (u:User) WHERE u.name="Alice" +SELECT u.department; +``` + +--- + +Suggested follow-up: add a short `spec_tests.md` with: +- input version chains +- update operations +- expected resulting chains +- expected query outputs at various snapshots diff --git a/include/clock.hpp b/include/clock.hpp new file mode 100644 index 0000000..433af64 --- /dev/null +++ b/include/clock.hpp @@ -0,0 +1,115 @@ +#ifndef CLOCK_HPP +#define CLOCK_HPP + +#include +#include +#include + +namespace tundradb { + +/** + * Abstract clock interface for time management. + * Allows injection of mock clocks for testing temporal queries. + * + * Usage in production: + * Clock::instance().now_nanos() // Uses SystemClock by default + * + * Usage in tests: + * MockClock mock; + * Clock::set_instance(&mock); + * mock.set_time(timestamp); + * // ... run tests ... + * Clock::reset(); // Restore SystemClock + */ +class Clock { + public: + virtual ~Clock() = default; + + /** + * Get current timestamp in nanoseconds since Unix epoch. + */ + virtual uint64_t now_nanos() const = 0; + + /** + * Get the global Clock instance. + */ + static Clock& instance() { + Clock* inst = instance_.load(std::memory_order_acquire); + return inst ? *inst : default_instance(); + } + + /** + * Set the global Clock instance (for testing). + * Pass nullptr to restore the default SystemClock. + */ + static void set_instance(Clock* clock) { + instance_.store(clock, std::memory_order_release); + } + + /** + * Reset to default SystemClock.
+ */ + static void reset() { set_instance(nullptr); } + + private: + static Clock& default_instance(); + static std::atomic instance_; +}; + +/** + * System clock using std::chrono (production). + */ +class SystemClock : public Clock { + public: + uint64_t now_nanos() const override { + auto now = std::chrono::system_clock::now(); + auto duration = now.time_since_epoch(); + return std::chrono::duration_cast(duration) + .count(); + } +}; + +/** + * Mock clock for testing (controllable time). + */ +class MockClock : public Clock { + public: + explicit MockClock(uint64_t initial_time = 0) : current_time_(initial_time) {} + + uint64_t now_nanos() const override { + return current_time_.load(std::memory_order_relaxed); + } + + /** + * Set the current time. + */ + void set_time(uint64_t nanos) { + current_time_.store(nanos, std::memory_order_relaxed); + } + + /** + * Advance time by delta nanoseconds. + */ + void advance(uint64_t delta_nanos) { + current_time_.fetch_add(delta_nanos, std::memory_order_relaxed); + } + + /** + * Advance time by seconds (convenience method). + */ + void advance_seconds(uint64_t seconds) { + advance(seconds * 1'000'000'000ULL); + } + + /** + * Advance time by milliseconds (convenience method). 
+ */ + void advance_millis(uint64_t millis) { advance(millis * 1'000'000ULL); } + + private: + mutable std::atomic current_time_; +}; + +} // namespace tundradb + +#endif // CLOCK_HPP diff --git a/include/config.hpp b/include/config.hpp index d9d442b..4a37bb0 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -36,6 +36,9 @@ class DatabaseConfig { bool validation_enabled = true; + // Enable temporal versioning (copy-on-write for time-travel queries) + bool versioning_enabled_ = false; + friend class DatabaseConfigBuilder; public: @@ -51,6 +54,7 @@ class DatabaseConfig { std::string get_db_path() const { return db_path; } bool is_persistence_enabled() const { return persistence_enabled; } bool is_validation_enabled() const { return validation_enabled; } + bool is_versioning_enabled() const { return versioning_enabled_; } }; class DatabaseConfigBuilder { @@ -100,6 +104,11 @@ class DatabaseConfigBuilder { return *this; } + DatabaseConfigBuilder &with_versioning_enabled(const bool enabled) { + config.versioning_enabled_ = enabled; + return *this; + } + DatabaseConfigBuilder &with_memory_scale_factor(const double factor) { config.shard_memory_pool_size = static_cast(defaults::SHARD_MEMORY_POOL_SIZE * factor); diff --git a/include/core.hpp b/include/core.hpp index c87b9bf..c9798e6 100644 --- a/include/core.hpp +++ b/include/core.hpp @@ -237,8 +237,9 @@ class Shard { return nodes_[node_id]->update(field, value, update_type); } - arrow::Result> get_table() { - if (dirty_ || !table_) { + arrow::Result> get_table(TemporalContext *ctx) { + // if we have ctx we need to create a new table every time + if (dirty_ || !table_ || ctx) { ARROW_ASSIGN_OR_RAISE(const auto schema, schema_registry_->get(schema_name)); auto arrow_schema = schema->arrow(); @@ -251,9 +252,20 @@ class Shard { result, [](const std::shared_ptr &a, const std::shared_ptr &b) { return a->id < b->id; }); - ARROW_ASSIGN_OR_RAISE(table_, create_table(schema, result, chunk_size)); - dirty_ = false; + 
ARROW_ASSIGN_OR_RAISE(auto table_res, + create_table(schema, result, chunk_size, ctx)); + + if (!ctx) { + // Non-temporal query: cache the table for reuse + table_ = table_res; + dirty_ = false; + } + + // Return the newly created table (temporal or non-temporal) + return table_res; } + + // Reuse cached table (only for non-temporal queries) return table_; } @@ -575,7 +587,7 @@ class ShardManager { } arrow::Result>> get_tables( - const std::string &schema_name) { + const std::string &schema_name, TemporalContext *temporal_context) { const auto schema_it = shards_.find(schema_name); if (schema_it == shards_.end()) { return std::vector>{}; @@ -590,7 +602,7 @@ class ShardManager { std::vector> tables; for (const auto &shard : sorted_shards) { - ARROW_ASSIGN_OR_RAISE(auto table, shard->get_table()); + ARROW_ASSIGN_OR_RAISE(auto table, shard->get_table(temporal_context)); if (table->num_rows() > 0) { tables.push_back(table); } @@ -673,7 +685,8 @@ class Database { shard_manager_( std::make_shared(schema_registry_, config)), node_manager_(std::make_shared( - schema_registry_, config.is_validation_enabled())), + schema_registry_, config.is_validation_enabled(), true, + config.is_versioning_enabled())), config_(config), persistence_enabled_(config.is_persistence_enabled()), edge_store_(std::make_shared(0, config.get_chunk_size())) { @@ -711,6 +724,8 @@ class Database { return metadata_manager_; } + std::shared_ptr get_node_manager() { return node_manager_; } + arrow::Result initialize() { if (persistence_enabled_) { auto storage_init = this->storage_->initialize(); @@ -792,7 +807,9 @@ class Database { arrow::Result compact_all() { return shard_manager_->compact_all(); } arrow::Result> get_table( - const std::string &schema_name, size_t chunk_size = 10000) const { + const std::string &schema_name, + TemporalContext *temporal_context = nullptr, + size_t chunk_size = 10000) const { ARROW_ASSIGN_OR_RAISE(auto schema, schema_registry_->get(schema_name)); auto arrow_schema = 
schema->arrow(); ARROW_ASSIGN_OR_RAISE(auto all_nodes, @@ -813,7 +830,7 @@ class Database { return a->id < b->id; }); - return create_table(schema, all_nodes, chunk_size); + return create_table(schema, all_nodes, chunk_size, temporal_context); } arrow::Result get_shard_count(const std::string &schema_name) const { diff --git a/include/node.hpp b/include/node.hpp index db660c7..c34388a 100644 --- a/include/node.hpp +++ b/include/node.hpp @@ -8,7 +8,9 @@ #include "logger.hpp" #include "node_arena.hpp" +#include "node_view.hpp" #include "schema.hpp" +#include "temporal_context.hpp" #include "types.hpp" namespace tundradb { @@ -84,6 +86,9 @@ class Node { [[nodiscard]] std::shared_ptr get_schema() const { return schema_; } + // Get node handle (for testing and internal use) + [[nodiscard]] NodeHandle *get_handle() const { return handle_.get(); } + [[deprecated]] arrow::Result update(const std::string &field, Value value, UpdateType update_type) { @@ -119,18 +124,45 @@ class Node { const Value &value) { return update(field, value, SET); } + + /** + * Create a temporal view of this node. + * + * @param ctx TemporalContext with snapshot (valid_time, tx_time). + * If nullptr, returns view of current version (no time-travel). + * @return NodeView that resolves version once and caches it. 
+ * + * Usage: + * TemporalContext ctx(TemporalSnapshot::as_of_valid(timestamp)); + * auto view = node.view(&ctx); + * auto age = view.get_value_ptr(age_field); + */ + NodeView view(TemporalContext *ctx = nullptr) { + if (!ctx) { + // No temporal context > use the current version + return {this, handle_->version_info_, arena_.get(), layout_}; + } + + // Resolve version using TemporalContext + VersionInfo *resolved = ctx->resolve_version(id, *handle_); + return {this, resolved, arena_.get(), layout_}; + } }; class NodeManager { public: explicit NodeManager(std::shared_ptr schema_registry, const bool validation_enabled = true, - const bool use_node_arena = true) { + const bool use_node_arena = true, + const bool enable_versioning = false) { validation_enabled_ = validation_enabled; use_node_arena_ = use_node_arena; schema_registry_ = std::move(schema_registry); layout_registry_ = std::make_shared(); - node_arena_ = node_arena_factory::create_free_list_arena(layout_registry_); + // Create arena with versioning enabled if requested + node_arena_ = node_arena_factory::create_free_list_arena( + layout_registry_, NodeArena::kInitialSize, NodeArena::kMinFragmentSize, + enable_versioning); } ~NodeManager() { node_arena_->clear(); } @@ -191,18 +223,25 @@ class NodeManager { if (use_node_arena_) { NodeHandle node_handle = node_arena_->allocate_node(layout_); - // Logger::get_instance().debug("node has been allocated at {}", - // node_handle.ptr); - node_arena_->set_field_value(node_handle, layout_, - schema_->get_field("id"), Value{id}); + + // Initial population of v0: write directly to base node + // Use set_field_value_v0 for all fields (doesn't create versions) + if (!node_arena_->set_field_value_v0( + node_handle, layout_, schema_->get_field("id"), Value{id})) { + return arrow::Status::Invalid("Failed to set id field"); + } + for (const auto &field : schema_->fields()) { if (field->name() == "id") continue; - if (!data.contains(field->name())) { - // 
Logger::get_instance().debug("{} set NA value", field->name()); - node_arena_->set_field_value(node_handle, layout_, field, Value()); - } else { - const auto value = data.find(field->name())->second; - node_arena_->set_field_value(node_handle, layout_, field, value); + + Value value; + if (data.contains(field->name())) { + value = data.find(field->name())->second; + } // else: Value() = NULL + + if (!node_arena_->set_field_value_v0(node_handle, layout_, field, + value)) { + return arrow::Status::Invalid("Failed to set field ", field->name()); } } @@ -280,6 +319,59 @@ class NodeManager { } }; +// ============================================================================ +// NodeView inline implementations (after Node is fully defined) +// ============================================================================ + +inline arrow::Result NodeView::get_value_ptr( + const std::shared_ptr &field) const { + assert(arena_ != nullptr && "NodeView created with null arena"); + assert(node_ != nullptr && "NodeView created with null node"); + + if (resolved_version_ == nullptr) { + // Non-versioned node -> delegate to Node + return node_->get_value_ptr(field); + } + + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Versioned node must have a handle"); + + return arena_->get_field_value_ptr_from_version(*handle, resolved_version_, + layout_, field); +} + +inline arrow::Result NodeView::get_value( + const std::shared_ptr &field) const { + assert(arena_ != nullptr && "NodeView created with null arena"); + assert(node_ != nullptr && "NodeView created with null node"); + + if (resolved_version_ == nullptr) { + // Non-versioned node -> delegate to Node + return node_->get_value(field); + } + + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Versioned node must have a handle"); + + return arena_->get_field_value_from_version(*handle, resolved_version_, + layout_, field); +} + +inline bool NodeView::is_visible() const { 
+ assert(arena_ != nullptr && "NodeView created with null arena"); + assert(node_ != nullptr && "NodeView created with null node"); + const NodeHandle *handle = node_->get_handle(); + assert(handle != nullptr && "Node must have a handle"); + + // Non-versioned nodes are always visible + if (!handle->is_versioned()) { + return true; + } + + // For versioned nodes, check if we found a visible version at the snapshot + return resolved_version_ != nullptr; +} + } // namespace tundradb #endif // NODE_HPP \ No newline at end of file diff --git a/include/node_arena.hpp b/include/node_arena.hpp index 7b54022..e91f45f 100644 --- a/include/node_arena.hpp +++ b/include/node_arena.hpp @@ -10,6 +10,7 @@ #include #include +#include "clock.hpp" #include "free_list_arena.hpp" #include "mem_arena.hpp" #include "memory_arena.hpp" @@ -26,13 +27,24 @@ struct NodeHandle; * * Stores only changed fields; forms a linked list via prev pointer. * All versions share the same base node data. + * + * Bitemporal support: + * - valid_from/valid_to: VALIDTIME (when the fact was true in the domain) + * - tx_from/tx_to: TXNTIME (when the database knew about the fact) */ struct VersionInfo { - // Temporal validity interval: [valid_from, valid_to) + // Version identifier uint64_t version_id = 0; + + // VALIDTIME: domain validity interval [valid_from, valid_to) uint64_t valid_from = 0; uint64_t valid_to = std::numeric_limits::max(); + // TXNTIME: system knowledge interval [tx_from, tx_to) + // When the database recorded/believed this version + uint64_t tx_from = 0; + uint64_t tx_to = std::numeric_limits::max(); + // Linked list to previous version VersionInfo* prev = nullptr; @@ -45,14 +57,38 @@ struct VersionInfo { VersionInfo() = default; + // Constructor: initializes both valid and tx times to the same value VersionInfo(uint64_t vid, uint64_t ts_from, VersionInfo* prev_ver = nullptr) - : version_id(vid), valid_from(ts_from), prev(prev_ver) {} + : version_id(vid), + valid_from(ts_from), + 
tx_from(ts_from), // Initially tx_from = valid_from + prev(prev_ver) {} + // Check if valid at a specific VALIDTIME bool is_valid_at(uint64_t ts) const { return valid_from <= ts && ts < valid_to; } - // O(N) + // Check if visible at a bitemporal snapshot (valid_time, tx_time) + bool is_visible_at(uint64_t valid_time, uint64_t tx_time) const { + return (valid_from <= valid_time && valid_time < valid_to) && + (tx_from <= tx_time && tx_time < tx_to); + } + + // Find version visible at bitemporal snapshot + const VersionInfo* find_version_at_snapshot(uint64_t valid_time, + uint64_t tx_time) const { + const VersionInfo* current = this; + while (current != nullptr) { + if (current->is_visible_at(valid_time, tx_time)) { + return current; + } + current = current->prev; + } + return nullptr; + } + + // Legacy: find version at VALIDTIME only (ignores TXNTIME) const VersionInfo* find_version_at_time(uint64_t ts) const { const VersionInfo* current = this; while (current != nullptr) { @@ -277,6 +313,10 @@ struct NodeHandle { */ class NodeArena { public: + // consts + static constexpr size_t kInitialSize = 2 * 1024 * 1024; // 2MB default + static constexpr size_t kMinFragmentSize = 64; // 64 bytes minimum fragment + /** * Constructor takes any MemArena implementation + StringArena for strings. 
* @@ -481,7 +521,7 @@ class NodeArena { const std::vector>& field_updates) { if (field_updates.empty()) return true; - // Non-versioned: update each field directly + // Non-versioned: write directly to base node if (!versioning_enabled_ || !current_handle.is_versioned()) { for (const auto& [field_idx, value] : field_updates) { if (field_idx >= layout->get_fields().size()) { @@ -511,20 +551,52 @@ class NodeArena { VersionInfo* new_version_info = new (version_info_memory) VersionInfo(new_version_id, now, old_version_info); - // Process each field update + // ======================================================================== + // BATCH ALLOCATION: Calculate total memory needed for all fields + // ======================================================================== + size_t total_size = 0; + size_t max_alignment = 1; + + // First pass: calculate total size and max alignment for (const auto& [field_idx, new_value] : field_updates) { if (field_idx >= layout->get_fields().size()) { return arrow::Status::IndexError("Field index out of bounds"); } + if (!new_value.is_null()) { // NULL uses nullptr sentinel (no allocation) + const FieldLayout& field_layout = layout->get_fields()[field_idx]; + total_size += field_layout.size; + max_alignment = std::max(max_alignment, field_layout.alignment); + } + } + + // Batch allocate memory for all non-null fields + char* batch_memory = nullptr; + if (total_size > 0) { + batch_memory = static_cast( + version_arena_->allocate(total_size, max_alignment)); + if (!batch_memory) { + return arrow::Status::OutOfMemory( + "Failed to batch allocate field storage"); + } + } + + // Second pass: write values and assign pointers + size_t offset = 0; + for (const auto& [field_idx, new_value] : field_updates) { const FieldLayout& field_layout = layout->get_fields()[field_idx]; // Handle NULL: use nullptr sentinel if (new_value.is_null()) { new_version_info->updated_fields[field_idx] = nullptr; - continue; + continue; // Skip batch_memory usage 
for NULL fields } + // At this point, field is non-NULL, so batch_memory must be allocated + // (because total_size > 0 when any field is non-NULL) + assert(batch_memory != nullptr && + "Batch memory must be allocated for non-null fields"); + // Prepare value (convert strings to StringRef) Value storage_value = new_value; if (new_value.type() == ValueType::STRING) { @@ -533,12 +605,9 @@ class NodeArena { storage_value = Value{str_ref, field_layout.type}; } - // Allocate and write field value - char* field_storage = static_cast( - version_arena_->allocate(field_layout.size, field_layout.alignment)); - if (!field_storage) { - return arrow::Status::OutOfMemory("Failed to allocate field storage"); - } + // Use batch-allocated memory (safe because batch_memory != nullptr here) + char* field_storage = batch_memory + offset; + offset += field_layout.size; if (!write_value_to_memory(field_storage, field_layout.type, storage_value)) { @@ -655,12 +724,71 @@ class NodeArena { return version_counter_.load(std::memory_order_relaxed); } + /** + * Get the field value pointer starting from a specific version. + * Used by NodeView for temporal queries. 
+ * + * @param handle NodeHandle (for accessing base node if needed) + * @param version Starting version (pre-resolved by TemporalContext) + * @param layout Schema layout + * @param field Field to read + * @return Pointer to field data or error if not found + */ + static const char* get_field_value_ptr_from_version( + const NodeHandle& handle, const VersionInfo* version, + const std::shared_ptr& layout, + const std::shared_ptr& field) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return nullptr; + } + + auto [found, field_ptr] = + get_field_ptr_from_version_chain(version, field_layout->index); + + if (found) { + return field_ptr; + } + + // Not in version chain, read from base node + return layout->get_field_value_ptr(static_cast(handle.ptr), + field_layout->index); + } + + /** + * Get field value starting from a specific version. + * Used by NodeView for temporal queries. + */ + static arrow::Result get_field_value_from_version( + const NodeHandle& handle, const VersionInfo* version, + const std::shared_ptr& layout, + const std::shared_ptr& field) { + const FieldLayout* field_layout = layout->get_field_layout(field); + if (!field_layout) { + return arrow::Status::KeyError("Field not found in layout"); + } + + // Try to find in version chain first + auto [found, field_ptr] = + get_field_ptr_from_version_chain(version, field_layout->index); + + if (found) { + if (field_ptr == nullptr) { + // Explicit NULL value + return Value{}; + } + // Read value from version chain + return layout->get_field_value_from_ptr(field_ptr, *field_layout); + } + + // Not in version chain, read from base node + return layout->get_field_value(static_cast(handle.ptr), + *field_layout); + } + private: static uint64_t get_current_timestamp_ns() { - auto now = std::chrono::system_clock::now(); - auto duration = now.time_since_epoch(); - return std::chrono::duration_cast(duration) - .count(); + return Clock::instance().now_nanos(); } /** Write 
field directly to node memory (handles strings). */ @@ -698,22 +826,27 @@ class NodeArena { } /** Traverse the version chain to find field pointer. */ - static const char* get_field_ptr_from_version_chain( - const VersionInfo* version_info, uint16_t field_idx, - const SchemaLayout* layout) { + /** + * Get field pointer from version chain. + * Returns pair: + * - {true, nullptr} = field found and is explicitly NULL + * - {true, ptr} = field found with value at ptr + * - {false, nullptr} = field not found in version chain (read from base) + */ + static std::pair get_field_ptr_from_version_chain( + const VersionInfo* version_info, uint16_t field_idx) { const VersionInfo* current = version_info; while (current != nullptr) { // Check if this version has an override for this field if (auto it = current->updated_fields.find(field_idx); it != current->updated_fields.end()) { - return it->second; + return {true, it->second}; // Found (value or nullptr for NULL) } current = current->prev; } - // Not found in any version, would need to read from base node - // (caller should handle this case) - return nullptr; + // Not found in any version - read from base node + return {false, nullptr}; } /** Write value to memory (type-safe). */ @@ -769,7 +902,7 @@ namespace node_arena_factory { /** Create NodeArena with MemoryArena (fast, no individual deallocation). */ inline std::unique_ptr create_simple_arena( const std::shared_ptr& layout_registry, - size_t initial_size = 2 * 1024 * 1024, // 2MB default + size_t initial_size = NodeArena::kInitialSize, bool enable_versioning = false) { auto mem_arena = std::make_unique(initial_size); return std::make_unique(std::move(mem_arena), layout_registry, @@ -779,8 +912,8 @@ inline std::unique_ptr create_simple_arena( /** Create NodeArena with FreeListArena (supports individual deallocation). 
*/ inline std::unique_ptr create_free_list_arena( const std::shared_ptr& layout_registry, - size_t initial_size = 2 * 1024 * 1024, // 2MB default - size_t min_fragment_size = 64, // 64 bytes minimum fragment + size_t initial_size = NodeArena::kInitialSize, + size_t min_fragment_size = NodeArena::kMinFragmentSize, bool enable_versioning = false) { auto mem_arena = std::make_unique(initial_size, min_fragment_size); diff --git a/include/node_view.hpp b/include/node_view.hpp new file mode 100644 index 0000000..23eee3b --- /dev/null +++ b/include/node_view.hpp @@ -0,0 +1,113 @@ +#ifndef NODE_VIEW_HPP +#define NODE_VIEW_HPP + +#include + +#include "node_arena.hpp" +#include "schema.hpp" +#include "temporal_context.hpp" +#include "types.hpp" + +namespace tundradb { + +// Forward declarations +class Node; + +/** + * NodeView: lightweight view of a Node at a specific temporal snapshot. + * + * Purpose: + * - Provides the same field access interface as Node + * - Resolves version once (at construction), then uses it for all field reads + * - Avoids per-field map lookups in TemporalContext + * + * Lifecycle: + * - Created by Node::view(TemporalContext*) + * - Should be short-lived (lifetime tied to query execution) + * + * Usage: + * TemporalContext ctx(TemporalSnapshot::as_of_valid(timestamp)); + * auto view = node->view(&ctx); + * auto age = view.get_value_ptr(age_field); // Uses resolved version + */ +class NodeView { + private: + Node* node_; // Back-reference to Node (for node_id, etc.) + VersionInfo* resolved_version_; // Resolved once at construction + NodeArena* arena_; // For field resolution + std::shared_ptr + layout_; // Schema layout (shared_ptr for proper lifetime) + + public: + /** + * Constructor: resolves version immediately. + * + * If ctx is nullptr, uses current version (no time-travel). + * If no version is visible at snapshot, resolved_version_ will be nullptr. 
+ */ + NodeView(Node* node, VersionInfo* resolved_version, NodeArena* arena, + std::shared_ptr layout) + : node_(node), + resolved_version_(resolved_version), + arena_(arena), + layout_(std::move(layout)) {} + + /** + * Get field value pointer (same interface as Node). + * + * If resolved_version_ is nullptr: + * - Node is non-versioned -> read from base node via Node::get_value_ptr + * + * Otherwise: + * - Uses arena to resolve field from version chain + * - Starts from resolved_version_ (already filtered by time) + */ + arrow::Result get_value_ptr( + const std::shared_ptr& field) const; + + /** + * Get field value as ValueRef (lightweight reference to data). + */ + arrow::Result get_value_ref( + const std::shared_ptr& field) const { + auto ptr_result = get_value_ptr(field); + if (!ptr_result.ok()) { + return ptr_result.status(); + } + + return ValueRef{ptr_result.ValueOrDie(), field->type()}; + } + + /** + * Get field value (copies data into Value). + */ + arrow::Result get_value(const std::shared_ptr& field) const; + + /** + * Check if this view represents a visible node. + * + * Returns true if: + * - Node is non-versioned (resolved_version_ == nullptr && arena exists) + * - Node is versioned and has a visible version at the snapshot + * + * Returns false if: + * - Node is versioned but didn't exist at the temporal snapshot + */ + [[nodiscard]] bool is_visible() const; + + /** + * Get the resolved version info. + */ + [[nodiscard]] const VersionInfo* get_resolved_version() const { + return resolved_version_; + } + + /** + * Get the underlying node. 
+ */ + [[nodiscard]] Node* get_node() const { return node_; } +}; + +} // namespace tundradb + +#endif // NODE_VIEW_HPP diff --git a/include/query.hpp b/include/query.hpp index a84e462..709c932 100644 --- a/include/query.hpp +++ b/include/query.hpp @@ -14,6 +14,7 @@ #include "node.hpp" #include "schema.hpp" +#include "temporal_context.hpp" #include "types.hpp" namespace tundradb { @@ -809,16 +810,19 @@ class Query { std::shared_ptr select, bool optimize_where, - ExecutionConfig execution_config) + ExecutionConfig execution_config, + std::optional temporal_snapshot = std::nullopt) : from_(std::move(from)), clauses_(std::move(clauses)), select_(std::move(select)), inline_where_(optimize_where), - execution_config_(execution_config) {} + execution_config_(execution_config), + temporal_snapshot_(std::move(temporal_snapshot)) {} class Builder; [[nodiscard]] const SchemaRef& from() const { return from_; } @@ -834,6 +838,11 @@ class Query { return execution_config_; } + [[nodiscard]] const std::optional& temporal_snapshot() + const { + return temporal_snapshot_; + } + static Builder from(const std::string& schema) { return Builder(schema); } class Builder { @@ -843,6 +852,7 @@ class Query { std::shared_ptr