From c75636224bbb0a6d6aed0a6a211aacc35e982841 Mon Sep 17 00:00:00 2001 From: Oleksii Skorykh Date: Thu, 18 Dec 2025 11:56:33 +0100 Subject: [PATCH] Trim repository to in-memory article scope --- README.md | 22 +- cmd/godb-server/main.go | 22 +- internal/engine/article4_select_test.go | 105 ++ internal/engine/engine_execute.go | 4 - internal/index/btree/README.md | 30 - internal/index/btree/btree_test.go | 345 ----- internal/index/btree/file.go | 1217 ----------------- internal/index/btree/index.go | 35 - internal/index/btree/manager.go | 73 - internal/index/btree/page.go | 77 -- internal/index/btree/types.go | 8 - internal/sql/ast.go | 7 - internal/sql/parse_index.go | 30 - internal/sql/parser.go | 4 +- internal/storage/README.md | 14 +- internal/storage/filestore/README.md | 103 -- internal/storage/filestore/filestore.go | 321 ----- .../filestore/filestore_recovery_test.go | 416 ------ internal/storage/filestore/filestore_test.go | 368 ----- internal/storage/filestore/format.go | 309 ----- internal/storage/filestore/page.go | 224 --- internal/storage/filestore/page_test.go | 150 -- internal/storage/filestore/recovery.go | 370 ----- internal/storage/filestore/tx.go | 628 --------- internal/storage/filestore/wal.go | 258 ---- internal/storage/filestore/wal_test.go | 86 -- internal/storage/memstore/memstore.go | 136 +- internal/storage/memstore/memstore_test.go | 76 - internal/storage/storage.go | 3 - 29 files changed, 128 insertions(+), 5313 deletions(-) create mode 100644 internal/engine/article4_select_test.go delete mode 100644 internal/index/btree/README.md delete mode 100644 internal/index/btree/btree_test.go delete mode 100644 internal/index/btree/file.go delete mode 100644 internal/index/btree/index.go delete mode 100644 internal/index/btree/manager.go delete mode 100644 internal/index/btree/page.go delete mode 100644 internal/index/btree/types.go delete mode 100644 internal/sql/parse_index.go delete mode 100644 internal/storage/filestore/README.md delete 
mode 100644 internal/storage/filestore/filestore.go delete mode 100644 internal/storage/filestore/filestore_recovery_test.go delete mode 100644 internal/storage/filestore/filestore_test.go delete mode 100644 internal/storage/filestore/format.go delete mode 100644 internal/storage/filestore/page.go delete mode 100644 internal/storage/filestore/page_test.go delete mode 100644 internal/storage/filestore/recovery.go delete mode 100644 internal/storage/filestore/tx.go delete mode 100644 internal/storage/filestore/wal.go delete mode 100644 internal/storage/filestore/wal_test.go diff --git a/README.md b/README.md index 88642a5..1d17293 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,7 @@ GoDB is a tiny educational database engine written in Go. It exists as a playgro ## Features -- Pluggable storage engines: - - In-memory store for quick experimentation - - Experimental on-disk filestore with a simple WAL (write-ahead log) +- In-memory storage engine for quick experimentation - Simple SQL support: - `CREATE TABLE` - `INSERT INTO ... VALUES (...)` @@ -34,7 +32,7 @@ GoDB is a tiny educational database engine written in Go. It exists as a playgro git clone https://github.com/askorykh/godb.git cd godb -# Run the REPL server (creates ./data when using the filestore) +# Run the REPL server (in-memory storage) go run ./cmd/godb-server ``` @@ -50,15 +48,13 @@ COMMIT; ``` -### Storage backends +### Storage backend -By default the REPL wires the engine to the on-disk filestore located in `./data`. It uses a straightforward file format and an append-only WAL for durability. On startup, the filestore replays committed WAL entries to rebuild table files. Rollbacks still only cancel the in-memory engine transaction—the on-disk table files are not reverted yet. See [`internal/storage/filestore/README.md`](internal/storage/filestore/README.md) for details. 
- -If you want a pure in-memory experience (no files written), switch to the `memstore` engine inside `cmd/godb-server/main.go` by swapping the initialization block. +The REPL uses the in-memory storage engine to match the article walkthroughs and keep the footprint tiny. ### Transactions -The engine understands `BEGIN`, `COMMIT`, and `ROLLBACK` to group multiple statements. Transactions are executed against the configured storage backend. With the default filestore backend, commits fsync the WAL before returning; rollbacks do not undo writes on disk yet, but committed WAL entries are replayed on startup. +The engine understands `BEGIN`, `COMMIT`, and `ROLLBACK` to group multiple statements. Transactions are executed against the configured storage backend. In the in-memory backend used for the articles, commit simply swaps the staged tables into place and rollback is a no-op. ## Running tests @@ -75,10 +71,7 @@ internal/ engine/ # DB engine, execution planner, and simple evaluator sql/ # SQL parser and AST definitions storage/ - filestore/ # On-disk storage with WAL and recovery memstore/ # In-memory storage implementation - index/ - btree/ # WIP B-tree index structures used by the filestore ``` ## Architecture @@ -88,20 +81,15 @@ graph TD; REPL --> Parser[SQL parser]; Parser --> Engine[Execution engine]; Engine --> Storage[Storage interface]; - Storage --> Filestore[On-disk store + WAL]; Storage --> Memstore[In-memory store]; ``` - `cmd/godb-server` reads input, handles meta commands, and forwards SQL to the engine. - `internal/sql` parses SQL into AST nodes and validates column types. - `internal/engine` executes statements (create, insert, select, update, delete) against the storage implementation. -- `internal/storage/filestore` provides the default on-disk storage layer with WAL and recovery. - `internal/storage/memstore` provides an in-memory table storage layer used for testing/experiments. 
- ## Roadmap (very rough) -- Improve on-disk storage (rollback/undo, durability tests, compaction) - Better query planner / optimizer -- Indexes integrated into query execution - Richer SQL surface and multi-statement transaction semantics - Maybe: distributed experiments later diff --git a/cmd/godb-server/main.go b/cmd/godb-server/main.go index 0779408..fc3fe84 100644 --- a/cmd/godb-server/main.go +++ b/cmd/godb-server/main.go @@ -4,7 +4,7 @@ import ( "bufio" "errors" "fmt" - "goDB/internal/storage/filestore" + "goDB/internal/storage/memstore" "io" "log" @@ -17,21 +17,15 @@ import ( func main() { fmt.Println("GoDB server starting (REPL mode)…") - // choose storage implementation - // mem := memstore.New() - // eng := engine.New(mem) - - fs, err := filestore.New("./data") - if err != nil { - log.Fatalf("failed to init filestore: %v", err) - } - eng := engine.New(fs) + // choose storage implementation (in-memory for the article code) + mem := memstore.New() + eng := engine.New(mem) if err := eng.Start(); err != nil { log.Fatalf("engine start failed: %v", err) } - fmt.Println("Engine started successfully (using on-disk filestore at ./data).") + fmt.Println("Engine started successfully (using in-memory storage).") fmt.Println("Type SQL statements like:") fmt.Println(" CREATE TABLE users (id INT, name STRING, active BOOL);") fmt.Println(" INSERT INTO users VALUES (1, 'Alice', true);") @@ -124,9 +118,11 @@ func handleMetaCommand(line string, eng *engine.DBEngine) bool { fmt.Println() fmt.Println(" SELECT * FROM tableName;") fmt.Println(" SELECT col1, col2, ... 
FROM tableName;") - fmt.Println(" SELECT col1, col2 FROM tableName WHERE column = literal;") - fmt.Println(" - WHERE: supports only equality (=)") + fmt.Println(" SELECT col1, col2 FROM tableName WHERE column <op> literal;") + fmt.Println(" - WHERE comparisons: =, !=, <, <=, >, >=") fmt.Println(" - WHERE literals: INT, FLOAT, STRING ('text'), BOOL") + fmt.Println(" - ORDER BY column [ASC|DESC]") + fmt.Println(" - LIMIT n") fmt.Println() fmt.Println("Meta commands:") fmt.Println(" .tables List available tables") diff --git a/internal/engine/article4_select_test.go b/internal/engine/article4_select_test.go new file mode 100644 index 0000000..b6bc2fc --- /dev/null +++ b/internal/engine/article4_select_test.go @@ -0,0 +1,105 @@ +package engine + +import ( + "testing" + + "goDB/internal/sql" + "goDB/internal/storage/memstore" +) + +// Article 4 coverage: projection, WHERE filtering, comparisons, ORDER BY, and LIMIT. +func TestArticle4_SelectFeatures(t *testing.T) { + store := memstore.New() + eng := New(store) + + if err := eng.Start(); err != nil { + t.Fatalf("Start failed: %v", err) + } + + createSQL := "CREATE TABLE users (id INT, name STRING, age INT);" + stmt, err := sql.Parse(createSQL) + if err != nil { + t.Fatalf("Parse CREATE failed: %v", err) + } + if _, _, err := eng.Execute(stmt); err != nil { + t.Fatalf("Execute CREATE failed: %v", err) + } + + inserts := []string{ + "INSERT INTO users VALUES (1, 'Ada', 30);", + "INSERT INTO users VALUES (2, 'Bea', 18);", + "INSERT INTO users VALUES (3, 'Cara', 21);", + "INSERT INTO users VALUES (4, 'Drew', 16);", + "INSERT INTO users VALUES (5, 'Eli', 22);", + } + for _, q := range inserts { + stmt, err := sql.Parse(q) + if err != nil { + t.Fatalf("Parse INSERT failed for %q: %v", q, err) + } + if _, _, err := eng.Execute(stmt); err != nil { + t.Fatalf("Execute INSERT failed for %q: %v", q, err) + } + } + + // Projection + WHERE with ">" + ORDER BY + LIMIT + selectSQL := "SELECT name, age FROM users WHERE age > 18 ORDER BY age 
LIMIT 3;" + selStmt, err := sql.Parse(selectSQL) + if err != nil { + t.Fatalf("Parse SELECT failed: %v", err) + } + + cols, rows, err := eng.Execute(selStmt) + if err != nil { + t.Fatalf("Execute SELECT failed: %v", err) + } + + if len(cols) != 2 || cols[0] != "name" || cols[1] != "age" { + t.Fatalf("unexpected projection: %#v", cols) + } + + if len(rows) != 3 { + t.Fatalf("expected 3 rows after LIMIT, got %d", len(rows)) + } + + gotNames := []string{rows[0][0].S, rows[1][0].S, rows[2][0].S} + want := []string{"Cara", "Eli", "Ada"} // ages 21, 22, 30 (ordered asc) + for i := range want { + if gotNames[i] != want[i] { + t.Fatalf("unexpected order at %d: got %q want %q (names=%v)", i, gotNames[i], want[i], gotNames) + } + } + + // WHERE with equality + eqSQL := "SELECT id FROM users WHERE age = 18;" + eqStmt, err := sql.Parse(eqSQL) + if err != nil { + t.Fatalf("Parse SELECT (=) failed: %v", err) + } + cols, rows, err = eng.Execute(eqStmt) + if err != nil { + t.Fatalf("Execute SELECT (=) failed: %v", err) + } + + if len(rows) != 1 || rows[0][0].I64 != 2 { + t.Fatalf("unexpected equality results: cols=%v rows=%v", cols, rows) + } + + // WHERE with "<" and ORDER BY to keep determinism + ltSQL := "SELECT name FROM users WHERE age < 18 ORDER BY name;" + ltStmt, err := sql.Parse(ltSQL) + if err != nil { + t.Fatalf("Parse SELECT (<) failed: %v", err) + } + cols, rows, err = eng.Execute(ltStmt) + if err != nil { + t.Fatalf("Execute SELECT (<) failed: %v", err) + } + + if len(cols) != 1 || cols[0] != "name" { + t.Fatalf("unexpected projection for < query: %v", cols) + } + if len(rows) != 1 || rows[0][0].S != "Drew" { + t.Fatalf("unexpected < query results: rows=%v", rows) + } +} diff --git a/internal/engine/engine_execute.go b/internal/engine/engine_execute.go index a44fb80..96c676c 100644 --- a/internal/engine/engine_execute.go +++ b/internal/engine/engine_execute.go @@ -24,10 +24,6 @@ func (e *DBEngine) Execute(stmt sql.Statement) ([]string, []sql.Row, error) { err := 
e.CreateTable(s.TableName, s.Columns) return nil, nil, err - case *sql.CreateIndexStmt: - err := e.store.CreateIndex(s.IndexName, s.TableName, s.ColumnName) - return nil, nil, err - case *sql.InsertStmt: return nil, nil, e.executeInsert(s) diff --git a/internal/index/btree/README.md b/internal/index/btree/README.md deleted file mode 100644 index 86548ff..0000000 --- a/internal/index/btree/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# B-Tree index implementation - -This package implements a simple on-disk B-Tree used by the storage layer for -single-column integer indexes. - -## File layout - -- Every index lives in its own file with magic header `BTREE1` followed by a - root page ID and page count. -- Pages are 4KB and come in two flavors: - - **Leaf pages (type 1):** sorted `[key, RID]` pairs. Each entry stores the - indexed `int64` key plus the `(pageID, slotID)` of the row inside the - filestore heap page. - - **Internal pages (type 2):** child pointers interleaved with separator keys - to guide navigation toward leaves. - -See [`page.go`](page.go) for the exact header and slot encoding details used by -both page types. - -## Manager and API - -- `Manager` (in [`manager.go`](manager.go)) caches open indexes and materializes - filenames using the `table_column.idx` convention inside the database - directory. -- `Index` (defined in [`index.go`](index.go)) supports `Insert`, `Search`, and - deletion operations. The current implementation focuses on inserts and lookups; - delete paths are still marked TODO in `file.go`. - -Index pages are split on insert when they run out of space, propagating new -separator keys upward and creating new roots as needed. 
diff --git a/internal/index/btree/btree_test.go b/internal/index/btree/btree_test.go deleted file mode 100644 index d26f356..0000000 --- a/internal/index/btree/btree_test.go +++ /dev/null @@ -1,345 +0,0 @@ -package btree - -import ( - "path/filepath" - "testing" -) - -func TestLeafInsertAndSearch(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "idx.idx") - - idxIface, err := OpenFileIndex(path, Meta{TableName: "t", Column: "id"}) - if err != nil { - t.Fatalf("OpenFileIndex failed: %v", err) - } - idx := idxIface.(*fileIndex) - defer idx.Close() - - rid := RID{PageID: 1, SlotID: 10} - if err := idx.Insert(42, rid); err != nil { - t.Fatalf("Insert failed: %v", err) - } - - got, err := idx.Search(42) - if err != nil { - t.Fatalf("Search failed: %v", err) - } - if len(got) != 1 { - t.Fatalf("expected 1 RID, got %d", len(got)) - } - if got[0] != rid { - t.Fatalf("RID mismatch: got %+v, want %+v", got[0], rid) - } -} - -func TestLeafInsertOrderAndDuplicates(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "idx.idx") - - idxIface, err := OpenFileIndex(path, Meta{TableName: "t", Column: "id"}) - if err != nil { - t.Fatalf("OpenFileIndex failed: %v", err) - } - idx := idxIface.(*fileIndex) - defer idx.Close() - - // Insert out of order + duplicates - rids := []RID{ - {PageID: 1, SlotID: 1}, - {PageID: 1, SlotID: 2}, - {PageID: 1, SlotID: 3}, - } - _ = idx.Insert(50, rids[0]) - _ = idx.Insert(10, rids[1]) - _ = idx.Insert(50, rids[2]) - - // Check duplicates for 50 - got, err := idx.Search(50) - if err != nil { - t.Fatalf("Search failed: %v", err) - } - if len(got) != 2 { - t.Fatalf("expected 2 RIDs for key 50, got %d", len(got)) - } -} - -func TestLeafSplitMaintainsRIDPairs(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "idx.idx") - - idxIface, err := OpenFileIndex(path, Meta{TableName: "t", Column: "id"}) - if err != nil { - t.Fatalf("OpenFileIndex failed: %v", err) - } - idx := idxIface.(*fileIndex) - defer idx.Close() 
- - // Fill a leaf to capacity to force a split. - total := maxLeafKeys + 1 - for i := 0; i < total; i++ { - rid := RID{PageID: uint32(i + 1), SlotID: uint16(i)} - if err := idx.Insert(Key(i), rid); err != nil { - t.Fatalf("Insert %d failed: %v", i, err) - } - } - - // Ensure searches return the matching RID after the split. - checkKeys := []int{0, int(total / 2), total - 1} - for _, k := range checkKeys { - got, err := idx.Search(Key(k)) - if err != nil { - t.Fatalf("Search %d failed: %v", k, err) - } - if len(got) != 1 { - t.Fatalf("expected 1 RID for key %d, got %d", k, len(got)) - } - want := RID{PageID: uint32(k + 1), SlotID: uint16(k)} - if got[0] != want { - t.Fatalf("RID mismatch for key %d: got %+v, want %+v", k, got[0], want) - } - } -} - -func TestLeafSplitCreatesNewRootInternal(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "idx.idx") - - idxIface, err := OpenFileIndex(path, Meta{TableName: "t", Column: "id"}) - if err != nil { - t.Fatalf("OpenFileIndex failed: %v", err) - } - idx := idxIface.(*fileIndex) - defer idx.Close() - - // Fill a leaf to capacity to force a split and create a new root internal node. 
- total := maxLeafKeys + 1 - for i := 0; i < total; i++ { - rid := RID{PageID: uint32(i + 1), SlotID: uint16(i)} - if err := idx.Insert(Key(i), rid); err != nil { - t.Fatalf("Insert %d failed: %v", i, err) - } - } - - rootPage, err := idx.readPage(idx.rootPageID) - if err != nil { - t.Fatalf("read root failed: %v", err) - } - rh := readPageHeader(rootPage) - if rh.PageType != PageTypeInternal { - t.Fatalf("root type = %d, want internal", rh.PageType) - } - if rh.NumKeys != 1 { - t.Fatalf("root NumKeys = %d, want 1", rh.NumKeys) - } - - children, keys, err := internalReadAll(rootPage, rh) - if err != nil { - t.Fatalf("internalReadAll failed: %v", err) - } - if len(children) != 2 { - t.Fatalf("expected 2 children, got %d", len(children)) - } - sep := keys[0] - if sep != Key(total/2) { - t.Fatalf("separator key = %d, want %d", sep, Key(total/2)) - } - - // Validate child leaf counts - leftPage, err := idx.readPage(children[0]) - if err != nil { - t.Fatalf("read left child failed: %v", err) - } - rightPage, err := idx.readPage(children[1]) - if err != nil { - t.Fatalf("read right child failed: %v", err) - } - - lh := readPageHeader(leftPage) - rhh := readPageHeader(rightPage) - if lh.NumKeys != uint32(total/2) { - t.Fatalf("left leaf keys = %d, want %d", lh.NumKeys, total/2) - } - if rhh.NumKeys != uint32(total/2) { - t.Fatalf("right leaf keys = %d, want %d", rhh.NumKeys, total/2) - } -} - -func TestInternalSplitGrowsTreeHeight(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "idx.idx") - - idxIface, err := OpenFileIndex(path, Meta{TableName: "t", Column: "id"}) - if err != nil { - t.Fatalf("OpenFileIndex failed: %v", err) - } - idx := idxIface.(*fileIndex) - defer idx.Close() - - // Insert enough keys to overflow the first internal root and force a higher-level split. 
- total := (maxInternalKeys+1)*maxLeafKeys + 1 - for i := 0; i < total; i++ { - rid := RID{PageID: uint32(i + 1), SlotID: uint16(i)} - if err := idx.Insert(Key(i), rid); err != nil { - t.Fatalf("Insert %d failed: %v", i, err) - } - } - - rootPage, err := idx.readPage(idx.rootPageID) - if err != nil { - t.Fatalf("read root failed: %v", err) - } - rh := readPageHeader(rootPage) - if rh.PageType != PageTypeInternal { - t.Fatalf("root type = %d, want internal", rh.PageType) - } - - rootChildren, rootKeys, err := internalReadAll(rootPage, rh) - if err != nil { - t.Fatalf("internalReadAll failed: %v", err) - } - if len(rootChildren) < 2 { - t.Fatalf("expected root to have at least 2 children after split, got %d", len(rootChildren)) - } - if rh.NumKeys == 1 { - // Ideal case: single promoted key with two children. - } else if rh.NumKeys > uint32(maxInternalKeys) { - t.Fatalf("root NumKeys = %d exceeds max %d", rh.NumKeys, maxInternalKeys) - } - - // Children of the new root should be internal nodes (tree height = 3). - leftChild, err := idx.readPage(rootChildren[0]) - if err != nil { - t.Fatalf("read left child failed: %v", err) - } - rightChild, err := idx.readPage(rootChildren[1]) - if err != nil { - t.Fatalf("read right child failed: %v", err) - } - - if lh := readPageHeader(leftChild); lh.PageType != PageTypeInternal { - t.Fatalf("left child type = %d, want internal", lh.PageType) - } - if rhh := readPageHeader(rightChild); rhh.PageType != PageTypeInternal { - t.Fatalf("right child type = %d, want internal", rhh.PageType) - } - - // The promoted separator should fall within the inserted key range. - if sep := rootKeys[0]; sep <= 0 || sep >= Key(total-1) { - t.Fatalf("separator key %d outside expected range", sep) - } - - // Spot-check searches across the tree height. 
- checkKeys := []int{0, total / 3, total - 1} - for _, k := range checkKeys { - got, err := idx.Search(Key(k)) - if err != nil { - t.Fatalf("Search %d failed: %v", k, err) - } - if len(got) != 1 { - t.Fatalf("expected 1 RID for key %d, got %d", k, len(got)) - } - want := RID{PageID: uint32(k + 1), SlotID: uint16(k)} - if got[0] != want { - t.Fatalf("RID mismatch for key %d: got %+v, want %+v", k, got[0], want) - } - } -} - -func TestDeleteRemovesRIDAndBorrows(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "idx.idx") - - idxIface, err := OpenFileIndex(path, Meta{TableName: "t", Column: "id"}) - if err != nil { - t.Fatalf("OpenFileIndex failed: %v", err) - } - idx := idxIface.(*fileIndex) - defer idx.Close() - - total := maxLeafKeys + 1 // force split into two leaves - for i := 0; i < total; i++ { - rid := RID{PageID: uint32(i + 1), SlotID: uint16(i)} - if err := idx.Insert(Key(i), rid); err != nil { - t.Fatalf("Insert %d failed: %v", i, err) - } - } - - // Delete a couple from the leftmost leaf to trigger a borrow from the right sibling. 
- for _, k := range []Key{0, 1} { - if err := idx.Delete(k, RID{PageID: uint32(k + 1), SlotID: uint16(k)}); err != nil { - t.Fatalf("Delete %d failed: %v", k, err) - } - if got, _ := idx.Search(k); len(got) != 0 { - t.Fatalf("expected key %d to be removed", k) - } - } - - rootPage, err := idx.readPage(idx.rootPageID) - if err != nil { - t.Fatalf("read root failed: %v", err) - } - rh := readPageHeader(rootPage) - if rh.PageType != PageTypeInternal { - t.Fatalf("root type = %d, want internal", rh.PageType) - } - children, keys, err := internalReadAll(rootPage, rh) - if err != nil { - t.Fatalf("internalReadAll failed: %v", err) - } - if len(children) != 2 { - t.Fatalf("expected 2 children after borrow, got %d", len(children)) - } - - leftPage, _ := idx.readPage(children[0]) - rightPage, _ := idx.readPage(children[1]) - if lh := readPageHeader(leftPage); lh.NumKeys != uint32(minLeafKeys) { - t.Fatalf("left leaf keys = %d, want %d after borrow", lh.NumKeys, minLeafKeys) - } - if rhh := readPageHeader(rightPage); rhh.NumKeys != uint32(minLeafKeys) { - t.Fatalf("right leaf keys = %d, want %d after borrow", rhh.NumKeys, minLeafKeys) - } - - if keys[0] != Key(minLeafKeys+2) { - t.Fatalf("separator after borrow = %d, want %d", keys[0], Key(minLeafKeys+2)) - } -} - -func TestDeleteKeyCollapsesRoot(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, "idx.idx") - - idxIface, err := OpenFileIndex(path, Meta{TableName: "t", Column: "id"}) - if err != nil { - t.Fatalf("OpenFileIndex failed: %v", err) - } - idx := idxIface.(*fileIndex) - defer idx.Close() - - total := maxLeafKeys + 1 - for i := 0; i < total; i++ { - rid := RID{PageID: uint32(i + 1), SlotID: uint16(i)} - if err := idx.Insert(Key(i), rid); err != nil { - t.Fatalf("Insert %d failed: %v", i, err) - } - } - - // Remove all keys to force merges up to the root. 
- for i := 0; i < total; i++ { - if err := idx.DeleteKey(Key(i)); err != nil { - t.Fatalf("DeleteKey %d failed: %v", i, err) - } - } - - rootPage, err := idx.readPage(idx.rootPageID) - if err != nil { - t.Fatalf("read root failed: %v", err) - } - rh := readPageHeader(rootPage) - if rh.PageType != PageTypeLeaf { - t.Fatalf("root type = %d, want leaf after collapse", rh.PageType) - } - if rh.NumKeys != 0 { - t.Fatalf("root NumKeys = %d, want 0 after full deletion", rh.NumKeys) - } -} diff --git a/internal/index/btree/file.go b/internal/index/btree/file.go deleted file mode 100644 index 1c0d98f..0000000 --- a/internal/index/btree/file.go +++ /dev/null @@ -1,1217 +0,0 @@ -package btree - -import ( - "encoding/binary" - "fmt" - "io" - "os" - "sort" -) - -const ( - fileHeaderSize = len(indexFileMagic) + 8 // "BTREE1" + root + pageCount - - leafEntrySize = 16 // 8 bytes key + 8 bytes RID - internalEntrySize = 12 // child(4) + key(8) - - maxLeafKeys = (PageSize - 16) / leafEntrySize - maxInternalKeys = (PageSize - 16 - 4) / internalEntrySize // 4 bytes for initial child0 - - minLeafKeys = maxLeafKeys / 2 - minInternalKeys = maxInternalKeys / 2 -) - -type fileIndex struct { - f *os.File - meta Meta - rootPageID uint32 - pageCount uint32 -} - -// Insert implements Index.Insert for fileIndex (without splits yet). 
-func (idx *fileIndex) Insert(key Key, rid RID) error { - leafID, leafPage, path, err := idx.findLeafForKeyWithPath(key) - if err != nil { - return err - } - - h := readPageHeader(leafPage) - if h.PageType != PageTypeLeaf { - return fmt.Errorf("btree: Insert: expected leaf, got type %d", h.PageType) - } - n := h.NumKeys - - // Fast path: leaf has room - if n < uint32(maxLeafKeys) { - // Simple sorted insert (linear search + shift) - var pos uint32 - for pos = 0; pos < n; pos++ { - k := leafGetKey(leafPage, pos) - if key < k { - break - } - } - - if pos < n { - moveStart := 16 + int(pos)*leafEntrySize - moveBytes := int(n-pos) * leafEntrySize - copy(leafPage[moveStart+leafEntrySize:moveStart+leafEntrySize+moveBytes], - leafPage[moveStart:moveStart+moveBytes]) - } - - leafSetEntry(leafPage, pos, key, rid) - h.NumKeys = n + 1 - writePageHeader(leafPage, h) - return idx.writePage(leafID, leafPage) - } - - // Leaf is full → split. - keys, rids := leafReadAll(leafPage, h) - - // Add new entry - keys = append(keys, key) - rids = append(rids, rid) - - // Build entries before sorting to keep keys and RIDs paired. 
- type entry struct { - k Key - r RID - } - entries := make([]entry, len(keys)) - for i := range keys { - entries[i] = entry{k: keys[i], r: rids[i]} - } - // Sort by key (stable so duplicates maintain insert order) - sort.SliceStable(entries, func(i, j int) bool { - return entries[i].k < entries[j].k - }) - - // Compute split point - total := len(entries) - split := total / 2 - - leftEntries := entries[:split] - rightEntries := entries[split:] - - // Overwrite left (existing leaf) - leftKeys := make([]Key, len(leftEntries)) - leftRIDs := make([]RID, len(leftEntries)) - for i, e := range leftEntries { - leftKeys[i] = e.k - leftRIDs[i] = e.r - } - leafWriteAll(leafPage, leftKeys, leftRIDs) - if err := idx.writePage(leafID, leafPage); err != nil { - return err - } - - // Create right leaf - rightID, rightPage, err := idx.allocPage(PageTypeLeaf) - if err != nil { - return err - } - rightKeys := make([]Key, len(rightEntries)) - rightRIDs := make([]RID, len(rightEntries)) - for i, e := range rightEntries { - rightKeys[i] = e.k - rightRIDs[i] = e.r - } - leafWriteAll(rightPage, rightKeys, rightRIDs) - if err := idx.writePage(rightID, rightPage); err != nil { - return err - } - - // Separator key is first key of right leaf - sepKey := rightKeys[0] - - // Insert separator into parent (may create new root). 
- if err := idx.insertIntoParent(leafID, rightID, sepKey, path); err != nil { - return err - } - - return nil -} - -func (idx *fileIndex) Delete(key Key, rid RID) error { - leafID, leafPage, path, err := idx.findLeafForKeyWithPath(key) - if err != nil { - return err - } - - h := readPageHeader(leafPage) - if h.PageType != PageTypeLeaf { - return fmt.Errorf("btree: Delete: expected leaf, got type %d", h.PageType) - } - - keys, rids := leafReadAll(leafPage, h) - if len(keys) == 0 { - return nil - } - - oldFirst := keys[0] - idxToDelete := -1 - for i := range keys { - if keys[i] == key && rids[i] == rid { - idxToDelete = i - break - } - } - if idxToDelete == -1 { - return nil - } - - keys = append(keys[:idxToDelete], keys[idxToDelete+1:]...) - rids = append(rids[:idxToDelete], rids[idxToDelete+1:]...) - - leafWriteAll(leafPage, keys, rids) - if err := idx.writePage(leafID, leafPage); err != nil { - return err - } - - if leafID == idx.rootPageID { - return nil - } - - if len(keys) > 0 && keys[0] != oldFirst { - if err := idx.updateAncestorMinKeys(leafID, path); err != nil { - return err - } - } - - if len(keys) < minLeafKeys { - if err := idx.rebalanceAfterDelete(leafID, path); err != nil { - return err - } - } - - return nil -} - -func (idx *fileIndex) DeleteKey(key Key) error { - leafID, leafPage, path, err := idx.findLeafForKeyWithPath(key) - if err != nil { - return err - } - - h := readPageHeader(leafPage) - if h.PageType != PageTypeLeaf { - return fmt.Errorf("btree: DeleteKey: expected leaf, got type %d", h.PageType) - } - - keys, rids := leafReadAll(leafPage, h) - filteredKeys := make([]Key, 0, len(keys)) - filteredRIDs := make([]RID, 0, len(rids)) - for i := range keys { - if keys[i] == key { - continue - } - filteredKeys = append(filteredKeys, keys[i]) - filteredRIDs = append(filteredRIDs, rids[i]) - } - - if len(filteredKeys) == len(keys) { - return nil - } - - oldFirst := Key(0) - if len(keys) > 0 { - oldFirst = keys[0] - } - - leafWriteAll(leafPage, 
filteredKeys, filteredRIDs) - if err := idx.writePage(leafID, leafPage); err != nil { - return err - } - - if leafID != idx.rootPageID && len(filteredKeys) > 0 && filteredKeys[0] != oldFirst { - if err := idx.updateAncestorMinKeys(leafID, path); err != nil { - return err - } - } - - if leafID != idx.rootPageID && len(filteredKeys) < minLeafKeys { - if err := idx.rebalanceAfterDelete(leafID, path); err != nil { - return err - } - } - - return nil -} - -// Search implements Index.Search: return all RIDs for a given key. -func (idx *fileIndex) Search(key Key) ([]RID, error) { - _, p, err := idx.findLeafForKey(key) - if err != nil { - return nil, err - } - - h := readPageHeader(p) - if h.PageType != PageTypeLeaf { - return nil, fmt.Errorf("btree: Search: expected leaf, got type %d", h.PageType) - } - n := h.NumKeys - - // Binary search for first position >= key - lo, hi := uint32(0), n - for lo < hi { - mid := (lo + hi) / 2 - k := leafGetKey(p, mid) - if key > k { - lo = mid + 1 - } else { - hi = mid - } - } - - // Collect all equal keys from lo onwards - var rids []RID - for i := lo; i < n; i++ { - k := leafGetKey(p, i) - if k != key { - break - } - rids = append(rids, leafGetRID(p, i)) - } - return rids, nil -} - -func (idx *fileIndex) Close() error { - if idx.f != nil { - err := idx.f.Close() - idx.f = nil - return err - } - return nil -} - -// File header layout: -// [magic 6 bytes][rootPageID 4][pageCount 4] -// total = 14 bytes -func writeFileHeader(f *os.File, root, pages uint32) error { - if _, err := f.Seek(0, io.SeekStart); err != nil { - return err - } - if _, err := f.Write([]byte(indexFileMagic)); err != nil { - return err - } - - buf := make([]byte, 8) - binary.LittleEndian.PutUint32(buf[0:4], root) - binary.LittleEndian.PutUint32(buf[4:8], pages) - - _, err := f.Write(buf) - return err -} - -func readFileHeader(f *os.File) (root uint32, pages uint32, err error) { - if _, err = f.Seek(0, io.SeekStart); err != nil { - return - } - - magic := make([]byte, 
6) - if _, err = io.ReadFull(f, magic); err != nil { - return - } - if string(magic) != indexFileMagic { - err = fmt.Errorf("btree: bad index magic") - return - } - - buf := make([]byte, 8) - if _, err = io.ReadFull(f, buf); err != nil { - return - } - root = binary.LittleEndian.Uint32(buf[0:4]) - pages = binary.LittleEndian.Uint32(buf[4:8]) - return -} -func OpenFileIndex(path string, meta Meta) (Index, error) { - f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0o644) - if err != nil { - return nil, err - } - - fi, err := f.Stat() - if err != nil { - return nil, err - } - - idx := &fileIndex{ - f: f, - meta: meta, - } - - if fi.Size() == 0 { - // brand new index: create a single leaf root - rootPage := make([]byte, PageSize) - h := PageHeader{ - PageType: PageTypeLeaf, - ParentPageID: 0, - NumKeys: 0, - } - writePageHeader(rootPage, h) - - // Write header + first leaf page - if err := writeFileHeader(f, 0, 1); err != nil { - return nil, err - } - if _, err := f.Write(rootPage); err != nil { - return nil, err - } - - idx.rootPageID = 0 - idx.pageCount = 1 - return idx, nil - } - - // Existing index → read header - root, pages, err := readFileHeader(f) - if err != nil { - return nil, err - } - idx.rootPageID = root - idx.pageCount = pages - return idx, nil -} -func (idx *fileIndex) pageOffset(pageID uint32) int64 { - return int64(fileHeaderSize) + int64(pageID)*PageSize -} -func (idx *fileIndex) readPage(pageID uint32) ([]byte, error) { - p := make([]byte, PageSize) - off := idx.pageOffset(pageID) - if _, err := idx.f.ReadAt(p, off); err != nil { - return nil, fmt.Errorf("btree: read page %d: %w", pageID, err) - } - return p, nil -} -func (idx *fileIndex) writePage(pageID uint32, p []byte) error { - if len(p) != PageSize { - return fmt.Errorf("btree: writePage: wrong page size %d", len(p)) - } - off := idx.pageOffset(pageID) - if _, err := idx.f.WriteAt(p, off); err != nil { - return fmt.Errorf("btree: write page %d: %w", pageID, err) - } - return nil -} - -// 
findLeafForKey walks from the root down to the leaf where `key` belongs. -// It returns (pageID, pageBytes). -func (idx *fileIndex) findLeafForKey(key Key) (uint32, []byte, error) { - pageID := idx.rootPageID - - for { - p, err := idx.readPage(pageID) - if err != nil { - return 0, nil, err - } - h := readPageHeader(p) - - switch h.PageType { - case PageTypeLeaf: - return pageID, p, nil - - case PageTypeInternal: - n := h.NumKeys - if n == 0 { - return 0, nil, fmt.Errorf("btree: empty internal node at page %d", pageID) - } - - // Choose child: - // if key < key0 -> child0 - // else find largest i with key >= key_i -> child_{i+1} - var childIdx uint32 - var i uint32 - for i = 0; i < n; i++ { - k := internalGetKey(p, i) - if key < k { - childIdx = i - break - } - } - if i == n { - // key >= all keys -> rightmost child - childIdx = n - } - - childPageID := internalGetChild(p, childIdx) - pageID = childPageID - - default: - return 0, nil, fmt.Errorf("btree: unknown page type %d at page %d", h.PageType, pageID) - } - } -} -func (idx *fileIndex) allocPage(pageType uint8) (uint32, []byte, error) { - pageID := idx.pageCount - idx.pageCount++ - - p := make([]byte, PageSize) - h := PageHeader{ - PageType: pageType, - ParentPageID: 0, // we won't rely on this yet - NumKeys: 0, - } - writePageHeader(p, h) - - // Write page to disk - if err := idx.writePage(pageID, p); err != nil { - return 0, nil, err - } - - // Update file header (rootPageID unchanged) - if err := writeFileHeader(idx.f, idx.rootPageID, idx.pageCount); err != nil { - return 0, nil, err - } - - return pageID, p, nil -} -func leafReadAll(p []byte, h PageHeader) ([]Key, []RID) { - n := h.NumKeys - keys := make([]Key, n) - rids := make([]RID, n) - for i := uint32(0); i < n; i++ { - keys[i] = leafGetKey(p, i) - rids[i] = leafGetRID(p, i) - } - return keys, rids -} - -func leafWriteAll(p []byte, keys []Key, rids []RID) { - if len(keys) != len(rids) { - panic("leafWriteAll: keys and rids length mismatch") - } - h := 
PageHeader{ - PageType: PageTypeLeaf, - ParentPageID: 0, // we ignore for now - NumKeys: uint32(len(keys)), - } - writePageHeader(p, h) - - for i := range keys { - leafSetEntry(p, uint32(i), keys[i], rids[i]) - } -} -func internalReadAll(p []byte, h PageHeader) ([]uint32, []Key, error) { - n := h.NumKeys - children := make([]uint32, n+1) - keys := make([]Key, n) - - off := 16 - if len(p) < off+4 { - return nil, nil, fmt.Errorf("btree: corrupt internal page header area") - } - - // child0 - children[0] = binary.LittleEndian.Uint32(p[off : off+4]) - off += 4 - - for i := uint32(0); i < n; i++ { - if len(p) < off+8+4 { - return nil, nil, fmt.Errorf("btree: corrupt internal page") - } - k := int64(binary.LittleEndian.Uint64(p[off : off+8])) - off += 8 - keys[i] = Key(k) - - children[i+1] = binary.LittleEndian.Uint32(p[off : off+4]) - off += 4 - } - - return children, keys, nil -} - -func internalWriteAll(p []byte, h PageHeader, children []uint32, keys []Key) error { - n := h.NumKeys - if uint32(len(keys)) != n { - return fmt.Errorf("btree: internalWriteAll: keys length mismatch") - } - if len(children) != int(n)+1 { - return fmt.Errorf("btree: internalWriteAll: children length mismatch") - } - - writePageHeader(p, h) - - off := 16 - binary.LittleEndian.PutUint32(p[off:off+4], children[0]) - off += 4 - - for i := uint32(0); i < n; i++ { - binary.LittleEndian.PutUint64(p[off:off+8], uint64(keys[i])) - off += 8 - binary.LittleEndian.PutUint32(p[off:off+4], children[i+1]) - off += 4 - } - - return nil -} - -// findLeafForKeyWithPath walks from root to leaf and returns -// (leafPageID, leafPageBytes, pathOfPageIDs), where path[len-1] = leaf. 
-func (idx *fileIndex) findLeafForKeyWithPath(key Key) (uint32, []byte, []uint32, error) { - pageID := idx.rootPageID - var path []uint32 - - for { - path = append(path, pageID) - - p, err := idx.readPage(pageID) - if err != nil { - return 0, nil, nil, err - } - h := readPageHeader(p) - - switch h.PageType { - case PageTypeLeaf: - return pageID, p, path, nil - - case PageTypeInternal: - n := h.NumKeys - if n == 0 { - return 0, nil, nil, fmt.Errorf("btree: empty internal node at page %d", pageID) - } - - children, keys, err := internalReadAll(p, h) - if err != nil { - return 0, nil, nil, err - } - - // Decide which child to follow - var childIdx int - i := 0 - for i = 0; i < int(n); i++ { - if key < keys[i] { - break - } - } - childIdx = i // i in [0..n], if i==n: rightmost - - pageID = children[childIdx] - - default: - return 0, nil, nil, fmt.Errorf("btree: unknown page type %d at page %d", h.PageType, pageID) - } - } -} - -// insertIntoParent is called after splitting a leaf: -// leftID: old leaf page -// rightID: new leaf page -// sepKey: first key of right leaf -// path: path from root to leftID (leaf is last element). -func (idx *fileIndex) insertIntoParent(leftID, rightID uint32, sepKey Key, path []uint32) error { - // If left was root, create a new root internal. - if len(path) == 1 { - // New root internal with two children and one key. 
- rootID, rootPage, err := idx.allocPage(PageTypeInternal) - if err != nil { - return err - } - - h := PageHeader{ - PageType: PageTypeInternal, - ParentPageID: 0, - NumKeys: 1, - } - children := []uint32{leftID, rightID} - keys := []Key{sepKey} - - if err := internalWriteAll(rootPage, h, children, keys); err != nil { - return err - } - if err := idx.writePage(rootID, rootPage); err != nil { - return err - } - - // Update in-memory and on-disk header - idx.rootPageID = rootID - if err := writeFileHeader(idx.f, idx.rootPageID, idx.pageCount); err != nil { - return err - } - - return nil - } - - // Non-root: parent is the second-to-last pageID in path. - parentID := path[len(path)-2] - parentPath := path[:len(path)-1] - parentPage, err := idx.readPage(parentID) - if err != nil { - return err - } - hp := readPageHeader(parentPage) - if hp.PageType != PageTypeInternal { - return fmt.Errorf("btree: parent of leaf is not internal (page %d)", parentID) - } - - children, keys, err := internalReadAll(parentPage, hp) - if err != nil { - return err - } - - // Find position of leftID in children - var pos int = -1 - for i, c := range children { - if c == leftID { - pos = i - break - } - } - if pos == -1 { - return fmt.Errorf("btree: parent does not reference left child %d", leftID) - } - - // Insert sepKey at keys[pos], and rightID at children[pos+1]. - // children: len = n+1, keys: len = n - n := int(hp.NumKeys) - - // Insert child at pos+1 - children = append(children, 0) // grow by 1 - copy(children[pos+2:], children[pos+1:]) - children[pos+1] = rightID - - // Insert key at pos - keys = append(keys, 0) - copy(keys[pos+1:], keys[pos:]) - keys[pos] = sepKey - - hp.NumKeys = uint32(n + 1) - - if hp.NumKeys <= uint32(maxInternalKeys) { - if err := internalWriteAll(parentPage, hp, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - return nil - } - - // Split full internal node. 
- totalKeys := int(hp.NumKeys) - mid := totalKeys / 2 - promote := keys[mid] - - leftKeys := append([]Key(nil), keys[:mid]...) - leftChildren := append([]uint32(nil), children[:mid+1]...) - - rightKeys := append([]Key(nil), keys[mid+1:]...) - rightChildren := append([]uint32(nil), children[mid+1:]...) - - // Rewrite left (existing parent) - hp.NumKeys = uint32(len(leftKeys)) - if err := internalWriteAll(parentPage, hp, leftChildren, leftKeys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - // Create right sibling - rightParentID, rightParentPage, err := idx.allocPage(PageTypeInternal) - if err != nil { - return err - } - rightHeader := PageHeader{PageType: PageTypeInternal, ParentPageID: 0, NumKeys: uint32(len(rightKeys))} - if err := internalWriteAll(rightParentPage, rightHeader, rightChildren, rightKeys); err != nil { - return err - } - if err := idx.writePage(rightParentID, rightParentPage); err != nil { - return err - } - - return idx.insertIntoParent(parentID, rightParentID, promote, parentPath) -} - -// findMinKey walks down the subtree rooted at pageID and returns its minimal key. -func (idx *fileIndex) findMinKey(pageID uint32) (Key, bool, error) { - p, err := idx.readPage(pageID) - if err != nil { - return 0, false, err - } - h := readPageHeader(p) - switch h.PageType { - case PageTypeLeaf: - if h.NumKeys == 0 { - return 0, false, nil - } - return leafGetKey(p, 0), true, nil - case PageTypeInternal: - children, _, err := internalReadAll(p, h) - if err != nil { - return 0, false, err - } - return idx.findMinKey(children[0]) - default: - return 0, false, fmt.Errorf("btree: findMinKey: unknown page type %d", h.PageType) - } -} - -// updateAncestorMinKeys ensures separator keys on the path remain accurate after -// a leaf/internal first-key change. `path` must include the child as its last element. 
-func (idx *fileIndex) updateAncestorMinKeys(childID uint32, path []uint32) error { - if len(path) < 2 { - return nil - } - - parentID := path[len(path)-2] - parentPage, err := idx.readPage(parentID) - if err != nil { - return err - } - ph := readPageHeader(parentPage) - if ph.PageType != PageTypeInternal { - return fmt.Errorf("btree: expected internal parent, got type %d", ph.PageType) - } - - children, keys, err := internalReadAll(parentPage, ph) - if err != nil { - return err - } - - pos := -1 - for i, c := range children { - if c == childID { - pos = i - break - } - } - if pos == -1 { - return fmt.Errorf("btree: parent %d missing child %d", parentID, childID) - } - - minKey, ok, err := idx.findMinKey(childID) - if err != nil { - return err - } - if ok && pos > 0 && keys[pos-1] != minKey { - keys[pos-1] = minKey - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - } - - // Propagate upwards since parent's first key may have changed. 
- return idx.updateAncestorMinKeys(parentID, path[:len(path)-1]) -} - -func (idx *fileIndex) rebalanceAfterDelete(nodeID uint32, path []uint32) error { - if len(path) < 2 { - return nil - } - - parentID := path[len(path)-2] - parentPath := path[:len(path)-1] - - parentPage, err := idx.readPage(parentID) - if err != nil { - return err - } - ph := readPageHeader(parentPage) - if ph.PageType != PageTypeInternal { - return fmt.Errorf("btree: rebalance: parent type %d is not internal", ph.PageType) - } - - children, keys, err := internalReadAll(parentPage, ph) - if err != nil { - return err - } - - pos := -1 - for i, c := range children { - if c == nodeID { - pos = i - break - } - } - if pos == -1 { - return fmt.Errorf("btree: parent %d missing child %d", parentID, nodeID) - } - - nodePage, err := idx.readPage(nodeID) - if err != nil { - return err - } - nh := readPageHeader(nodePage) - - switch nh.PageType { - case PageTypeLeaf: - return idx.rebalanceLeaf(nodeID, nodePage, pos, parentID, parentPath, parentPage, ph, children, keys) - case PageTypeInternal: - return idx.rebalanceInternal(nodeID, nodePage, pos, parentID, parentPath, parentPage, ph, children, keys) - default: - return fmt.Errorf("btree: unknown node type %d during rebalance", nh.PageType) - } -} - -func (idx *fileIndex) rebalanceLeaf(nodeID uint32, nodePage []byte, pos int, parentID uint32, parentPath []uint32, parentPage []byte, ph PageHeader, children []uint32, keys []Key) error { - nh := readPageHeader(nodePage) - nodeKeys, nodeRIDs := leafReadAll(nodePage, nh) - - // Borrow from left sibling if possible. 
- if pos > 0 { - leftID := children[pos-1] - leftPage, err := idx.readPage(leftID) - if err != nil { - return err - } - lh := readPageHeader(leftPage) - leftKeys, leftRIDs := leafReadAll(leftPage, lh) - if len(leftKeys) > minLeafKeys { - borrowedKey := leftKeys[len(leftKeys)-1] - borrowedRID := leftRIDs[len(leftRIDs)-1] - - leftKeys = leftKeys[:len(leftKeys)-1] - leftRIDs = leftRIDs[:len(leftRIDs)-1] - nodeKeys = append([]Key{borrowedKey}, nodeKeys...) - nodeRIDs = append([]RID{borrowedRID}, nodeRIDs...) - - leafWriteAll(leftPage, leftKeys, leftRIDs) - if err := idx.writePage(leftID, leftPage); err != nil { - return err - } - - leafWriteAll(nodePage, nodeKeys, nodeRIDs) - if err := idx.writePage(nodeID, nodePage); err != nil { - return err - } - - keys[pos-1] = nodeKeys[0] - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - return idx.updateAncestorMinKeys(nodeID, append(parentPath, nodeID)) - } - } - - // Borrow from right sibling. 
- if pos+1 < len(children) { - rightID := children[pos+1] - rightPage, err := idx.readPage(rightID) - if err != nil { - return err - } - rh := readPageHeader(rightPage) - rightKeys, rightRIDs := leafReadAll(rightPage, rh) - if len(rightKeys) > minLeafKeys { - borrowedKey := rightKeys[0] - borrowedRID := rightRIDs[0] - - rightKeys = rightKeys[1:] - rightRIDs = rightRIDs[1:] - nodeKeys = append(nodeKeys, borrowedKey) - nodeRIDs = append(nodeRIDs, borrowedRID) - - leafWriteAll(nodePage, nodeKeys, nodeRIDs) - if err := idx.writePage(nodeID, nodePage); err != nil { - return err - } - - leafWriteAll(rightPage, rightKeys, rightRIDs) - if err := idx.writePage(rightID, rightPage); err != nil { - return err - } - - if len(rightKeys) > 0 { - keys[pos] = rightKeys[0] - } - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - return idx.updateAncestorMinKeys(nodeID, append(parentPath, nodeID)) - } - } - - // Merge with sibling. - if pos > 0 { - leftID := children[pos-1] - leftPage, err := idx.readPage(leftID) - if err != nil { - return err - } - lh := readPageHeader(leftPage) - leftKeys, leftRIDs := leafReadAll(leftPage, lh) - - mergedKeys := append(leftKeys, nodeKeys...) - mergedRIDs := append(leftRIDs, nodeRIDs...) - - leafWriteAll(leftPage, mergedKeys, mergedRIDs) - if err := idx.writePage(leftID, leftPage); err != nil { - return err - } - - // Remove node entry from parent. - children = append(children[:pos], children[pos+1:]...) - keys = append(keys[:pos-1], keys[pos:]...) 
- ph.NumKeys = uint32(len(keys)) - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - if err := idx.handleParentAfterMerge(parentID, parentPath, children, keys); err != nil { - return err - } - - return idx.updateAncestorMinKeys(leftID, append(parentPath, leftID)) - } - - // Merge with right sibling. - rightID := children[pos+1] - rightPage, err := idx.readPage(rightID) - if err != nil { - return err - } - rh := readPageHeader(rightPage) - rightKeys, rightRIDs := leafReadAll(rightPage, rh) - - mergedKeys := append(nodeKeys, rightKeys...) - mergedRIDs := append(nodeRIDs, rightRIDs...) - - leafWriteAll(nodePage, mergedKeys, mergedRIDs) - if err := idx.writePage(nodeID, nodePage); err != nil { - return err - } - - children = append(children[:pos+1], children[pos+2:]...) - keys = append(keys[:pos], keys[pos+1:]...) - ph.NumKeys = uint32(len(keys)) - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - if err := idx.handleParentAfterMerge(parentID, parentPath, children, keys); err != nil { - return err - } - - return idx.updateAncestorMinKeys(nodeID, append(parentPath, nodeID)) -} - -func (idx *fileIndex) rebalanceInternal(nodeID uint32, nodePage []byte, pos int, parentID uint32, parentPath []uint32, parentPage []byte, ph PageHeader, children []uint32, keys []Key) error { - nh := readPageHeader(nodePage) - nodeChildren, nodeKeys, err := internalReadAll(nodePage, nh) - if err != nil { - return err - } - - // Borrow from left sibling if possible. 
- if pos > 0 { - leftID := children[pos-1] - leftPage, err := idx.readPage(leftID) - if err != nil { - return err - } - lh := readPageHeader(leftPage) - leftChildren, leftKeys, err := internalReadAll(leftPage, lh) - if err != nil { - return err - } - if len(leftKeys) > minInternalKeys { - borrowedKey := keys[pos-1] - borrowedChild := leftChildren[len(leftChildren)-1] - - keys[pos-1] = leftKeys[len(leftKeys)-1] - leftKeys = leftKeys[:len(leftKeys)-1] - leftChildren = leftChildren[:len(leftChildren)-1] - - nodeKeys = append([]Key{borrowedKey}, nodeKeys...) - nodeChildren = append([]uint32{borrowedChild}, nodeChildren...) - - lh.NumKeys = uint32(len(leftKeys)) - if err := internalWriteAll(leftPage, lh, leftChildren, leftKeys); err != nil { - return err - } - if err := idx.writePage(leftID, leftPage); err != nil { - return err - } - - nh.NumKeys = uint32(len(nodeKeys)) - if err := internalWriteAll(nodePage, nh, nodeChildren, nodeKeys); err != nil { - return err - } - if err := idx.writePage(nodeID, nodePage); err != nil { - return err - } - - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - return idx.updateAncestorMinKeys(nodeID, append(parentPath, nodeID)) - } - } - - // Borrow from right sibling. 
- if pos+1 < len(children) { - rightID := children[pos+1] - rightPage, err := idx.readPage(rightID) - if err != nil { - return err - } - rh := readPageHeader(rightPage) - rightChildren, rightKeys, err := internalReadAll(rightPage, rh) - if err != nil { - return err - } - if len(rightKeys) > minInternalKeys { - borrowedKey := keys[pos] - borrowedChild := rightChildren[0] - - keys[pos] = rightKeys[0] - rightKeys = rightKeys[1:] - rightChildren = rightChildren[1:] - - nodeKeys = append(nodeKeys, borrowedKey) - nodeChildren = append(nodeChildren, borrowedChild) - - rh.NumKeys = uint32(len(rightKeys)) - if err := internalWriteAll(rightPage, rh, rightChildren, rightKeys); err != nil { - return err - } - if err := idx.writePage(rightID, rightPage); err != nil { - return err - } - - nh.NumKeys = uint32(len(nodeKeys)) - if err := internalWriteAll(nodePage, nh, nodeChildren, nodeKeys); err != nil { - return err - } - if err := idx.writePage(nodeID, nodePage); err != nil { - return err - } - - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - return idx.updateAncestorMinKeys(nodeID, append(parentPath, nodeID)) - } - } - - // Merge with sibling. - if pos > 0 { - leftID := children[pos-1] - leftPage, err := idx.readPage(leftID) - if err != nil { - return err - } - lh := readPageHeader(leftPage) - leftChildren, leftKeys, err := internalReadAll(leftPage, lh) - if err != nil { - return err - } - - mergedKeys := append(leftKeys, keys[pos-1]) - mergedKeys = append(mergedKeys, nodeKeys...) - mergedChildren := append(leftChildren, nodeChildren...) - - lh.NumKeys = uint32(len(mergedKeys)) - if err := internalWriteAll(leftPage, lh, mergedChildren, mergedKeys); err != nil { - return err - } - if err := idx.writePage(leftID, leftPage); err != nil { - return err - } - - children = append(children[:pos], children[pos+1:]...) - keys = append(keys[:pos-1], keys[pos:]...) 
- ph.NumKeys = uint32(len(keys)) - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - if err := idx.handleParentAfterMerge(parentID, parentPath, children, keys); err != nil { - return err - } - - return idx.updateAncestorMinKeys(leftID, append(parentPath, leftID)) - } - - // Merge with right sibling. - rightID := children[pos+1] - rightPage, err := idx.readPage(rightID) - if err != nil { - return err - } - rh := readPageHeader(rightPage) - rightChildren, rightKeys, err := internalReadAll(rightPage, rh) - if err != nil { - return err - } - - mergedKeys := append(nodeKeys, keys[pos]) - mergedKeys = append(mergedKeys, rightKeys...) - mergedChildren := append(nodeChildren, rightChildren...) - - nh.NumKeys = uint32(len(mergedKeys)) - if err := internalWriteAll(nodePage, nh, mergedChildren, mergedKeys); err != nil { - return err - } - if err := idx.writePage(nodeID, nodePage); err != nil { - return err - } - - children = append(children[:pos+1], children[pos+2:]...) - keys = append(keys[:pos], keys[pos+1:]...) 
- ph.NumKeys = uint32(len(keys)) - if err := internalWriteAll(parentPage, ph, children, keys); err != nil { - return err - } - if err := idx.writePage(parentID, parentPage); err != nil { - return err - } - - if err := idx.handleParentAfterMerge(parentID, parentPath, children, keys); err != nil { - return err - } - - return idx.updateAncestorMinKeys(nodeID, append(parentPath, nodeID)) -} - -func (idx *fileIndex) handleParentAfterMerge(parentID uint32, parentPath []uint32, children []uint32, keys []Key) error { - if parentID == idx.rootPageID { - if len(keys) == 0 && len(children) == 1 { - idx.rootPageID = children[0] - if err := writeFileHeader(idx.f, idx.rootPageID, idx.pageCount); err != nil { - return err - } - } - return nil - } - - parentPage, err := idx.readPage(parentID) - if err != nil { - return err - } - ph := readPageHeader(parentPage) - if int(ph.NumKeys) >= minInternalKeys { - return nil - } - - return idx.rebalanceAfterDelete(parentID, parentPath) -} diff --git a/internal/index/btree/index.go b/internal/index/btree/index.go deleted file mode 100644 index bd5ba34..0000000 --- a/internal/index/btree/index.go +++ /dev/null @@ -1,35 +0,0 @@ -package btree - -import "fmt" - -// For now we only support int64 keys. -type Key = int64 - -// Meta carries basic information about an index. -type Meta struct { - TableName string // e.g. "users" - Column string // e.g. "id" -} - -// Index describes the operations a B-Tree index supports. -type Index interface { - // Insert adds a mapping key -> rid. - Insert(key Key, rid RID) error - - // Delete removes a specific mapping key -> rid. - // If rid doesn't exist for that key, it's a no-op. - Delete(key Key, rid RID) error - - // DeleteKey removes all RIDs for a given key (optional, but handy). - DeleteKey(key Key) error - - // Search returns all RIDs for a key. - Search(key Key) ([]RID, error) - - // Close flushes and closes the index file. 
- Close() error -} - -// ErrNotFound is returned when a key is not present in the index. -// (Search may just return empty slice + nil instead; we keep this for flexibility.) -var ErrNotFound = fmt.Errorf("btree: key not found") diff --git a/internal/index/btree/manager.go b/internal/index/btree/manager.go deleted file mode 100644 index 94d5756..0000000 --- a/internal/index/btree/manager.go +++ /dev/null @@ -1,73 +0,0 @@ -package btree - -import ( - "path/filepath" - "sync" -) - -// Manager manages B-Tree indexes in a directory (usually the db dir). -type Manager struct { - dir string - mu sync.Mutex - open map[string]Index // key: indexFileName or "table.column" -} - -// NewManager creates a new index manager rooted at dir. -func NewManager(dir string) *Manager { - return &Manager{ - dir: dir, - open: make(map[string]Index), - } -} - -// indexFileName is a simple convention: table_column.idx -func indexFileName(table, col string) string { - return table + "_" + col + ".idx" -} - -// key for open map -func indexKey(table, col string) string { - return table + "." + col -} - -// OpenOrCreateIndex returns an Index for (table, col), creating the B-Tree -// file if needed. -func (m *Manager) OpenOrCreateIndex(table, col string) (Index, error) { - m.mu.Lock() - defer m.mu.Unlock() - - k := indexKey(table, col) - if idx, ok := m.open[k]; ok { - return idx, nil - } - - fileName := indexFileName(table, col) - path := filepath.Join(m.dir, fileName) - - // We'll implement OpenFileIndex in the next step (B-Tree on disk). - idx, err := OpenFileIndex(path, Meta{ - TableName: table, - Column: col, - }) - if err != nil { - return nil, err - } - - m.open[k] = idx - return idx, nil -} - -// CloseAll closes all open indexes. 
-func (m *Manager) CloseAll() error { - m.mu.Lock() - defer m.mu.Unlock() - - var firstErr error - for k, idx := range m.open { - if err := idx.Close(); err != nil && firstErr == nil { - firstErr = err - } - delete(m.open, k) - } - return firstErr -} diff --git a/internal/index/btree/page.go b/internal/index/btree/page.go deleted file mode 100644 index 4a736d0..0000000 --- a/internal/index/btree/page.go +++ /dev/null @@ -1,77 +0,0 @@ -package btree - -import ( - "encoding/binary" - "errors" -) - -const ( - PageSize = 4096 - - PageTypeLeaf = 1 - PageTypeInternal = 2 - - indexFileMagic = "BTREE1" // 6 bytes -) - -var ( - ErrBadPage = errors.New("btree: bad page") -) - -// PageHeader describes the fixed part of an index page. -type PageHeader struct { - PageType uint8 - ParentPageID uint32 - NumKeys uint32 -} - -func readPageHeader(p []byte) PageHeader { - return PageHeader{ - PageType: p[0], - ParentPageID: binary.LittleEndian.Uint32(p[4:8]), - NumKeys: binary.LittleEndian.Uint32(p[8:12]), - } -} - -func writePageHeader(p []byte, h PageHeader) { - p[0] = h.PageType - // p[1:4] unused - binary.LittleEndian.PutUint32(p[4:8], h.ParentPageID) - binary.LittleEndian.PutUint32(p[8:12], h.NumKeys) -} - -func leafGetKey(p []byte, idx uint32) Key { - off := 16 + int(idx)*leafEntrySize // skip header (16 bytes) - return int64(binary.LittleEndian.Uint64(p[off : off+8])) -} - -func leafGetRID(p []byte, idx uint32) RID { - off := 16 + int(idx)*leafEntrySize + 8 - pageID := binary.LittleEndian.Uint32(p[off : off+4]) - slotID := binary.LittleEndian.Uint16(p[off+4 : off+6]) - return RID{PageID: pageID, SlotID: slotID} -} - -func leafSetEntry(p []byte, idx uint32, key Key, rid RID) { - off := 16 + int(idx)*leafEntrySize - binary.LittleEndian.PutUint64(p[off:off+8], uint64(key)) - off += 8 - binary.LittleEndian.PutUint32(p[off:off+4], rid.PageID) - binary.LittleEndian.PutUint16(p[off+4:off+6], rid.SlotID) -} - -func internalGetChild(p []byte, idx uint32) uint32 { - off := 16 + 
int(idx)*internalEntrySize - return binary.LittleEndian.Uint32(p[off : off+4]) -} - -func internalGetKey(p []byte, idx uint32) Key { - off := 16 + int(idx)*internalEntrySize + 4 - return int64(binary.LittleEndian.Uint64(p[off : off+8])) -} - -func internalSetEntry(p []byte, idx uint32, child uint32, key Key) { - off := 16 + int(idx)*internalEntrySize - binary.LittleEndian.PutUint32(p[off:off+4], child) - binary.LittleEndian.PutUint64(p[off+4:off+12], uint64(key)) -} diff --git a/internal/index/btree/types.go b/internal/index/btree/types.go deleted file mode 100644 index 0056a48..0000000 --- a/internal/index/btree/types.go +++ /dev/null @@ -1,8 +0,0 @@ -package btree - -// RID identifies a row in a heap page (table file). -// pageID is the heap page number, slotID is the row slot within that page. -type RID struct { - PageID uint32 - SlotID uint16 -} diff --git a/internal/sql/ast.go b/internal/sql/ast.go index 2d98a25..73c5c85 100644 --- a/internal/sql/ast.go +++ b/internal/sql/ast.go @@ -97,10 +97,3 @@ type OrderByClause struct { Column string Desc bool // false = ASC (default), true = DESC } -type CreateIndexStmt struct { - IndexName string - TableName string - ColumnName string -} - -func (*CreateIndexStmt) stmtNode() {} diff --git a/internal/sql/parse_index.go b/internal/sql/parse_index.go deleted file mode 100644 index 7767d27..0000000 --- a/internal/sql/parse_index.go +++ /dev/null @@ -1,30 +0,0 @@ -package sql - -import ( - "fmt" - "strings" -) - -// parseCreateIndex parses a CREATE INDEX statement. 
-// Format: CREATE INDEX index_name ON table_name (column_name) -func parseCreateIndex(q string) (*CreateIndexStmt, error) { - q = strings.TrimSpace(q) - parts := strings.Fields(q) - - if len(parts) != 6 || - !strings.EqualFold(parts[0], "CREATE") || - !strings.EqualFold(parts[1], "INDEX") || - !strings.EqualFold(parts[3], "ON") || - !strings.HasPrefix(parts[5], "(") || - !strings.HasSuffix(parts[5], ")") { - return nil, fmt.Errorf("invalid CREATE INDEX format") - } - - stmt := &CreateIndexStmt{ - IndexName: parts[2], - TableName: parts[4], - ColumnName: strings.Trim(parts[5], "()"), - } - - return stmt, nil -} diff --git a/internal/sql/parser.go b/internal/sql/parser.go index 3a9d87e..d5596a1 100644 --- a/internal/sql/parser.go +++ b/internal/sql/parser.go @@ -31,8 +31,6 @@ func Parse(query string) (Statement, error) { switch tokens[1] { case "TABLE": return parseCreateTable(q) - case "INDEX": - return parseCreateIndex(q) } } return nil, fmt.Errorf("invalid CREATE statement") @@ -54,7 +52,7 @@ func Parse(query string) (Statement, error) { case "ROLLBACK": return parseRollback(q) default: - return nil, fmt.Errorf("unsupported statement (supported: CREATE TABLE, CREATE INDEX, INSERT, SELECT, UPDATE, DELETE, BEGIN, COMMIT, ROLLBACK)") + return nil, fmt.Errorf("unsupported statement (supported: CREATE TABLE, INSERT, SELECT, UPDATE, DELETE, BEGIN, COMMIT, ROLLBACK)") } } diff --git a/internal/storage/README.md b/internal/storage/README.md index 3c0f050..5c45151 100644 --- a/internal/storage/README.md +++ b/internal/storage/README.md @@ -10,17 +10,11 @@ implementations that back the SQL executor. - `Tx` operations cover table scans, inserts, delete/update helpers, and a full-table `ReplaceAll` used by the SQL UPDATE/DELETE implementations. - `RowPredicate` and `RowUpdater` callbacks power the row-level filtering and - rewrite logic used by the filestore and memstore backends. + rewrite logic used by the memstore backend. 
See [`storage.go`](storage.go) for the exact signatures and comments. -## Implementations +## Implementation -- [`memstore`](memstore) is an in-memory reference engine used by tests and to - keep the code paths simple when persistence is not required. -- [`filestore`](filestore) is the default on-disk backend. It stores one file - per table, maintains a WAL for durability, and rebuilds tables by replaying - committed transactions during recovery. - -Both engines share the same interface so the REPL and engine code can switch -between backends without changes. +[`memstore`](memstore) is an in-memory reference engine used by tests and to +keep the code paths simple when persistence is not required. diff --git a/internal/storage/filestore/README.md b/internal/storage/filestore/README.md deleted file mode 100644 index 66f9fd6..0000000 --- a/internal/storage/filestore/README.md +++ /dev/null @@ -1,103 +0,0 @@ -# Filestore backend - -The filestore backend is the default on-disk engine used by the REPL. It stores -one `.godb` file per table plus a shared write-ahead log (`wal.log`) in the same -directory. - -## Table file layout - -Each table file is a binary stream composed of a schema header followed by 4KB -heap pages: - -``` -[header][pages...] - -header: - magic : 5 bytes "GODB1" - numCols : uint16 - columns... 
: repeated numCols times - nameLen : uint16 - name : nameLen bytes (UTF-8) - type : uint8 (matches `sql.DataType`) - -page (4096 bytes): - magic : 4 bytes "GPG1" - pageID : uint32 - pageType : uint8 (1 = heap) - numSlots : uint16 - freeStart : uint16 (offset where the next row bytes can be written) - row area : variable - slot dir : grows backward from the end of the page - slot i: [offset uint16][length uint16]; deleted slots use offset 0xFFFF - -rows inside a page: - encoded payload for each column (same format as the header types): - INT : int64 (little endian) - FLOAT : float64 (little endian) - STRING : uint32 length + bytes - BOOL : 1 byte (0 or 1) - NULL : no payload -``` - -## WAL format - -Durability is provided by a single append-only WAL (`wal.log`). The current -version uses the magic prefix `GODBWAL2` and encodes records as: - -``` -[magic "GODBWAL2"][records...] - -record header: - recType : uint8 - txID : uint64 - payload : varies by type - -record types: - 1 = BEGIN (no payload) - 2 = COMMIT (no payload) - 3 = ROLLBACK (no payload) - 4 = INSERT (payload: tableNameLen uint16, tableName bytes, - rowCount uint32 = 1, encoded row) - 5 = REPLACEALL (payload: tableNameLen uint16, tableName bytes, - rowCount uint32, repeated encoded rows) - 6 = DELETE (payload: tableNameLen uint16, tableName bytes, - rowCount uint32 = 1, encoded deleted row) - 7 = UPDATE (payload: tableNameLen uint16, tableName bytes, - rowCount uint32 = 2, encoded [oldRow, newRow]) -``` - -WAL writes are fsynced on `COMMIT` and `ROLLBACK`. Table pages are updated -before commit, so redo-only recovery depends on WAL entries to rebuild state -after a crash. - -## Recovery process - -On startup the engine replays the WAL to rebuild durable table contents: - -1. Load the header/schema for every existing table file. -2. Truncate each table back to just its header (clearing all pages). -3. Parse `wal.log` into per-transaction op lists, tracking `COMMIT`/`ROLLBACK`. -4. 
Replay committed, non-rolled-back transactions in log order into an - in-memory row list per table applying `INSERT`, `REPLACEALL`, `DELETE`, and - `UPDATE` semantics. -5. Write the rebuilt rows back out via `ReplaceAll`, regenerating heap pages. - -Uncommitted or rolled-back transactions are ignored during replay so their -changes do not survive recovery. - -## Transaction semantics - -- `BEGIN`/`COMMIT`/`ROLLBACK` are understood by the engine and logged in the - WAL. `COMMIT` fsyncs the WAL to ensure durability of prior writes. -- Mutations (`INSERT`, `UPDATE`, `DELETE`) update table files immediately; - `ROLLBACK` does not undo those on-disk changes until a restart, when recovery - filters out rolled-back transactions while rebuilding from the WAL. -- `REPLACEALL` is used by engine-level UPDATE/DELETE implementations to rewrite - whole tables and is fully logged for recovery. - -## Tips for experimenting - -- Data is written to the `./data` directory by default when running the REPL - entrypoint. -- To reset the on-disk state, stop the REPL and remove the directory: - `rm -rf ./data`. diff --git a/internal/storage/filestore/filestore.go b/internal/storage/filestore/filestore.go deleted file mode 100644 index d45967f..0000000 --- a/internal/storage/filestore/filestore.go +++ /dev/null @@ -1,321 +0,0 @@ -package filestore - -import ( - "errors" - "fmt" - "goDB/internal/index/btree" - "goDB/internal/sql" - "goDB/internal/storage" - "io" - "os" - "path/filepath" - "strings" - "sync" -) - -type indexInfo struct { - name string - tableName string - columnName string - btree btree.Index -} - -// FileEngine is a simple on-disk storage engine. -type FileEngine struct { - dir string - wal *walLogger - - mu sync.Mutex - nextTxID uint64 - indexMgr *btree.Manager - - idxMu sync.RWMutex - indexes map[string]map[string]*indexInfo // tableName -> columnName -> info -} - -// New creates a new FileEngine storing all tables in dir. 
-func New(dir string) (*FileEngine, error) { - if err := os.MkdirAll(dir, 0o755); err != nil { - return nil, fmt.Errorf("filestore: create dir: %w", err) - } - - w, err := newWAL(dir) - if err != nil { - return nil, fmt.Errorf("filestore: init WAL: %w", err) - } - - e := &FileEngine{ - dir: dir, - wal: w, - nextTxID: 1, - indexes: make(map[string]map[string]*indexInfo), - } - - e.indexMgr = btree.NewManager(dir) - - // Load existing indexes from disk. - entries, err := os.ReadDir(dir) - if err != nil { - return nil, fmt.Errorf("filestore: read dir to load indexes: %w", err) - } - for _, ent := range entries { - name := ent.Name() - if strings.HasSuffix(name, ".idx") { - parts := strings.Split(strings.TrimSuffix(name, ".idx"), "_") - if len(parts) == 2 { - tableName := parts[0] - columnName := parts[1] - - bt, err := e.indexMgr.OpenOrCreateIndex(tableName, columnName) - if err != nil { - return nil, fmt.Errorf("filestore: could not open existing index %s: %w", name, err) - } - if e.indexes[tableName] == nil { - e.indexes[tableName] = make(map[string]*indexInfo) - } - e.indexes[tableName][columnName] = &indexInfo{ - name: name, // Use filename as internal name - tableName: tableName, - columnName: columnName, - btree: bt, - } - } - } - } - - // Recover database state from WAL on startup. 
- if err := e.recoverFromWAL(); err != nil { - return nil, fmt.Errorf("filestore: recovery failed: %w", err) - } - - return e, nil -} - -func (e *FileEngine) CreateIndex(indexName, tableName, columnName string) error { - e.idxMu.RLock() - if columns, ok := e.indexes[tableName]; ok { - if _, exists := columns[columnName]; exists { - e.idxMu.RUnlock() - return fmt.Errorf("filestore: index on %s.%s already exists", tableName, columnName) - } - } - e.idxMu.RUnlock() - - path := e.tablePath(tableName) - f, err := os.Open(path) - if err != nil { - return fmt.Errorf("filestore: open table for index creation: %w", err) - } - defer f.Close() - - cols, err := readHeader(f) - if err != nil { - return fmt.Errorf("filestore: read header for index creation: %w", err) - } - headerEnd, err := f.Seek(0, io.SeekCurrent) - if err != nil { - return fmt.Errorf("filestore: seek after header for index creation: %w", err) - } - - colIdx := -1 - for i, c := range cols { - if strings.EqualFold(c.Name, columnName) { - colIdx = i - break - } - } - if colIdx == -1 { - return fmt.Errorf("filestore: column %q not found in table %q", columnName, tableName) - } - if cols[colIdx].Type != sql.TypeInt { - return fmt.Errorf("filestore: cannot create index on non-integer column %q", columnName) - } - - bt, err := e.indexMgr.OpenOrCreateIndex(tableName, columnName) - if err != nil { - return fmt.Errorf("filestore: could not create index: %w", err) - } - - fi, err := f.Stat() - if err != nil { - return fmt.Errorf("filestore: stat table for index creation: %w", err) - } - fileSize := fi.Size() - if fileSize < headerEnd { - return fmt.Errorf("filestore: corrupt file, size < header") - } - dataBytes := fileSize - headerEnd - if dataBytes > 0 { - if dataBytes%PageSize != 0 { - return fmt.Errorf("filestore: corrupt data (not multiple of page size)") - } - numPages := uint32(dataBytes / PageSize) - - for pageID := uint32(0); pageID < numPages; pageID++ { - p := make(pageBuf, PageSize) - offset := headerEnd + 
int64(pageID)*PageSize - if _, err := f.ReadAt(p, offset); err != nil { - return fmt.Errorf("filestore: read page %d for index creation: %w", pageID, err) - } - - err := p.iterateRows(len(cols), func(slotID uint16, r sql.Row) error { - val := r[colIdx] - if val.Type == sql.TypeNull { - return nil - } - rid := btree.RID{PageID: pageID, SlotID: slotID} - if err := bt.Insert(val.I64, rid); err != nil { - return fmt.Errorf("error building index: %w", err) - } - return nil - }) - if err != nil { - return fmt.Errorf("filestore: iterate rows in page %d for index creation: %w", pageID, err) - } - } - } - - // Register the index in the engine's in-memory map. - e.idxMu.Lock() - defer e.idxMu.Unlock() - - if e.indexes[tableName] == nil { - e.indexes[tableName] = make(map[string]*indexInfo) - } - e.indexes[tableName][columnName] = &indexInfo{ - name: indexName, - tableName: tableName, - columnName: columnName, - btree: bt, - } - - return nil -} - -// ListTables returns all *.godb files in the storage directory. -func (e *FileEngine) ListTables() ([]string, error) { - entries, err := os.ReadDir(e.dir) - if err != nil { - return nil, fmt.Errorf("filestore: list tables: %w", err) - } - - var tables []string - for _, ent := range entries { - name := ent.Name() - if strings.HasSuffix(name, ".godb") { - t := strings.TrimSuffix(name, ".godb") - tables = append(tables, t) - } - } - return tables, nil -} - -// TableSchema reads the schema header of the given table. 
-func (e *FileEngine) TableSchema(name string) ([]sql.Column, error) { - path := e.tablePath(name) - - f, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("filestore: open table for schema: %w", err) - } - defer f.Close() - - cols, err := readHeader(f) - if err != nil { - return nil, fmt.Errorf("filestore: read header in schema: %w", err) - } - - return cols, nil -} - -func (e *FileEngine) tablePath(name string) string { - return filepath.Join(e.dir, name+".godb") -} - -// CreateTable creates a new table file with the given schema. -func (e *FileEngine) CreateTable(name string, cols []sql.Column) error { - path := e.tablePath(name) - - if _, err := os.Stat(path); err == nil { - return fmt.Errorf("filestore: table %q already exists", name) - } else if !errors.Is(err, os.ErrNotExist) { - return fmt.Errorf("filestore: check existing table: %w", err) - } - - f, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644) - if err != nil { - return fmt.Errorf("filestore: create table file: %w", err) - } - defer f.Close() - - if err := writeHeader(f, cols); err != nil { - _ = f.Close() - _ = os.Remove(path) - return fmt.Errorf("filestore: write header: %w", err) - } - - return nil -} - -// Begin starts a new (very simple) transaction. 
-func (e *FileEngine) Begin(readOnly bool) (storage.Tx, error) { - tx := &fileTx{ - eng: e, - readOnly: readOnly, - closed: false, - id: 0, - } - - if !readOnly { - e.mu.Lock() - txID := e.nextTxID - e.nextTxID++ - e.mu.Unlock() - - tx.id = txID - - if err := e.wal.appendBegin(txID); err != nil { - return nil, fmt.Errorf("filestore: WAL BEGIN: %w", err) - } - } - - return tx, nil -} - -func (e *FileEngine) Commit(tx storage.Tx) error { - ft, err := e.validateTx(tx) - if err != nil { - return err - } - - if !ft.readOnly && ft.id != 0 { - if err := e.wal.appendCommit(ft.id); err != nil { - return fmt.Errorf("filestore: WAL COMMIT: %w", err) - } - if err := e.wal.Sync(); err != nil { - return fmt.Errorf("filestore: WAL sync on commit: %w", err) - } - } - - ft.closed = true - return nil -} - -func (e *FileEngine) Rollback(tx storage.Tx) error { - ft, err := e.validateTx(tx) - if err != nil { - return err - } - - if !ft.readOnly && ft.id != 0 { - if err := e.wal.appendRollback(ft.id); err != nil { - return fmt.Errorf("filestore: WAL ROLLBACK: %w", err) - } - if err := e.wal.Sync(); err != nil { - return fmt.Errorf("filestore: WAL sync on rollback: %w", err) - } - } - - ft.closed = true - return nil -} diff --git a/internal/storage/filestore/filestore_recovery_test.go b/internal/storage/filestore/filestore_recovery_test.go deleted file mode 100644 index b28402f..0000000 --- a/internal/storage/filestore/filestore_recovery_test.go +++ /dev/null @@ -1,416 +0,0 @@ -package filestore - -import ( - "goDB/internal/sql" - "goDB/internal/storage" - "os" - "path/filepath" - "testing" -) - -// Helper: read all rows from a table using a read-only tx. 
-func scanAll(t *testing.T, fs *FileEngine, table string) ([]string, []sql.Row) { - t.Helper() - tx, err := fs.Begin(true) - if err != nil { - t.Fatalf("Begin(readOnly) failed: %v", err) - } - defer fs.Commit(tx) // no-op for readOnly, but keeps API consistent - - cols, rows, err := tx.Scan(table) - if err != nil { - t.Fatalf("Scan(%q) failed: %v", table, err) - } - return cols, rows -} - -// Recovery should replay committed INSERTs from WAL on startup. -func TestFilestore_Recovery_ReplaysCommittedInserts(t *testing.T) { - dir := t.TempDir() - - // First "process": create engine, table, insert data, commit. - fs1, err := New(dir) - if err != nil { - t.Fatalf("New(fs1) failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - {Name: "name", Type: sql.TypeString}, - } - - if err := fs1.CreateTable("users", cols); err != nil { - t.Fatalf("CreateTable(users) failed: %v", err) - } - - // tx1: insert two rows and commit - tx1, err := fs1.Begin(false) - if err != nil { - t.Fatalf("Begin(tx1) failed: %v", err) - } - _ = tx1.Insert("users", sql.Row{ - {Type: sql.TypeInt, I64: 1}, - {Type: sql.TypeString, S: "Alice"}, - }) - _ = tx1.Insert("users", sql.Row{ - {Type: sql.TypeInt, I64: 2}, - {Type: sql.TypeString, S: "Bob"}, - }) - if err := fs1.Commit(tx1); err != nil { - t.Fatalf("Commit(tx1) failed: %v", err) - } - - // Optional: verify pre-restart state - _, rowsBefore := scanAll(t, fs1, "users") - if len(rowsBefore) != 2 { - t.Fatalf("before restart: expected 2 rows, got %d", len(rowsBefore)) - } - - // "Restart": create a new engine instance pointing to the same dir. 
- fs2, err := New(dir) - if err != nil { - t.Fatalf("New(fs2) failed: %v", err) - } - - colsAfter, rowsAfter := scanAll(t, fs2, "users") - if len(colsAfter) != 2 || colsAfter[0] != "id" || colsAfter[1] != "name" { - t.Fatalf("after restart: unexpected cols: %v", colsAfter) - } - - if len(rowsAfter) != 2 { - t.Fatalf("after restart: expected 2 rows, got %d", len(rowsAfter)) - } - - ids := []int64{rowsAfter[0][0].I64, rowsAfter[1][0].I64} - names := []string{rowsAfter[0][1].S, rowsAfter[1][1].S} - - // Order should be preserved by simple replay (tx only used INSERTs). - if ids[0] != 1 || ids[1] != 2 || names[0] != "Alice" || names[1] != "Bob" { - t.Fatalf("after restart: unexpected data: ids=%v, names=%v", ids, names) - } -} - -// Recovery should ignore rolled-back transactions: data they wrote should disappear after restart. -func TestFilestore_Recovery_IgnoresRolledBackTx(t *testing.T) { - dir := t.TempDir() - - fs1, err := New(dir) - if err != nil { - t.Fatalf("New(fs1) failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - } - - if err := fs1.CreateTable("t", cols); err != nil { - t.Fatalf("CreateTable(t) failed: %v", err) - } - - // tx1: committed insert of id=1 - tx1, err := fs1.Begin(false) - if err != nil { - t.Fatalf("Begin(tx1) failed: %v", err) - } - _ = tx1.Insert("t", sql.Row{{Type: sql.TypeInt, I64: 1}}) - if err := fs1.Commit(tx1); err != nil { - t.Fatalf("Commit(tx1) failed: %v", err) - } - - // tx2: insert id=2 but rollback - tx2, err := fs1.Begin(false) - if err != nil { - t.Fatalf("Begin(tx2) failed: %v", err) - } - _ = tx2.Insert("t", sql.Row{{Type: sql.TypeInt, I64: 2}}) - if err := fs1.Rollback(tx2); err != nil { - t.Fatalf("Rollback(tx2) failed: %v", err) - } - - // Before restart, because our current filestore writes directly to the table - // even for tx2, we may see both rows: - _, rowsBefore := scanAll(t, fs1, "t") - if len(rowsBefore) != 2 { - t.Fatalf("before restart: expected 2 rows (no undo), got %d", 
len(rowsBefore)) - } - - // Restart: recovery should rebuild table only from committed txs. - fs2, err := New(dir) - if err != nil { - t.Fatalf("New(fs2) failed: %v", err) - } - - _, rowsAfter := scanAll(t, fs2, "t") - if len(rowsAfter) != 1 { - t.Fatalf("after restart: expected 1 row (rolled-back tx ignored), got %d", len(rowsAfter)) - } - if rowsAfter[0][0].I64 != 1 { - t.Fatalf("after restart: expected id=1, got %d", rowsAfter[0][0].I64) - } -} - -// Recovery should use WAL only if present and non-empty. -func TestFilestore_Recovery_NoWalFileIsNoop(t *testing.T) { - dir := t.TempDir() - - // First start: no WAL yet, New should succeed and recovery do nothing. - fs1, err := New(dir) - if err != nil { - t.Fatalf("New(fs1) failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - } - if err := fs1.CreateTable("t", cols); err != nil { - t.Fatalf("CreateTable(t) failed: %v", err) - } - - // No writes, no WAL records. - // Restart: must not error, and table should still exist with empty rows. - fs2, err := New(dir) - if err != nil { - t.Fatalf("New(fs2) failed: %v", err) - } - - tables, err := fs2.ListTables() - if err != nil { - t.Fatalf("ListTables failed: %v", err) - } - if len(tables) != 1 || tables[0] != "t" { - t.Fatalf("unexpected tables after restart: %v", tables) - } - - _, rows := scanAll(t, fs2, "t") - if len(rows) != 0 { - t.Fatalf("expected no rows in t after restart, got %d", len(rows)) - } -} - -// Optional sanity check: WAL file exists and is non-empty after some writes. 
-func TestFilestore_Recovery_WalExistsAndGrows(t *testing.T) { - dir := t.TempDir() - - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - } - if err := fs.CreateTable("t", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - tx, _ := fs.Begin(false) - _ = tx.Insert("t", sql.Row{{Type: sql.TypeInt, I64: 123}}) - if err := fs.Commit(tx); err != nil { - t.Fatalf("Commit failed: %v", err) - } - - walPath := filepath.Join(dir, "wal.log") - info, err := os.Stat(walPath) - if err != nil { - t.Fatalf("wal.log not found: %v", err) - } - if info.Size() <= int64(len("GODBWAL2")) { - t.Fatalf("wal.log too small, no records? size=%d", info.Size()) - } -} -func TestFilestore_Recovery_Delete_Replayed(t *testing.T) { - dir := t.TempDir() - - fs1, err := New(dir) - if err != nil { - t.Fatalf("New(fs1) failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - } - if err := fs1.CreateTable("t", cols); err != nil { - t.Fatalf("CreateTable(t) failed: %v", err) - } - - // Insert id=1 and id=2 in committed tx - tx1, _ := fs1.Begin(false) - _ = tx1.Insert("t", sql.Row{{Type: sql.TypeInt, I64: 1}}) - _ = tx1.Insert("t", sql.Row{{Type: sql.TypeInt, I64: 2}}) - if err := fs1.Commit(tx1); err != nil { - t.Fatalf("Commit(tx1) failed: %v", err) - } - - // Delete id=2 and commit - tx2, _ := fs1.Begin(false) - pred := func(row sql.Row) (bool, error) { - return row[0].I64 == 2, nil - } - if err := tx2.DeleteWhere("t", storage.RowPredicate(pred)); err != nil { - t.Fatalf("DeleteWhere failed: %v", err) - } - if err := fs1.Commit(tx2); err != nil { - t.Fatalf("Commit(tx2) failed: %v", err) - } - - // Restart - fs2, err := New(dir) - if err != nil { - t.Fatalf("New(fs2) failed: %v", err) - } - - _, rows := scanAll(t, fs2, "t") - if len(rows) != 1 || rows[0][0].I64 != 1 { - t.Fatalf("after restart: expected only id=1, got rows=%v", rows) - } -} -func 
TestFilestore_Recovery_Delete_RollbackIgnored(t *testing.T) { - dir := t.TempDir() - - fs1, err := New(dir) - if err != nil { - t.Fatalf("New(fs1) failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - } - if err := fs1.CreateTable("t", cols); err != nil { - t.Fatalf("CreateTable(t) failed: %v", err) - } - - // Insert committed rows: 1,2 - tx1, _ := fs1.Begin(false) - _ = tx1.Insert("t", sql.Row{{Type: sql.TypeInt, I64: 1}}) - _ = tx1.Insert("t", sql.Row{{Type: sql.TypeInt, I64: 2}}) - if err := fs1.Commit(tx1); err != nil { - t.Fatalf("Commit(tx1) failed: %v", err) - } - - // Delete id=2 but rollback - tx2, _ := fs1.Begin(false) - pred := func(row sql.Row) (bool, error) { - return row[0].I64 == 2, nil - } - if err := tx2.DeleteWhere("t", storage.RowPredicate(pred)); err != nil { - t.Fatalf("DeleteWhere failed: %v", err) - } - if err := fs1.Rollback(tx2); err != nil { - t.Fatalf("Rollback(tx2) failed: %v", err) - } - - // Before restart (in-process) we might see id=1 only (no undo), - // but after restart WAL-based recovery must ignore rolled-back deletes. 
- fs2, err := New(dir) - if err != nil { - t.Fatalf("New(fs2) failed: %v", err) - } - - _, rows := scanAll(t, fs2, "t") - if len(rows) != 2 { - t.Fatalf("after restart: expected 2 rows (rollback of delete), got %d", len(rows)) - } -} -func TestFilestore_Recovery_Update_Replayed(t *testing.T) { - dir := t.TempDir() - - fs1, err := New(dir) - if err != nil { - t.Fatalf("New(fs1) failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - {Name: "name", Type: sql.TypeString}, - } - if err := fs1.CreateTable("users", cols); err != nil { - t.Fatalf("CreateTable(users) failed: %v", err) - } - - tx1, _ := fs1.Begin(false) - _ = tx1.Insert("users", sql.Row{ - {Type: sql.TypeInt, I64: 1}, - {Type: sql.TypeString, S: "Alice"}, - }) - if err := fs1.Commit(tx1); err != nil { - t.Fatalf("Commit(tx1) failed: %v", err) - } - - // Update name from Alice -> Bob and commit - tx2, _ := fs1.Begin(false) - pred := func(r sql.Row) (bool, error) { - return r[0].I64 == 1, nil - } - updater := func(r sql.Row) (sql.Row, error) { - r[1].S = "Bob" - return r, nil - } - if err := tx2.UpdateWhere("users", storage.RowPredicate(pred), storage.RowUpdater(updater)); err != nil { - t.Fatalf("UpdateWhere failed: %v", err) - } - if err := fs1.Commit(tx2); err != nil { - t.Fatalf("Commit(tx2) failed: %v", err) - } - - fs2, err := New(dir) - if err != nil { - t.Fatalf("New(fs2) failed: %v", err) - } - - _, rows := scanAll(t, fs2, "users") - if len(rows) != 1 || rows[0][1].S != "Bob" { - t.Fatalf("after restart: expected name=Bob, got rows=%v", rows) - } -} -func TestFilestore_Recovery_Update_RollbackIgnored(t *testing.T) { - dir := t.TempDir() - - fs1, err := New(dir) - if err != nil { - t.Fatalf("New(fs1) failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - {Name: "name", Type: sql.TypeString}, - } - if err := fs1.CreateTable("users", cols); err != nil { - t.Fatalf("CreateTable(users) failed: %v", err) - } - - tx1, _ := fs1.Begin(false) - _ = 
tx1.Insert("users", sql.Row{ - {Type: sql.TypeInt, I64: 1}, - {Type: sql.TypeString, S: "Alice"}, - }) - if err := fs1.Commit(tx1); err != nil { - t.Fatalf("Commit(tx1) failed: %v", err) - } - - // Update Alice -> Bob but rollback - tx2, _ := fs1.Begin(false) - pred := func(r sql.Row) (bool, error) { - return r[0].I64 == 1, nil - } - updater := func(r sql.Row) (sql.Row, error) { - r[1].S = "Bob" - return r, nil - } - if err := tx2.UpdateWhere("users", storage.RowPredicate(pred), storage.RowUpdater(updater)); err != nil { - t.Fatalf("UpdateWhere failed: %v", err) - } - if err := fs1.Rollback(tx2); err != nil { - t.Fatalf("Rollback(tx2) failed: %v", err) - } - - // After restart, WAL should ignore this rolled-back update, so we see Alice. - fs2, err := New(dir) - if err != nil { - t.Fatalf("New(fs2) failed: %v", err) - } - - _, rows := scanAll(t, fs2, "users") - if len(rows) != 1 || rows[0][1].S != "Alice" { - t.Fatalf("after restart: expected name=Alice, got rows=%v", rows) - } -} diff --git a/internal/storage/filestore/filestore_test.go b/internal/storage/filestore/filestore_test.go deleted file mode 100644 index ea60579..0000000 --- a/internal/storage/filestore/filestore_test.go +++ /dev/null @@ -1,368 +0,0 @@ -package filestore - -import ( - "errors" - "goDB/internal/sql" - "os" - "path/filepath" - "testing" -) - -// Basic: create table, verify file exists, read schema. 
-func TestFilestore_CreateTableAndSchema(t *testing.T) { - dir := t.TempDir() - - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - {Name: "name", Type: sql.TypeString}, - } - - if err := fs.CreateTable("users", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - // ListTables - tables, err := fs.ListTables() - if err != nil { - t.Fatalf("ListTables failed: %v", err) - } - if len(tables) != 1 || tables[0] != "users" { - t.Fatalf("unexpected tables: %v", tables) - } - - // Schema - schema, err := fs.TableSchema("users") - if err != nil { - t.Fatalf("TableSchema failed: %v", err) - } - if len(schema) != 2 || schema[0].Name != "id" || schema[1].Name != "name" { - t.Fatalf("unexpected schema: %v", schema) - } - - // And file must exist - path := filepath.Join(dir, "users.godb") - if _, err := filepath.Glob(path); err != nil { - t.Fatalf("file not created: %v", err) - } -} - -// Insert → Commit → Re-open → Scan -func TestFilestore_InsertAndScan(t *testing.T) { - dir := t.TempDir() - - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - {Name: "name", Type: sql.TypeString}, - } - - if err := fs.CreateTable("users", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - // Insert inside tx - tx, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin failed: %v", err) - } - - row := sql.Row{ - {Type: sql.TypeInt, I64: 1}, - {Type: sql.TypeString, S: "Alice"}, - } - - if err := tx.Insert("users", row); err != nil { - t.Fatalf("Insert failed: %v", err) - } - - if err := fs.Commit(tx); err != nil { - t.Fatalf("Commit failed: %v", err) - } - - // New tx (read-only) - tx2, err := fs.Begin(true) - if err != nil { - t.Fatalf("Begin2 failed: %v", err) - } - - names, rows, err := tx2.Scan("users") - if err != nil { - t.Fatalf("Scan failed: %v", err) - } - - if len(names) 
!= 2 || names[0] != "id" || names[1] != "name" { - t.Fatalf("unexpected names: %v", names) - } - - if len(rows) != 1 { - t.Fatalf("expected 1 row, got %d", len(rows)) - } - - if rows[0][0].I64 != 1 || rows[0][1].S != "Alice" { - t.Fatalf("unexpected rows: %v", rows) - } -} - -// Test ReplaceAll -func TestFilestore_ReplaceAll(t *testing.T) { - dir := t.TempDir() - - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - {Name: "active", Type: sql.TypeBool}, - } - - if err := fs.CreateTable("flags", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - // Insert initial data - tx, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin failed: %v", err) - } - if err := tx.Insert("flags", sql.Row{{Type: sql.TypeInt, I64: 1}, {Type: sql.TypeBool, B: true}}); err != nil { - t.Fatalf("Insert1 failed: %v", err) - } - if err := tx.Insert("flags", sql.Row{{Type: sql.TypeInt, I64: 2}, {Type: sql.TypeBool, B: false}}); err != nil { - t.Fatalf("Insert2 failed: %v", err) - } - if err := fs.Commit(tx); err != nil { - t.Fatalf("Commit failed: %v", err) - } - - // Replace all - tx2, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin2 failed: %v", err) - } - newRows := []sql.Row{ - {{Type: sql.TypeInt, I64: 99}, {Type: sql.TypeBool, B: false}}, - } - if err := tx2.ReplaceAll("flags", newRows); err != nil { - t.Fatalf("ReplaceAll failed: %v", err) - } - if err := fs.Commit(tx2); err != nil { - t.Fatalf("Commit2 failed: %v", err) - } - - // Read back - tx3, err := fs.Begin(true) - if err != nil { - t.Fatalf("Begin3 failed: %v", err) - } - _, rows, err := tx3.Scan("flags") - if err != nil { - t.Fatalf("Scan failed: %v", err) - } - if len(rows) != 1 || rows[0][0].I64 != 99 { - t.Fatalf("unexpected rows: %v", rows) - } -} - -// Rollback does NOT undo writes (documented) -func TestFilestore_Rollback_NoUndo(t *testing.T) { - dir := t.TempDir() - fs, err := New(dir) - if err != 
nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - } - - if err := fs.CreateTable("t", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - tx, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin failed: %v", err) - } - if err := tx.Insert("t", sql.Row{{Type: sql.TypeInt, I64: 1}}); err != nil { - t.Fatalf("Insert failed: %v", err) - } - if err := fs.Rollback(tx); err != nil { - t.Fatalf("Rollback failed: %v", err) - } // does NOT undo writes - - // Scan should still see row - tx2, err := fs.Begin(true) - if err != nil { - t.Fatalf("Begin2 failed: %v", err) - } - _, rows, err := tx2.Scan("t") - if err != nil { - t.Fatalf("Scan failed: %v", err) - } - if len(rows) != 1 { - t.Fatalf("expected 1 row, got %d", len(rows)) - } -} - -func TestFilestore_CommitRollbackValidation(t *testing.T) { - dir := t.TempDir() - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - if err := fs.Commit(nil); err == nil { - t.Fatalf("expected error committing nil tx") - } - if err := fs.Rollback(nil); err == nil { - t.Fatalf("expected error rolling back nil tx") - } - - // Commit marks transaction closed. - tx, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin failed: %v", err) - } - if err := fs.Commit(tx); err != nil { - t.Fatalf("Commit failed: %v", err) - } - if err := fs.Commit(tx); err == nil { - t.Fatalf("expected commit on closed tx to fail") - } - - // Rollback also closes the transaction. 
- tx2, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin2 failed: %v", err) - } - if err := fs.Rollback(tx2); err != nil { - t.Fatalf("Rollback failed: %v", err) - } - if err := fs.Rollback(tx2); err == nil { - t.Fatalf("expected rollback on closed tx to fail") - } -} - -func TestFilestore_CreateTableTooManyColumns(t *testing.T) { - dir := t.TempDir() - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := make([]sql.Column, 0x10000) - for i := range cols { - cols[i] = sql.Column{Name: "c", Type: sql.TypeInt} - } - - err = fs.CreateTable("big", cols) - if err == nil { - t.Fatalf("expected error for too many columns") - } - - // Ensure the file is not left behind when table creation fails. - path := filepath.Join(dir, "big.godb") - if _, statErr := os.Stat(path); !errors.Is(statErr, os.ErrNotExist) { - t.Fatalf("table file should not remain after failure") - } -} - -func TestFilestore_CreateIndex(t *testing.T) { - dir := t.TempDir() - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{{Name: "id", Type: sql.TypeInt}} - if err := fs.CreateTable("users", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - tx, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin failed: %v", err) - } - if err := tx.Insert("users", sql.Row{{Type: sql.TypeInt, I64: 10}}); err != nil { - t.Fatalf("Insert failed: %v", err) - } - if err := tx.Insert("users", sql.Row{{Type: sql.TypeInt, I64: 20}}); err != nil { - t.Fatalf("Insert failed: %v", err) - } - if err := fs.Commit(tx); err != nil { - t.Fatalf("Commit failed: %v", err) - } - - if err := fs.CreateIndex("idx_id", "users", "id"); err != nil { - t.Fatalf("CreateIndex failed: %v", err) - } - - // Verify index contents - bt, err := fs.indexMgr.OpenOrCreateIndex("users", "id") - if err != nil { - t.Fatalf("OpenOrCreateIndex failed: %v", err) - } - - rids, err := bt.Search(10) - if err != nil || len(rids) != 1 || 
rids[0].PageID != 0 || rids[0].SlotID != 0 { - t.Fatalf("index search for key 10 failed") - } - - rids, err = bt.Search(20) - if err != nil || len(rids) != 1 || rids[0].PageID != 0 || rids[0].SlotID != 1 { - t.Fatalf("index search for key 20 failed") - } - - // Insert a new row and check if the index is updated - tx2, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin2 failed: %v", err) - } - if err := tx2.Insert("users", sql.Row{{Type: sql.TypeInt, I64: 30}}); err != nil { - t.Fatalf("Insert failed: %v", err) - } - if err := fs.Commit(tx2); err != nil { - t.Fatalf("Commit2 failed: %v", err) - } - - rids, err = bt.Search(30) - if err != nil || len(rids) != 1 || rids[0].PageID != 0 || rids[0].SlotID != 2 { - t.Fatalf("index search for key 30 failed after insert") - } -} - -func TestFilestore_CreateIndexErrors(t *testing.T) { - dir := t.TempDir() - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{{Name: "id", Type: sql.TypeInt}, {Name: "name", Type: sql.TypeString}} - if err := fs.CreateTable("users", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - if err := fs.CreateIndex("idx_id", "users", "id"); err != nil { - t.Fatalf("CreateIndex failed: %v", err) - } - - if err := fs.CreateIndex("idx_id_dup", "users", "id"); err == nil { - t.Fatalf("expected duplicate index creation to fail") - } - - if err := fs.CreateIndex("idx_name", "users", "name"); err == nil { - t.Fatalf("expected non-integer column index creation to fail") - } -} diff --git a/internal/storage/filestore/format.go b/internal/storage/filestore/format.go deleted file mode 100644 index 991e4bd..0000000 --- a/internal/storage/filestore/format.go +++ /dev/null @@ -1,309 +0,0 @@ -package filestore - -import ( - "bytes" - "encoding/binary" - "fmt" - "goDB/internal/sql" - "io" - "math" -) - -const ( - fileMagic = "GODB1" // 5 bytes magic -) - -// writeHeader writes the table schema to the beginning of the file. 
-func writeHeader(w io.Writer, cols []sql.Column) error { - if len(cols) > 0xFFFF { - return fmt.Errorf("filestore: too many columns: %d", len(cols)) - } - // magic - if _, err := w.Write([]byte(fileMagic)); err != nil { - return err - } - // numCols as uint16 - if err := binary.Write(w, binary.LittleEndian, uint16(len(cols))); err != nil { - return err - } - - for _, c := range cols { - nameBytes := []byte(c.Name) - if len(nameBytes) > 0xFFFF { - return fmt.Errorf("column name too long: %s", c.Name) - } - // name length - if err := binary.Write(w, binary.LittleEndian, uint16(len(nameBytes))); err != nil { - return err - } - // name bytes - if _, err := w.Write(nameBytes); err != nil { - return err - } - // type as uint8 - if err := binary.Write(w, binary.LittleEndian, uint8(c.Type)); err != nil { - return err - } - } - - return nil -} - -// readHeader reads the schema from the beginning of the file and leaves -// the file position at the start of the first row. -func readHeader(r io.Reader) ([]sql.Column, error) { - magicBuf := make([]byte, len(fileMagic)) - if _, err := io.ReadFull(r, magicBuf); err != nil { - return nil, err - } - if string(magicBuf) != fileMagic { - return nil, fmt.Errorf("filestore: invalid file magic, not a GoDB table file") - } - - var numCols uint16 - if err := binary.Read(r, binary.LittleEndian, &numCols); err != nil { - return nil, err - } - - cols := make([]sql.Column, numCols) - for i := 0; i < int(numCols); i++ { - var nameLen uint16 - if err := binary.Read(r, binary.LittleEndian, &nameLen); err != nil { - return nil, err - } - - nameBytes := make([]byte, nameLen) - if _, err := io.ReadFull(r, nameBytes); err != nil { - return nil, err - } - - var t uint8 - if err := binary.Read(r, binary.LittleEndian, &t); err != nil { - return nil, err - } - - cols[i] = sql.Column{ - Name: string(nameBytes), - Type: sql.DataType(t), - } - } - - return cols, nil -} - -// writeRow encodes a row as a sequence of typed values. 
-func writeRow(w io.Writer, row sql.Row) error { - for _, v := range row { - // type first - if err := binary.Write(w, binary.LittleEndian, uint8(v.Type)); err != nil { - return err - } - - switch v.Type { - case sql.TypeInt: - if err := binary.Write(w, binary.LittleEndian, v.I64); err != nil { - return err - } - case sql.TypeFloat: - if err := binary.Write(w, binary.LittleEndian, v.F64); err != nil { - return err - } - case sql.TypeString: - b := []byte(v.S) - if len(b) > 0xFFFFFFFF { - return fmt.Errorf("string too long") - } - if err := binary.Write(w, binary.LittleEndian, uint32(len(b))); err != nil { - return err - } - if _, err := w.Write(b); err != nil { - return err - } - case sql.TypeBool: - var b byte - if v.B { - b = 1 - } - if err := binary.Write(w, binary.LittleEndian, b); err != nil { - return err - } - case sql.TypeNull: - // nothing else to write - default: - return fmt.Errorf("writeRow: unsupported value type %v", v.Type) - } - } - - return nil -} - -// readRow decodes a row with the given number of columns. -// Returns io.EOF when there is no more data. -func readRow(r io.Reader, numCols int) (sql.Row, error) { - row := make(sql.Row, numCols) - - for i := 0; i < numCols; i++ { - var t uint8 - if err := binary.Read(r, binary.LittleEndian, &t); err != nil { - if err == io.EOF || err == io.ErrUnexpectedEOF { - // if we hit EOF at first column, propagate EOF; - // if we hit mid-row, treat as error. 
- if i == 0 { - return nil, io.EOF - } - return nil, fmt.Errorf("readRow: truncated row") - } - return nil, err - } - vt := sql.DataType(t) - - switch vt { - case sql.TypeInt: - var v int64 - if err := binary.Read(r, binary.LittleEndian, &v); err != nil { - return nil, err - } - row[i] = sql.Value{Type: sql.TypeInt, I64: v} - - case sql.TypeFloat: - var v float64 - if err := binary.Read(r, binary.LittleEndian, &v); err != nil { - return nil, err - } - row[i] = sql.Value{Type: sql.TypeFloat, F64: v} - - case sql.TypeString: - var l uint32 - if err := binary.Read(r, binary.LittleEndian, &l); err != nil { - return nil, err - } - buf := make([]byte, l) - if _, err := io.ReadFull(r, buf); err != nil { - return nil, err - } - row[i] = sql.Value{Type: sql.TypeString, S: string(buf)} - - case sql.TypeBool: - var b byte - if err := binary.Read(r, binary.LittleEndian, &b); err != nil { - return nil, err - } - row[i] = sql.Value{Type: sql.TypeBool, B: b != 0} - - case sql.TypeNull: - row[i] = sql.Value{Type: sql.TypeNull} - - default: - return nil, fmt.Errorf("readRow: unsupported value type %v", vt) - } - } - - return row, nil -} - -// readRowFromBytes decodes a row from a byte slice, given numCols. -// It's the same encoding as readRow, but works on a buffer instead of io.Reader. 
-func readRowFromBytes(buf []byte, numCols int) (sql.Row, error) { - row := make(sql.Row, numCols) - offset := 0 - - readByte := func() (byte, error) { - if offset >= len(buf) { - return 0, fmt.Errorf("readRowFromBytes: unexpected end of buffer") - } - b := buf[offset] - offset++ - return b, nil - } - - _ = func() (uint16, error) { - if offset+2 > len(buf) { - return 0, fmt.Errorf("readRowFromBytes: unexpected end of buffer") - } - v := binary.LittleEndian.Uint16(buf[offset : offset+2]) - offset += 2 - return v, nil - } - - readUint32 := func() (uint32, error) { - if offset+4 > len(buf) { - return 0, fmt.Errorf("readRowFromBytes: unexpected end of buffer") - } - v := binary.LittleEndian.Uint32(buf[offset : offset+4]) - offset += 4 - return v, nil - } - - readInt64 := func() (int64, error) { - if offset+8 > len(buf) { - return 0, fmt.Errorf("readRowFromBytes: unexpected end of buffer") - } - v := int64(binary.LittleEndian.Uint64(buf[offset : offset+8])) - offset += 8 - return v, nil - } - - readFloat64 := func() (float64, error) { - if offset+8 > len(buf) { - return 0, fmt.Errorf("readRowFromBytes: unexpected end of buffer") - } - bits := binary.LittleEndian.Uint64(buf[offset : offset+8]) - offset += 8 - return math.Float64frombits(bits), nil - } - - for i := 0; i < numCols; i++ { - tByte, err := readByte() - if err != nil { - return nil, err - } - vt := sql.DataType(tByte) - - switch vt { - case sql.TypeInt: - v, err := readInt64() - if err != nil { - return nil, err - } - row[i] = sql.Value{Type: sql.TypeInt, I64: v} - case sql.TypeFloat: - v, err := readFloat64() - if err != nil { - return nil, err - } - row[i] = sql.Value{Type: sql.TypeFloat, F64: v} - case sql.TypeString: - l, err := readUint32() - if err != nil { - return nil, err - } - if offset+int(l) > len(buf) { - return nil, fmt.Errorf("readRowFromBytes: invalid string length") - } - s := string(buf[offset : offset+int(l)]) - offset += int(l) - row[i] = sql.Value{Type: sql.TypeString, S: s} - case 
sql.TypeBool: - b, err := readByte() - if err != nil { - return nil, err - } - row[i] = sql.Value{Type: sql.TypeBool, B: b != 0} - case sql.TypeNull: - row[i] = sql.Value{Type: sql.TypeNull} - default: - return nil, fmt.Errorf("readRowFromBytes: unsupported type %v", vt) - } - } - - return row, nil -} - -// encodeRowToBytes encodes a row into a byte slice using the same format as writeRow. -func encodeRowToBytes(row sql.Row) ([]byte, error) { - var buf bytes.Buffer - if err := writeRow(&buf, row); err != nil { - return nil, err - } - return buf.Bytes(), nil -} diff --git a/internal/storage/filestore/page.go b/internal/storage/filestore/page.go deleted file mode 100644 index 19f724f..0000000 --- a/internal/storage/filestore/page.go +++ /dev/null @@ -1,224 +0,0 @@ -package filestore - -import ( - "encoding/binary" - "fmt" - "goDB/internal/sql" -) - -const ( - PageSize = 4096 - - pageMagic = "GPG1" // GoDB Page v1 - - pageTypeHeap uint8 = 1 -) - -// Page header layout (on disk): -// -// offset size field -// 0 4 magic "GPG1" -// 4 4 pageID (uint32) -// 8 1 pageType (1 = heap) -// 9 1 reserved -// 10 2 numSlots (uint16) -// 12 2 freeStart (uint16) - where next row bytes can be written -// 14 2 reserved -// 16.. row area... -// -// Slot directory is at the end of the page, each slot 4 bytes: -// [offset uint16][length uint16] -// -// Invariants: -// freeStart <= PageSize - numSlots*4 -// slot i is located at: PageSize - (i+1)*4 -// deleted slot: offset == 0xFFFF -// - -// pageBuf is a 4KB page in memory. -type pageBuf []byte - -// newEmptyHeapPage initializes a new heap page with given pageID. 
-func newEmptyHeapPage(pageID uint32) pageBuf { - buf := make([]byte, PageSize) - // magic - copy(buf[0:4], []byte(pageMagic)) - // pageID - binary.LittleEndian.PutUint32(buf[4:8], pageID) - // pageType - buf[8] = pageTypeHeap - // numSlots = 0 - binary.LittleEndian.PutUint16(buf[10:12], 0) - // freeStart = header end (16) - binary.LittleEndian.PutUint16(buf[12:14], 16) - return buf -} - -func (p pageBuf) pageID() uint32 { - return binary.LittleEndian.Uint32(p[4:8]) -} - -func (p pageBuf) numSlots() uint16 { - return binary.LittleEndian.Uint16(p[10:12]) -} - -func (p pageBuf) setNumSlots(n uint16) { - binary.LittleEndian.PutUint16(p[10:12], n) -} - -func (p pageBuf) freeStart() uint16 { - return binary.LittleEndian.Uint16(p[12:14]) -} - -func (p pageBuf) setFreeStart(off uint16) { - binary.LittleEndian.PutUint16(p[12:14], off) -} - -// slotPos returns the byte index in the page of slot i (0-based). -func slotPos(i uint16) int { - return PageSize - int(i+1)*4 -} - -// getSlot reads slot i (0-based): (offset, length). -func (p pageBuf) getSlot(i uint16) (uint16, uint16) { - pos := slotPos(i) - off := binary.LittleEndian.Uint16(p[pos : pos+2]) - length := binary.LittleEndian.Uint16(p[pos+2 : pos+4]) - return off, length -} - -// setSlot writes slot i (0-based). -func (p pageBuf) setSlot(i uint16, off, length uint16) { - pos := slotPos(i) - binary.LittleEndian.PutUint16(p[pos:pos+2], off) - binary.LittleEndian.PutUint16(p[pos+2:pos+4], length) -} - -// insertRow tries to place an encoded row into the page. -// Returns (slotIndex, error). If there's not enough space, returns error. -func (p pageBuf) insertRow(rowBytes []byte) (uint16, error) { - nSlots := p.numSlots() - freeStart := p.freeStart() - - rowLen := uint16(len(rowBytes)) - - // Check if we have a deleted slot we can reuse. 
- var reuseSlot *uint16 - for i := uint16(0); i < nSlots; i++ { - off, length := p.getSlot(i) - if off == 0xFFFF && length == 0 { - reuseSlot = &i - break - } - } - - neededForRow := int(rowLen) - neededForNewSlot := 4 // each slot: offset uint16 + length uint16 - - // Compute how much space we need in total - needed := neededForRow - if reuseSlot == nil { - needed += neededForNewSlot - } - - // Current free end = start of slot directory - freeEnd := PageSize - int(nSlots)*4 - - if int(freeStart)+needed > freeEnd { - return 0, fmt.Errorf("page: not enough free space") - } - - // Write row bytes at freeStart - copy(p[freeStart:int(freeStart)+len(rowBytes)], rowBytes) - - var slotIdx uint16 - if reuseSlot != nil { - slotIdx = *reuseSlot - } else { - slotIdx = nSlots - p.setNumSlots(nSlots + 1) - } - - // Point slot to row - p.setSlot(slotIdx, freeStart, rowLen) - p.setFreeStart(freeStart + rowLen) - - return slotIdx, nil -} - -// iterateRows calls fn(slotIndex, row) for each non-deleted row in order. -func (p pageBuf) iterateRows(numCols int, fn func(slot uint16, row sql.Row) error) error { - nSlots := p.numSlots() - for i := uint16(0); i < nSlots; i++ { - off, length := p.getSlot(i) - if off == 0xFFFF || length == 0 { - // deleted / empty slot - continue - } - start := int(off) - end := int(off) + int(length) - if end > len(p) { - return fmt.Errorf("page: corrupt slot %d", i) - } - rowBytes := p[start:end] - // decode rowBytes using readRowFromBytes (we'll add this helper) - row, err := readRowFromBytes(rowBytes, numCols) - if err != nil { - return fmt.Errorf("page: read row at slot %d: %w", i, err) - } - if err := fn(i, row); err != nil { - return err - } - } - return nil -} - -func (p pageBuf) deleteSlot(i uint16) { - // Capture existing offset/length so we can reclaim trailing space if possible. - off, length := p.getSlot(i) - - // Mark as deleted. We use 0xFFFF/0 as the “tombstone” value. 
- p.setSlot(i, 0xFFFF, 0) - - // If this row occupied the contiguous end of the in-use area, rewind freeStart - // to reclaim space. We walk backwards through rows that end at the current - // freeStart so consecutive deletions reclaim space in order of most recent - // inserts. - freeStart := p.freeStart() - if off != 0xFFFF && length != 0 { - if end := off + length; end == freeStart { - newFreeStart := off - for { - progressed := false - for idx := uint16(0); idx < p.numSlots(); idx++ { - o, l := p.getSlot(idx) - if o == 0xFFFF || l == 0 { - continue - } - if o+l == newFreeStart { - newFreeStart = o - progressed = true - } - } - if !progressed { - break - } - } - p.setFreeStart(newFreeStart) - } - } - - // Shrink slot directory by dropping tombstones at the end. This allows - // future inserts to reclaim the slot-directory space in addition to row data. - nSlots := p.numSlots() - for nSlots > 0 { - lastIdx := nSlots - 1 - o, l := p.getSlot(lastIdx) - if o == 0xFFFF && l == 0 { - nSlots-- - p.setNumSlots(nSlots) - continue - } - break - } -} diff --git a/internal/storage/filestore/page_test.go b/internal/storage/filestore/page_test.go deleted file mode 100644 index 8b5bd8e..0000000 --- a/internal/storage/filestore/page_test.go +++ /dev/null @@ -1,150 +0,0 @@ -package filestore - -import ( - "bytes" - "goDB/internal/sql" - "testing" -) - -// helper: encode a row into []byte using the same format as writeRow/readRow. 
-func encodeRow(t *testing.T, row sql.Row) []byte { - t.Helper() - var buf bytes.Buffer - if err := writeRow(&buf, row); err != nil { - t.Fatalf("writeRow failed: %v", err) - } - return buf.Bytes() -} - -func TestPage_InsertAndIterateRows(t *testing.T) { - // simple schema: 3 columns - numCols := 3 - - p := newEmptyHeapPage(1) - - row1 := sql.Row{ - {Type: sql.TypeInt, I64: 1}, - {Type: sql.TypeString, S: "Alice"}, - {Type: sql.TypeBool, B: true}, - } - row2 := sql.Row{ - {Type: sql.TypeInt, I64: 2}, - {Type: sql.TypeString, S: "Bob"}, - {Type: sql.TypeBool, B: false}, - } - - // insert two rows - slot0, err := p.insertRow(encodeRow(t, row1)) - if err != nil { - t.Fatalf("insertRow(row1) failed: %v", err) - } - if slot0 != 0 { - t.Fatalf("expected first slot index 0, got %d", slot0) - } - - slot1, err := p.insertRow(encodeRow(t, row2)) - if err != nil { - t.Fatalf("insertRow(row2) failed: %v", err) - } - if slot1 != 1 { - t.Fatalf("expected second slot index 1, got %d", slot1) - } - - if p.numSlots() != 2 { - t.Fatalf("expected numSlots=2, got %d", p.numSlots()) - } - - // iterate and collect rows - var got []sql.Row - err = p.iterateRows(numCols, func(slot uint16, r sql.Row) error { - got = append(got, r) - return nil - }) - if err != nil { - t.Fatalf("iterateRows failed: %v", err) - } - - if len(got) != 2 { - t.Fatalf("expected 2 rows, got %d", len(got)) - } - - // verify content and order - if got[0][0].I64 != 1 || got[0][1].S != "Alice" || !got[0][2].B { - t.Fatalf("unexpected first row: %+v", got[0]) - } - if got[1][0].I64 != 2 || got[1][1].S != "Bob" || got[1][2].B { - t.Fatalf("unexpected second row: %+v", got[1]) - } -} - -func TestPage_NotEnoughSpace(t *testing.T) { - p := newEmptyHeapPage(1) - - // Make a big string so that we can almost fill the page. - // We don't need exact numbers, just something large. 
- largeStr := make([]byte, 3000) - for i := range largeStr { - largeStr[i] = 'x' - } - - row := sql.Row{ - {Type: sql.TypeInt, I64: 1}, - {Type: sql.TypeString, S: string(largeStr)}, - } - - // Insert until we run out of space. - var count int - for { - _, err := p.insertRow(encodeRow(t, row)) - if err != nil { - // we expect to hit "not enough free space" eventually - break - } - count++ - } - - if count == 0 { - t.Fatalf("expected at least one row to fit into the page, got 0") - } -} - -func TestPage_DeletedSlotIsSkipped(t *testing.T) { - numCols := 2 - p := newEmptyHeapPage(1) - - row1 := sql.Row{ - {Type: sql.TypeInt, I64: 1}, - {Type: sql.TypeString, S: "Alice"}, - } - row2 := sql.Row{ - {Type: sql.TypeInt, I64: 2}, - {Type: sql.TypeString, S: "Bob"}, - } - - // Insert two rows -> slots 0 and 1 - if _, err := p.insertRow(encodeRow(t, row1)); err != nil { - t.Fatalf("insertRow(row1) failed: %v", err) - } - if _, err := p.insertRow(encodeRow(t, row2)); err != nil { - t.Fatalf("insertRow(row2) failed: %v", err) - } - - // Simulate deletion of slot 0 by marking it as deleted. 
- p.setSlot(0, 0xFFFF, 0) - - var got []sql.Row - err := p.iterateRows(numCols, func(slot uint16, r sql.Row) error { - got = append(got, r) - return nil - }) - if err != nil { - t.Fatalf("iterateRows failed: %v", err) - } - - if len(got) != 1 { - t.Fatalf("expected 1 visible row after deletion, got %d", len(got)) - } - if got[0][0].I64 != 2 || got[0][1].S != "Bob" { - t.Fatalf("unexpected remaining row: %+v", got[0]) - } -} diff --git a/internal/storage/filestore/recovery.go b/internal/storage/filestore/recovery.go deleted file mode 100644 index a54dff5..0000000 --- a/internal/storage/filestore/recovery.go +++ /dev/null @@ -1,370 +0,0 @@ -package filestore - -import ( - "encoding/binary" - "fmt" - "goDB/internal/sql" - "goDB/internal/storage" - "io" - "os" - "path/filepath" -) - -type walOpType int - -const ( - walOpInsert walOpType = iota - walOpReplaceAll - walOpDelete - walOpUpdate -) - -type walOp struct { - typ walOpType - table string - rows []sql.Row // semantics depend on typ: - // Insert: rows = [row1, row2, ...] - // ReplaceAll: rows = full table snapshot - // Delete: rows = [row1, row2, ...] to remove - // Update: rows = [old1, new1, old2, new2, ...] 
-} - -type walTxState struct { - id uint64 - ops []walOp - committed bool - rolled bool - order int -} - -func (e *FileEngine) recoverFromWAL() error { - walPath := filepath.Join(e.dir, "wal.log") - - info, err := os.Stat(walPath) - if err != nil { - if os.IsNotExist(err) { - return nil // no WAL, nothing to recover - } - return fmt.Errorf("recovery: stat WAL: %w", err) - } - - if info.Size() <= int64(len(walMagic)) { - return nil // WAL only has magic, no records - } - - // 1) Load schemas for all existing tables - tableNames, err := e.ListTables() - if err != nil { - return fmt.Errorf("recovery: list tables: %w", err) - } - - schemas := make(map[string][]sql.Column) - for _, t := range tableNames { - cols, err := e.TableSchema(t) - if err != nil { - return fmt.Errorf("recovery: read schema for %q: %w", t, err) - } - schemas[t] = cols - } - - // 2) Truncate data for all tables (keep header) - for _, t := range tableNames { - path := e.tablePath(t) - f, err := os.OpenFile(path, os.O_RDWR, 0o644) - if err != nil { - return fmt.Errorf("recovery: open table %q: %w", t, err) - } - - cols := schemas[t] - - if err := f.Truncate(0); err != nil { - f.Close() - return fmt.Errorf("recovery: truncate table %q: %w", t, err) - } - if _, err := f.Seek(0, io.SeekStart); err != nil { - f.Close() - return fmt.Errorf("recovery: seek table %q: %w", t, err) - } - if err := writeHeader(f, cols); err != nil { - f.Close() - return fmt.Errorf("recovery: write header for %q: %w", t, err) - } - f.Close() - } - - // 3) Parse WAL into txStates - f, err := os.Open(walPath) - if err != nil { - return fmt.Errorf("recovery: open WAL: %w", err) - } - defer f.Close() - - // skip magic - if _, err := f.Seek(int64(len(walMagic)), io.SeekStart); err != nil { - return fmt.Errorf("recovery: seek WAL: %w", err) - } - - txStates := make(map[uint64]*walTxState) - var txOrder []uint64 - getTx := func(id uint64) *walTxState { - if s, ok := txStates[id]; ok { - return s - } - s := &walTxState{id: id, order: 
len(txOrder)} - txStates[id] = s - txOrder = append(txOrder, id) - return s - } - - for { - var recType uint8 - if err := binary.Read(f, binary.LittleEndian, &recType); err != nil { - if err == io.EOF { - break - } - return fmt.Errorf("recovery: read recType: %w", err) - } - - var txID uint64 - if err := binary.Read(f, binary.LittleEndian, &txID); err != nil { - return fmt.Errorf("recovery: read txID: %w", err) - } - txState := getTx(txID) - - switch recType { - case walRecBegin: - // nothing extra - case walRecCommit: - txState.committed = true - case walRecRollback: - txState.rolled = true - - case walRecInsert, walRecReplaceAll, walRecDelete, walRecUpdate: - // common header: table name + rowCount - var nameLen uint16 - if err := binary.Read(f, binary.LittleEndian, &nameLen); err != nil { - return fmt.Errorf("recovery: read table name len: %w", err) - } - nameBytes := make([]byte, nameLen) - if _, err := io.ReadFull(f, nameBytes); err != nil { - return fmt.Errorf("recovery: read table name: %w", err) - } - table := string(nameBytes) - - var rowCount uint32 - if err := binary.Read(f, binary.LittleEndian, &rowCount); err != nil { - return fmt.Errorf("recovery: read rowCount: %w", err) - } - - cols, ok := schemas[table] - if !ok { - return fmt.Errorf("recovery: table %q in WAL but not in schema map", table) - } - - rows := make([]sql.Row, 0, rowCount) - for i := uint32(0); i < rowCount; i++ { - r, err := readRow(f, len(cols)) - if err != nil { - return fmt.Errorf("recovery: read row: %w", err) - } - rows = append(rows, r) - } - - var opType walOpType - switch recType { - case walRecInsert: - opType = walOpInsert - case walRecReplaceAll: - opType = walOpReplaceAll - case walRecDelete: - opType = walOpDelete - case walRecUpdate: - opType = walOpUpdate - } - - txState.ops = append(txState.ops, walOp{ - typ: opType, - table: table, - rows: rows, - }) - - default: - return fmt.Errorf("recovery: unknown WAL record type %d", recType) - } - } - - // 4) Replay committed txs 
into an in-memory view of each table - rowsByTable := make(map[string][]sql.Row) - - for _, txID := range txOrder { - s := txStates[txID] - if !s.committed || s.rolled { - continue - } - - for _, op := range s.ops { - switch op.typ { - case walOpInsert: - // Append rows - rowsByTable[op.table] = append(rowsByTable[op.table], op.rows...) - - case walOpReplaceAll: - // Replace full contents - copied := make([]sql.Row, len(op.rows)) - copy(copied, op.rows) - rowsByTable[op.table] = copied - - case walOpDelete: - // Remove matching rows (first match per entry) - cur := rowsByTable[op.table] - for _, delRow := range op.rows { - for i := 0; i < len(cur); i++ { - if equalRow(cur[i], delRow) { - cur = append(cur[:i], cur[i+1:]...) - break - } - } - } - rowsByTable[op.table] = cur - - case walOpUpdate: - // rows = [old1, new1, old2, new2, ...] - cur := rowsByTable[op.table] - if len(op.rows)%2 != 0 { - return fmt.Errorf("recovery: update op has odd rows length for table %q", op.table) - } - for i := 0; i < len(op.rows); i += 2 { - oldRow := op.rows[i] - newRow := op.rows[i+1] - - for j := 0; j < len(cur); j++ { - if equalRow(cur[j], oldRow) { - cur[j] = newRow - break - } - } - } - rowsByTable[op.table] = cur - } - } - } - - // 5) Write rebuilt contents back to disk via ReplaceAll (page-based) - for table, rows := range rowsByTable { - tx := &fileTx{ - eng: e, - readOnly: false, - closed: false, - id: 0, // don't log recovery writes into WAL - } - if err := tx.ReplaceAll(table, rows); err != nil { - return fmt.Errorf("recovery: rebuild table %q: %w", table, err) - } - } - - return nil -} - -func (e *FileEngine) applyTxOps(s *walTxState, schemas map[string][]sql.Column) error { - for _, op := range s.ops { - switch op.typ { - case walOpInsert: - path := e.tablePath(op.table) - f, err := os.OpenFile(path, os.O_RDWR, 0o644) - if err != nil { - return fmt.Errorf("recovery: open table %q for insert: %w", op.table, err) - } - - if _, err := f.Seek(0, io.SeekEnd); err != nil { - 
f.Close() - return fmt.Errorf("recovery: seek end for %q: %w", op.table, err) - } - for _, r := range op.rows { - if err := writeRow(f, r); err != nil { - f.Close() - return fmt.Errorf("recovery: write row for %q: %w", op.table, err) - } - } - f.Close() - - case walOpReplaceAll: - path := e.tablePath(op.table) - f, err := os.OpenFile(path, os.O_RDWR, 0o644) - if err != nil { - return fmt.Errorf("recovery: open table %q for replace: %w", op.table, err) - } - cols := schemas[op.table] - - if err := f.Truncate(0); err != nil { - f.Close() - return fmt.Errorf("recovery: truncate table %q: %w", op.table, err) - } - if _, err := f.Seek(0, io.SeekStart); err != nil { - f.Close() - return fmt.Errorf("recovery: seek table %q: %w", op.table, err) - } - if err := writeHeader(f, cols); err != nil { - f.Close() - return fmt.Errorf("recovery: write header for %q: %w", op.table, err) - } - for _, r := range op.rows { - if err := writeRow(f, r); err != nil { - f.Close() - return fmt.Errorf("recovery: write row for %q: %w", op.table, err) - } - } - f.Close() - } - } - return nil -} - -func (e *FileEngine) validateTx(tx storage.Tx) (*fileTx, error) { - if tx == nil { - return nil, fmt.Errorf("filestore: transaction is nil") - } - - ft, ok := tx.(*fileTx) - if !ok { - return nil, fmt.Errorf("filestore: invalid transaction type") - } - - if ft.closed { - return nil, fmt.Errorf("filestore: tx is closed") - } - - return ft, nil -} -func equalRow(a, b sql.Row) bool { - if len(a) != len(b) { - return false - } - for i := range a { - if a[i].Type != b[i].Type { - return false - } - switch a[i].Type { - case sql.TypeInt: - if a[i].I64 != b[i].I64 { - return false - } - case sql.TypeFloat: - if a[i].F64 != b[i].F64 { - return false - } - case sql.TypeString: - if a[i].S != b[i].S { - return false - } - case sql.TypeBool: - if a[i].B != b[i].B { - return false - } - case sql.TypeNull: - // all nulls equal - default: - return false - } - } - return true -} diff --git 
a/internal/storage/filestore/tx.go b/internal/storage/filestore/tx.go deleted file mode 100644 index 54d9ad2..0000000 --- a/internal/storage/filestore/tx.go +++ /dev/null @@ -1,628 +0,0 @@ -package filestore - -import ( - "fmt" - "goDB/internal/index/btree" - "goDB/internal/sql" - "goDB/internal/storage" - "io" - "os" - "strings" -) - -// fileTx implements storage.Tx for FileEngine. -type fileTx struct { - eng *FileEngine - readOnly bool - closed bool - id uint64 // 0 = no WAL tracking (read-only or not started) -} - -func (tx *fileTx) DeleteWhere(tableName string, pred storage.RowPredicate) error { - if tx.closed { - return fmt.Errorf("filestore: tx is closed") - } - if tx.readOnly { - return fmt.Errorf("filestore: cannot delete in read-only tx") - } - - path := tx.eng.tablePath(tableName) - f, err := os.OpenFile(path, os.O_RDWR, 0o644) - if err != nil { - return fmt.Errorf("filestore: open table for delete: %w", err) - } - defer f.Close() - - // Read header to get schema and header size. - cols, err := readHeader(f) - if err != nil { - return fmt.Errorf("filestore: read header in delete: %w", err) - } - headerEnd, err := f.Seek(0, io.SeekCurrent) - if err != nil { - return fmt.Errorf("filestore: seek after header in delete: %w", err) - } - - // Determine number of pages. 
- fi, err := f.Stat() - if err != nil { - return fmt.Errorf("filestore: stat table in delete: %w", err) - } - fileSize := fi.Size() - if fileSize < headerEnd { - return fmt.Errorf("filestore: corrupt file, size < header") - } - dataBytes := fileSize - headerEnd - if dataBytes == 0 { - // no pages, nothing to delete - return nil - } - if dataBytes%PageSize != 0 { - return fmt.Errorf("filestore: corrupt data in delete (not multiple of page size)") - } - numPages := uint32(dataBytes / PageSize) - - for pageID := uint32(0); pageID < numPages; pageID++ { - p := make(pageBuf, PageSize) - offset := headerEnd + int64(pageID)*PageSize - - if _, err := f.ReadAt(p, offset); err != nil { - return fmt.Errorf("filestore: read page %d in delete: %w", pageID, err) - } - - nSlots := p.numSlots() - for i := uint16(0); i < nSlots; i++ { - off, length := p.getSlot(i) - if off == 0xFFFF || length == 0 { - // already deleted / empty - continue - } - - start := int(off) - end := start + int(length) - if end > len(p) { - return fmt.Errorf("filestore: corrupt slot %d in delete", i) - } - - rowBytes := p[start:end] - row, err := readRowFromBytes(rowBytes, len(cols)) - if err != nil { - return fmt.Errorf("filestore: read row in delete: %w", err) - } - - match, err := pred(row) - if err != nil { - return err - } - if match { - // WAL: log delete - if !tx.readOnly && tx.id != 0 { - if err := tx.eng.wal.appendDelete(tx.id, tableName, row); err != nil { - return fmt.Errorf("filestore: WAL appendDelete: %w", err) - } - } - p.deleteSlot(i) - } - } - - // Write modified page back to disk. - if _, err := f.WriteAt(p, offset); err != nil { - return fmt.Errorf("filestore: write page %d in delete: %w", pageID, err) - } - } - - // NOTE: currently we do NOT log per-row deletes in WAL, so crash recovery - // may not restore these deletes. We’ll address WAL integration later. 
- return nil -} - -func (tx *fileTx) UpdateWhere(tableName string, pred storage.RowPredicate, updater storage.RowUpdater) error { - if tx.closed { - return fmt.Errorf("filestore: tx is closed") - } - if tx.readOnly { - return fmt.Errorf("filestore: cannot update in read-only tx") - } - - path := tx.eng.tablePath(tableName) - f, err := os.OpenFile(path, os.O_RDWR, 0o644) - if err != nil { - return fmt.Errorf("filestore: open table for update: %w", err) - } - defer f.Close() - - // Read table schema from header - cols, err := readHeader(f) - if err != nil { - return fmt.Errorf("filestore: read header in update: %w", err) - } - headerEnd, err := f.Seek(0, io.SeekCurrent) - if err != nil { - return fmt.Errorf("filestore: seek after header in update: %w", err) - } - - fi, err := f.Stat() - if err != nil { - return fmt.Errorf("filestore: stat table in update: %w", err) - } - fileSize := fi.Size() - if fileSize < headerEnd { - return fmt.Errorf("filestore: corrupt file, size < header") - } - dataBytes := fileSize - headerEnd - if dataBytes == 0 { - // no pages -> nothing to update - return nil - } - if dataBytes%PageSize != 0 { - return fmt.Errorf("filestore: corrupt data in update (not multiple of page size)") - } - numPages := uint32(dataBytes / PageSize) - - var extraRows []sql.Row // updated rows that no longer fit in place - - for pageID := uint32(0); pageID < numPages; pageID++ { - p := make(pageBuf, PageSize) - offset := headerEnd + int64(pageID)*PageSize - - if _, err := f.ReadAt(p, offset); err != nil { - return fmt.Errorf("filestore: read page %d in update: %w", pageID, err) - } - - nSlots := p.numSlots() - - for i := uint16(0); i < nSlots; i++ { - off, length := p.getSlot(i) - if off == 0xFFFF || length == 0 { - // deleted or empty - continue - } - - start := int(off) - end := start + int(length) - if end > len(p) { - return fmt.Errorf("filestore: corrupt slot %d in update", i) - } - - oldBytes := p[start:end] - oldRow, err := readRowFromBytes(oldBytes, 
len(cols)) - if err != nil { - return fmt.Errorf("filestore: read row in update: %w", err) - } - - match, err := pred(oldRow) - if err != nil { - return err - } - if !match { - continue - } - - // Apply updater on a copy so WAL retains the original values. - origRow := cloneRow(oldRow) - newRow, err := updater(cloneRow(oldRow)) - if err != nil { - return err - } - - newBytes, err := encodeRowToBytes(newRow) - if err != nil { - return fmt.Errorf("filestore: encode updated row: %w", err) - } - - if len(newBytes) <= int(length) { - // In-place update: log UPDATE, then overwrite. - if !tx.readOnly && tx.id != 0 { - if err := tx.eng.wal.appendUpdate(tx.id, tableName, origRow, newRow); err != nil { - return fmt.Errorf("filestore: WAL appendUpdate: %w", err) - } - } - - copy(p[start:start+len(newBytes)], newBytes) - p.setSlot(i, off, uint16(len(newBytes))) - } else { - // New row is larger: log DELETE(old), delete slot, and reinsert via Insert (which logs INSERT). - if !tx.readOnly && tx.id != 0 { - if err := tx.eng.wal.appendDelete(tx.id, tableName, origRow); err != nil { - return fmt.Errorf("filestore: WAL appendDelete (update-grow): %w", err) - } - } - - p.deleteSlot(i) - extraRows = append(extraRows, newRow) - } - - } - - // Write modified page back - if _, err := f.WriteAt(p, offset); err != nil { - return fmt.Errorf("filestore: write page %d in update: %w", pageID, err) - } - } - - // Reinsertion step for updated rows that did not fit in place. 
- for _, r := range extraRows { - if err := tx.Insert(tableName, r); err != nil { - return fmt.Errorf("filestore: insert expanded updated row: %w", err) - } - } - - return nil -} - -// Insert using a page structure -func (tx *fileTx) Insert(tableName string, row sql.Row) error { - if tx.closed { - return fmt.Errorf("filestore: tx is closed") - } - if tx.readOnly { - return fmt.Errorf("filestore: cannot insert in read-only transaction") - } - - if !tx.readOnly && tx.id != 0 { - if err := tx.eng.wal.appendInsert(tx.id, tableName, row); err != nil { - return fmt.Errorf("filestore: WAL appendInsert: %w", err) - } - } - - path := tx.eng.tablePath(tableName) - f, err := os.OpenFile(path, os.O_RDWR, 0o644) - if err != nil { - return fmt.Errorf("filestore: open table for insert: %w", err) - } - defer f.Close() - - cols, err := readHeader(f) - if err != nil { - return fmt.Errorf("filestore: read header in insert: %w", err) - } - if len(row) != len(cols) { - return fmt.Errorf("filestore: row has %d values, expected %d", len(row), len(cols)) - } - headerEnd, err := f.Seek(0, io.SeekCurrent) - if err != nil { - return fmt.Errorf("filestore: seek after header: %w", err) - } - - fi, err := f.Stat() - if err != nil { - return fmt.Errorf("filestore: stat table: %w", err) - } - fileSize := fi.Size() - if fileSize < headerEnd { - return fmt.Errorf("filestore: corrupt file, size < header") - } - - dataBytes := fileSize - headerEnd - var numPages uint32 - if dataBytes > 0 { - if dataBytes%PageSize != 0 { - return fmt.Errorf("filestore: corrupt data section (not multiple of page size)") - } - numPages = uint32(dataBytes / PageSize) - } else { - numPages = 0 - } - - rowBytes, err := encodeRowToBytes(row) - if err != nil { - return fmt.Errorf("filestore: encode row: %w", err) - } - - var pageID uint32 - var slotID uint16 - - writePage := func(id uint32, p pageBuf) error { - offset := headerEnd + int64(id)*PageSize - if _, err := f.WriteAt(p, offset); err != nil { - return 
fmt.Errorf("filestore: write page %d: %w", id, err) - } - return nil - } - - if numPages == 0 { - p := newEmptyHeapPage(0) - slotID, err = p.insertRow(rowBytes) - if err != nil { - return fmt.Errorf("filestore: insert into empty page: %w", err) - } - pageID = 0 - if err := writePage(pageID, p); err != nil { - return err - } - } else { - lastID := numPages - 1 - p := make(pageBuf, PageSize) - offset := headerEnd + int64(lastID)*PageSize - if _, err := f.ReadAt(p, offset); err != nil { - return fmt.Errorf("filestore: read last page: %w", err) - } - - slotID, err = p.insertRow(rowBytes) - if err == nil { - pageID = lastID - if err := writePage(pageID, p); err != nil { - return err - } - } else { - newID := numPages - p = newEmptyHeapPage(newID) - slotID, err = p.insertRow(rowBytes) - if err != nil { - return fmt.Errorf("filestore: insert into new page: %w", err) - } - pageID = newID - if err := writePage(pageID, p); err != nil { - return err - } - } - } - - // Update indexes - tx.eng.idxMu.RLock() - defer tx.eng.idxMu.RUnlock() - - if tableIndexes, ok := tx.eng.indexes[tableName]; ok { - for colIdx, col := range cols { - if idx, ok := tableIndexes[col.Name]; ok { - val := row[colIdx] - if val.Type != sql.TypeNull { - rid := btree.RID{PageID: pageID, SlotID: slotID} - if err := idx.btree.Insert(val.I64, rid); err != nil { - return fmt.Errorf("error updating index for column %q: %w", col.Name, err) - } - } - } - } - } - - return nil -} - -// Scan reads all rows from the table file. 
-func (tx *fileTx) Scan(tableName string) ([]string, []sql.Row, error) { - if tx.closed { - return nil, nil, fmt.Errorf("filestore: tx is closed") - } - - path := tx.eng.tablePath(tableName) - f, err := os.Open(path) - if err != nil { - return nil, nil, fmt.Errorf("filestore: open table for scan: %w", err) - } - defer f.Close() - - cols, err := readHeader(f) - if err != nil { - return nil, nil, fmt.Errorf("filestore: read header in scan: %w", err) - } - headerEnd, err := f.Seek(0, io.SeekCurrent) - if err != nil { - return nil, nil, fmt.Errorf("filestore: seek after header: %w", err) - } - - colNames := make([]string, len(cols)) - for i, c := range cols { - colNames[i] = c.Name - } - - fi, err := f.Stat() - if err != nil { - return nil, nil, fmt.Errorf("filestore: stat table in scan: %w", err) - } - fileSize := fi.Size() - if fileSize < headerEnd { - return nil, nil, fmt.Errorf("filestore: corrupt file, size < header") - } - dataBytes := fileSize - headerEnd - if dataBytes == 0 { - return colNames, nil, nil - } - if dataBytes%PageSize != 0 { - return nil, nil, fmt.Errorf("filestore: corrupt data (not multiple of page size)") - } - numPages := uint32(dataBytes / PageSize) - - var rows []sql.Row - for pageID := uint32(0); pageID < numPages; pageID++ { - p := make(pageBuf, PageSize) - offset := headerEnd + int64(pageID)*PageSize - if _, err := f.ReadAt(p, offset); err != nil { - return nil, nil, fmt.Errorf("filestore: read page %d: %w", pageID, err) - } - - err := p.iterateRows(len(cols), func(slot uint16, r sql.Row) error { - rows = append(rows, r) - return nil - }) - if err != nil { - return nil, nil, fmt.Errorf("filestore: iterate rows in page %d: %w", pageID, err) - } - } - - return colNames, rows, nil -} - -// ReplaceAll truncates the table file and rewrites header + rows. 
-func (tx *fileTx) ReplaceAll(tableName string, rows []sql.Row) error { - if tx.closed { - return fmt.Errorf("filestore: tx is closed") - } - if tx.readOnly { - return fmt.Errorf("filestore: cannot replace in read-only transaction") - } - - if !tx.readOnly && tx.id != 0 { - if err := tx.eng.wal.appendReplaceAll(tx.id, tableName, rows); err != nil { - return fmt.Errorf("filestore: WAL appendReplaceAll: %w", err) - } - } - - path := tx.eng.tablePath(tableName) - f, err := os.OpenFile(path, os.O_RDWR, 0o644) - if err != nil { - return fmt.Errorf("filestore: open table for replace: %w", err) - } - defer f.Close() - - cols, err := readHeader(f) - if err != nil { - return fmt.Errorf("filestore: read header in replace: %w", err) - } - headerEnd, err := f.Seek(0, io.SeekCurrent) - if err != nil { - return fmt.Errorf("filestore: seek after header in replace: %w", err) - } - if len(cols) == 0 { - return fmt.Errorf("filestore: replace on table %q with no columns", tableName) - } - - for i, r := range rows { - if len(r) != len(cols) { - return fmt.Errorf("filestore: replace row %d length mismatch: got %d, expected %d", - i, len(r), len(cols)) - } - } - - tx.eng.idxMu.RLock() - idxInfos, hasIndexes := tx.eng.indexes[tableName] - tx.eng.idxMu.RUnlock() - - indexColumns := make(map[int]*indexInfo) - if hasIndexes { - for colName, info := range idxInfos { - colIdx := -1 - for i, c := range cols { - if strings.EqualFold(c.Name, colName) { - colIdx = i - break - } - } - if colIdx == -1 { - return fmt.Errorf("filestore: index on unknown column %q for table %q", colName, tableName) - } - indexColumns[colIdx] = info - } - } - - oldKeys := make(map[int]map[btree.Key]struct{}) - if len(indexColumns) > 0 { - fi, err := f.Stat() - if err != nil { - return fmt.Errorf("filestore: stat table in replace: %w", err) - } - fileSize := fi.Size() - if fileSize < headerEnd { - return fmt.Errorf("filestore: corrupt file, size < header") - } - dataBytes := fileSize - headerEnd - if dataBytes%PageSize 
!= 0 { - return fmt.Errorf("filestore: corrupt data in replace (not multiple of page size)") - } - numPages := uint32(dataBytes / PageSize) - - for pageID := uint32(0); pageID < numPages; pageID++ { - p := make(pageBuf, PageSize) - offset := headerEnd + int64(pageID)*PageSize - if _, err := f.ReadAt(p, offset); err != nil { - return fmt.Errorf("filestore: read page %d in replace: %w", pageID, err) - } - - if err := p.iterateRows(len(cols), func(_ uint16, r sql.Row) error { - for colIdx := range indexColumns { - val := r[colIdx] - if val.Type == sql.TypeNull { - continue - } - if oldKeys[colIdx] == nil { - oldKeys[colIdx] = make(map[btree.Key]struct{}) - } - oldKeys[colIdx][val.I64] = struct{}{} - } - return nil - }); err != nil { - return err - } - } - } - - for colIdx, keys := range oldKeys { - idx := indexColumns[colIdx] - for key := range keys { - if err := idx.btree.DeleteKey(key); err != nil { - return fmt.Errorf("filestore: clear index %q: %w", idx.name, err) - } - } - } - - if err := f.Truncate(0); err != nil { - return fmt.Errorf("filestore: truncate in replace: %w", err) - } - if _, err := f.Seek(0, io.SeekStart); err != nil { - return fmt.Errorf("filestore: seek start in replace: %w", err) - } - if err := writeHeader(f, cols); err != nil { - return fmt.Errorf("filestore: write header in replace: %w", err) - } - headerEnd, err = f.Seek(0, io.SeekCurrent) - if err != nil { - return fmt.Errorf("filestore: seek after header in replace: %w", err) - } - - pageID := uint32(0) - p := newEmptyHeapPage(pageID) - - writePage := func(id uint32, pg pageBuf) error { - offset := headerEnd + int64(id)*PageSize - if _, err := f.WriteAt(pg, offset); err != nil { - return fmt.Errorf("filestore: write page %d in replace: %w", id, err) - } - return nil - } - - for _, r := range rows { - rowBytes, err := encodeRowToBytes(r) - if err != nil { - return fmt.Errorf("filestore: encode row in replace: %w", err) - } - - var slotID uint16 - slotID, err = p.insertRow(rowBytes) - if err 
!= nil { - if err := writePage(pageID, p); err != nil { - return err - } - pageID++ - p = newEmptyHeapPage(pageID) - slotID, err = p.insertRow(rowBytes) - if err != nil { - return fmt.Errorf("filestore: insert into new page in replace: %w", err) - } - } - - for colIdx, idx := range indexColumns { - val := r[colIdx] - if val.Type == sql.TypeNull { - continue - } - rid := btree.RID{PageID: pageID, SlotID: slotID} - if err := idx.btree.Insert(val.I64, rid); err != nil { - return fmt.Errorf("filestore: update index %q in replace: %w", idx.name, err) - } - } - } - - if len(rows) > 0 { - if err := writePage(pageID, p); err != nil { - return err - } - } - - return nil -} - -func cloneRow(r sql.Row) sql.Row { - dup := make(sql.Row, len(r)) - copy(dup, r) - return dup -} diff --git a/internal/storage/filestore/wal.go b/internal/storage/filestore/wal.go deleted file mode 100644 index f8eb042..0000000 --- a/internal/storage/filestore/wal.go +++ /dev/null @@ -1,258 +0,0 @@ -package filestore - -import ( - "encoding/binary" - "fmt" - "goDB/internal/sql" - "io" - "os" - "path/filepath" - "sync" -) - -// WAL file format (version 2): -// -// magic: "GODBWAL2" (8 bytes) -// -// then a sequence of records: -// recType: uint8 -// txID: uint64 -// ... type-specific payload ... -// -// Types: -// BEGIN: recType = 1, payload: none -// COMMIT: recType = 2, payload: none -// ROLLBACK: recType = 3, payload: none -// INSERT: recType = 4, payload: -// tableNameLen: uint16 -// tableName: bytes -// rowCount: uint32 (must be 1 for INSERT) -// row data: encoded row (see writeRow) -// REPLACEALL: recType = 5, payload: -// tableNameLen: uint16 -// tableName: bytes -// rowCount: uint32 -// row data: repeated rowCount times - -const ( - walMagic = "GODBWAL2" - - walRecBegin uint8 = 1 - walRecCommit uint8 = 2 - walRecRollback uint8 = 3 - walRecInsert uint8 = 4 - walRecReplaceAll uint8 = 5 - walRecDelete uint8 = 6 - walRecUpdate uint8 = 7 -) - -// walLogger is a simple append-only WAL writer. 
-type walLogger struct { - mu sync.Mutex - f *os.File - path string -} - -// newWAL opens or creates WAL file and ensures correct magic header. -func newWAL(dir string) (*walLogger, error) { - path := filepath.Join(dir, "wal.log") - - f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o644) - if err != nil { - return nil, fmt.Errorf("wal: open: %w", err) - } - - info, err := f.Stat() - if err != nil { - f.Close() - return nil, fmt.Errorf("wal: stat: %w", err) - } - - if info.Size() == 0 { - // new file -> write magic - if _, err := f.Write([]byte(walMagic)); err != nil { - f.Close() - return nil, fmt.Errorf("wal: write magic: %w", err) - } - } else { - // existing file -> verify magic - magicBuf := make([]byte, len(walMagic)) - if _, err := f.ReadAt(magicBuf, 0); err != nil { - f.Close() - return nil, fmt.Errorf("wal: read magic: %w", err) - } - if string(magicBuf) != walMagic { - f.Close() - return nil, fmt.Errorf("wal: invalid magic, not a GoDB WAL v2 file") - } - } - - // Seek to end for appends - if _, err := f.Seek(0, io.SeekEnd); err != nil { - f.Close() - return nil, fmt.Errorf("wal: seek end: %w", err) - } - - return &walLogger{ - f: f, - path: path, - }, nil -} - -// Close closes the WAL file. -func (w *walLogger) Close() error { - w.mu.Lock() - defer w.mu.Unlock() - if w.f == nil { - return nil - } - err := w.f.Close() - w.f = nil - return err -} - -// Sync flushes WAL to disk. -func (w *walLogger) Sync() error { - w.mu.Lock() - defer w.mu.Unlock() - if w.f == nil { - return fmt.Errorf("wal: closed") - } - return w.f.Sync() -} - -// appendBegin writes a BEGIN record for txID. -func (w *walLogger) appendBegin(txID uint64) error { - return w.appendNoPayload(walRecBegin, txID) -} - -// appendCommit writes a COMMIT record for txID. -func (w *walLogger) appendCommit(txID uint64) error { - return w.appendNoPayload(walRecCommit, txID) -} - -// appendRollback writes a ROLLBACK record for txID. 
-func (w *walLogger) appendRollback(txID uint64) error { - return w.appendNoPayload(walRecRollback, txID) -} - -func (w *walLogger) appendNoPayload(recType uint8, txID uint64) error { - w.mu.Lock() - defer w.mu.Unlock() - if w.f == nil { - return fmt.Errorf("wal: closed") - } - - // recType - if err := binary.Write(w.f, binary.LittleEndian, recType); err != nil { - return err - } - // txID - if err := binary.Write(w.f, binary.LittleEndian, txID); err != nil { - return err - } - return nil -} - -// appendInsert logs an INSERT record for txID. -func (w *walLogger) appendInsert(txID uint64, table string, row sql.Row) error { - w.mu.Lock() - defer w.mu.Unlock() - if w.f == nil { - return fmt.Errorf("wal: closed") - } - - if err := w.writeRecordHeader(txID, walRecInsert, table, 1); err != nil { - return err - } - if err := writeRow(w.f, row); err != nil { - return fmt.Errorf("wal: write row: %w", err) - } - return nil -} - -// appendReplaceAll logs a REPLACEALL record for txID. -func (w *walLogger) appendReplaceAll(txID uint64, table string, rows []sql.Row) error { - w.mu.Lock() - defer w.mu.Unlock() - if w.f == nil { - return fmt.Errorf("wal: closed") - } - - if err := w.writeRecordHeader(txID, walRecReplaceAll, table, len(rows)); err != nil { - return err - } - for _, r := range rows { - if err := writeRow(w.f, r); err != nil { - return fmt.Errorf("wal: write row: %w", err) - } - } - return nil -} - -func (w *walLogger) writeRecordHeader(txID uint64, recType uint8, table string, rowCount int) error { - if w.f == nil { - return fmt.Errorf("wal: closed") - } - - // recType - if err := binary.Write(w.f, binary.LittleEndian, recType); err != nil { - return err - } - // txID - if err := binary.Write(w.f, binary.LittleEndian, txID); err != nil { - return err - } - - nameBytes := []byte(table) - if len(nameBytes) > 0xFFFF { - return fmt.Errorf("wal: table name too long") - } - if err := binary.Write(w.f, binary.LittleEndian, uint16(len(nameBytes))); err != nil { - return err 
- } - if _, err := w.f.Write(nameBytes); err != nil { - return err - } - - if err := binary.Write(w.f, binary.LittleEndian, uint32(rowCount)); err != nil { - return err - } - - return nil -} -func (w *walLogger) appendDelete(txID uint64, table string, row sql.Row) error { - w.mu.Lock() - defer w.mu.Unlock() - if w.f == nil { - return fmt.Errorf("wal: closed") - } - - if err := w.writeRecordHeader(txID, walRecDelete, table, 1); err != nil { - return err - } - if err := writeRow(w.f, row); err != nil { - return fmt.Errorf("wal: write delete row: %w", err) - } - return nil -} - -func (w *walLogger) appendUpdate(txID uint64, table string, oldRow, newRow sql.Row) error { - w.mu.Lock() - defer w.mu.Unlock() - if w.f == nil { - return fmt.Errorf("wal: closed") - } - - // rowCount = 2: [oldRow, newRow] - if err := w.writeRecordHeader(txID, walRecUpdate, table, 2); err != nil { - return err - } - if err := writeRow(w.f, oldRow); err != nil { - return fmt.Errorf("wal: write old row in update: %w", err) - } - if err := writeRow(w.f, newRow); err != nil { - return fmt.Errorf("wal: write new row in update: %w", err) - } - return nil -} diff --git a/internal/storage/filestore/wal_test.go b/internal/storage/filestore/wal_test.go deleted file mode 100644 index 979d53f..0000000 --- a/internal/storage/filestore/wal_test.go +++ /dev/null @@ -1,86 +0,0 @@ -package filestore - -import ( - "encoding/binary" - "goDB/internal/sql" - "io" - "os" - "path/filepath" - "testing" -) - -func TestFilestore_WAL_IsWritten(t *testing.T) { - dir := t.TempDir() - - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{ - {Name: "id", Type: sql.TypeInt}, - } - if err := fs.CreateTable("t", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - tx, err := fs.Begin(false) - if err != nil { - t.Fatalf("Begin failed: %v", err) - } - - row := sql.Row{{Type: sql.TypeInt, I64: 42}} - if err := tx.Insert("t", row); err != nil { - t.Fatalf("Insert 
failed: %v", err) - } - - if err := fs.Commit(tx); err != nil { - t.Fatalf("Commit failed: %v", err) - } - - // Check WAL file exists and is non-empty - walPath := filepath.Join(dir, "wal.log") - info, err := os.Stat(walPath) - if err != nil { - t.Fatalf("wal.log not found: %v", err) - } - if info.Size() <= int64(len("GODBWAL1")) { - t.Fatalf("wal.log too small, no records? size=%d", info.Size()) - } -} -func TestFilestore_WAL_BeginCommit(t *testing.T) { - dir := t.TempDir() - - fs, err := New(dir) - if err != nil { - t.Fatalf("New failed: %v", err) - } - - cols := []sql.Column{{Name: "id", Type: sql.TypeInt}} - if err := fs.CreateTable("t", cols); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - tx, _ := fs.Begin(false) - _ = fs.Commit(tx) - - walPath := filepath.Join(dir, "wal.log") - f, err := os.Open(walPath) - if err != nil { - t.Fatalf("open wal: %v", err) - } - defer f.Close() - - // skip magic - if _, err := f.Seek(int64(len("GODBWAL2")), io.SeekStart); err != nil { - t.Fatalf("seek: %v", err) - } - - var recType uint8 - if err := binary.Read(f, binary.LittleEndian, &recType); err != nil { - t.Fatalf("read recType: %v", err) - } - if recType != 1 { // BEGIN - t.Fatalf("expected first record to be BEGIN (1), got %d", recType) - } -} diff --git a/internal/storage/memstore/memstore.go b/internal/storage/memstore/memstore.go index 6919f94..3f37f74 100644 --- a/internal/storage/memstore/memstore.go +++ b/internal/storage/memstore/memstore.go @@ -2,11 +2,9 @@ package memstore import ( "fmt" - "goDB/internal/index/btree" "goDB/internal/sql" "goDB/internal/storage" "sort" - "strings" "sync" ) @@ -16,18 +14,9 @@ type table struct { rows []sql.Row // stored rows } -type index struct { - name string - tableName string - columnName string - btree btree.Index -} - type memEngine struct { - mu sync.RWMutex - tables map[string]*table - indexes map[string]*index - idxMan *btree.Manager + mu sync.RWMutex + tables map[string]*table } // New creates a new 
in-memory storage engine with the default data directory. @@ -37,73 +26,10 @@ func New() storage.Engine { // NewWithDir creates a new in-memory storage engine with the given data directory. func NewWithDir(dir string) storage.Engine { + _ = dir // kept for API compatibility; unused in pure in-memory mode return &memEngine{ - tables: make(map[string]*table), - indexes: make(map[string]*index), - idxMan: btree.NewManager(dir), - } -} - -func (e *memEngine) CreateIndex(indexName, tableName, columnName string) error { - e.mu.Lock() - defer e.mu.Unlock() - - if _, exists := e.indexes[indexName]; exists { - return fmt.Errorf("index %q already exists", indexName) - } - - for _, idx := range e.indexes { - if strings.EqualFold(idx.tableName, tableName) && strings.EqualFold(idx.columnName, columnName) { - return fmt.Errorf("index on %s.%s already exists", tableName, columnName) - } - } - - tbl, ok := e.tables[tableName] - if !ok { - return fmt.Errorf("table %q not found", tableName) - } - - colIdx := -1 - for i, col := range tbl.cols { - if strings.EqualFold(col.Name, columnName) { - colIdx = i - break - } - } - - if colIdx == -1 { - return fmt.Errorf("column %q not found in table %q", columnName, tableName) - } - - if tbl.cols[colIdx].Type != sql.TypeInt { - return fmt.Errorf("cannot create index on non-integer column %q", columnName) - } - - bt, err := e.idxMan.OpenOrCreateIndex(tableName, columnName) - if err != nil { - return fmt.Errorf("could not create index: %w", err) - } - - // Populate the index with existing data. 
- for i, row := range tbl.rows { - val := row[colIdx] - if val.Type == sql.TypeNull { - continue - } - rid := btree.RID{PageID: 0, SlotID: uint16(i)} - if err := bt.Insert(val.I64, rid); err != nil { - return fmt.Errorf("error building index: %w", err) - } - } - - e.indexes[indexName] = &index{ - name: indexName, - tableName: tableName, - columnName: columnName, - btree: bt, + tables: make(map[string]*table), } - - return nil } func (e *memEngine) ListTables() ([]string, error) { @@ -301,12 +227,7 @@ func (e *memEngine) Commit(tx storage.Tx) error { e.mu.Lock() defer e.mu.Unlock() - oldTables := e.tables e.tables = m.tables - - if err := e.rebuildIndexes(oldTables); err != nil { - return err - } return nil } @@ -316,55 +237,6 @@ func (e *memEngine) Rollback(tx storage.Tx) error { return nil } -func (e *memEngine) rebuildIndexes(oldTables map[string]*table) error { - for _, idx := range e.indexes { - newTbl, ok := e.tables[idx.tableName] - if !ok { - continue - } - - colIdx := -1 - for i, col := range newTbl.cols { - if strings.EqualFold(col.Name, idx.columnName) { - colIdx = i - break - } - } - if colIdx == -1 { - return fmt.Errorf("index %q references unknown column %q", idx.name, idx.columnName) - } - - keysToDelete := make(map[btree.Key]struct{}) - if oldTbl, ok := oldTables[idx.tableName]; ok { - for _, row := range oldTbl.rows { - val := row[colIdx] - if val.Type != sql.TypeNull { - keysToDelete[val.I64] = struct{}{} - } - } - } - - for key := range keysToDelete { - if err := idx.btree.DeleteKey(key); err != nil { - return fmt.Errorf("error clearing index %q: %w", idx.name, err) - } - } - - for slot, row := range newTbl.rows { - val := row[colIdx] - if val.Type == sql.TypeNull { - continue - } - rid := btree.RID{PageID: 0, SlotID: uint16(slot)} - if err := idx.btree.Insert(val.I64, rid); err != nil { - return fmt.Errorf("error rebuilding index %q: %w", idx.name, err) - } - } - } - - return nil -} - // Insert adds a row into a table inside this transaction. 
func (tx *memTx) Insert(tableName string, row sql.Row) error { if tx.readOnly { diff --git a/internal/storage/memstore/memstore_test.go b/internal/storage/memstore/memstore_test.go index 1609272..6cba8db 100644 --- a/internal/storage/memstore/memstore_test.go +++ b/internal/storage/memstore/memstore_test.go @@ -2,7 +2,6 @@ package memstore import ( "goDB/internal/sql" - "os" "testing" ) @@ -101,78 +100,3 @@ func TestMemstoreCreateInsertScan(t *testing.T) { checkRow(rows[0], 1, "Alice", true) checkRow(rows[1], 2, "Bob", false) } - -func TestMemstoreCreateIndex(t *testing.T) { - // Create a temporary directory for the test. - tempDir, err := os.MkdirTemp("", "godb_test_") - if err != nil { - t.Fatalf("could not create temp dir: %v", err) - } - defer os.RemoveAll(tempDir) - - store := NewWithDir(tempDir) - - // 1. Create table and insert data - _ = store.CreateTable("users", []sql.Column{{Name: "id", Type: sql.TypeInt}}) - tx, _ := store.Begin(false) - _ = tx.Insert("users", sql.Row{{Type: sql.TypeInt, I64: 10}}) - _ = tx.Insert("users", sql.Row{{Type: sql.TypeInt, I64: 20}}) - _ = store.Commit(tx) - - // 2. Create index - err = store.CreateIndex("idx_id", "users", "id") - if err != nil { - t.Fatalf("CreateIndex failed: %v", err) - } - - // 3. Verify index contents - memStore := store.(*memEngine) - idx, ok := memStore.indexes["idx_id"] - if !ok { - t.Fatalf("index not found in memstore") - } - - rids, err := idx.btree.Search(10) - if err != nil || len(rids) != 1 || rids[0].SlotID != 0 { - t.Fatalf("index search for key 10 failed") - } - - rids, err = idx.btree.Search(20) - if err != nil || len(rids) != 1 || rids[0].SlotID != 1 { - t.Fatalf("index search for key 20 failed") - } - - // 4. 
Insert a new row and check if the index is updated - tx, _ = store.Begin(false) - _ = tx.Insert("users", sql.Row{{Type: sql.TypeInt, I64: 30}}) - _ = store.Commit(tx) - - rids, err = idx.btree.Search(30) - if err != nil || len(rids) != 1 || rids[0].SlotID != 2 { - t.Fatalf("index search for key 30 failed after insert") - } -} - -func TestMemstoreCreateIndexErrors(t *testing.T) { - store := NewWithDir(t.TempDir()) - - // Create a table with mixed column types. - if err := store.CreateTable("users", []sql.Column{{Name: "id", Type: sql.TypeInt}, {Name: "name", Type: sql.TypeString}}); err != nil { - t.Fatalf("CreateTable failed: %v", err) - } - - // Building the index once should succeed. - if err := store.CreateIndex("idx_id", "users", "id"); err != nil { - t.Fatalf("CreateIndex failed: %v", err) - } - - // Creating another index on the same column should fail, even with a different name. - if err := store.CreateIndex("idx_id_dup", "users", "id"); err == nil { - t.Fatalf("expected duplicate index creation to fail") - } - - // Creating an index on a non-integer column should fail. - if err := store.CreateIndex("idx_name", "users", "name"); err == nil { - t.Fatalf("expected non-integer column index creation to fail") - } -} diff --git a/internal/storage/storage.go b/internal/storage/storage.go index 46701f7..6842d56 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -43,9 +43,6 @@ type Engine interface { // For now, we only support simple "name + list of columns". CreateTable(name string, cols []sql.Column) error - // CreateIndex creates a new index on a table's column. - CreateIndex(indexName, tableName, columnName string) error - // ListTables returns the names of all tables in the engine. ListTables() ([]string, error)