From f69a362b9ad1bb9bfbcde5b77b353dc4335d78c9 Mon Sep 17 00:00:00 2001 From: Arkadiy Kukarkin Date: Wed, 18 Feb 2026 18:21:44 +0100 Subject: [PATCH] fix piece type inference for non-inline preparations the migration heuristic only checked car_blocks.file_id to distinguish data vs dag pieces. non-inline preps don't store file refs in car_blocks (data is on disk), so all their pieces were misclassified as dag. add num_of_files > 0 as a signal, which is only set by the packer. --- model/migrate.go | 13 +++++----- model/migrate_test.go | 58 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/model/migrate.go b/model/migrate.go index db56440a..561c88ca 100644 --- a/model/migrate.go +++ b/model/migrate.go @@ -257,9 +257,11 @@ func fixPostgresSequences(db *gorm.DB) error { } // inferPieceTypes sets piece_type for cars that predate the column. -// A piece is "data" if any of its blocks reference files (contain file content). -// A piece is "dag" if none of its blocks reference files (directory metadata only). -// This is idempotent - only updates rows where piece_type is NULL or empty. +// for inline preps, a car is "data" if any of its car_blocks reference files. +// for non-inline preps, car_blocks don't reference files (data is on disk), +// so we fall back to num_of_files > 0 which is only set by the packer. +// everything else is "dag" (directory metadata only). +// idempotent - only updates rows where piece_type is NULL or empty. func inferPieceTypes(db *gorm.DB) error { dialect := db.Dialector.Name() @@ -283,15 +285,14 @@ func inferPieceTypes(db *gorm.DB) error { if dialect == "sqlite" { query = ` UPDATE cars SET piece_type = ( - CASE WHEN EXISTS ( + CASE WHEN num_of_files > 0 OR EXISTS ( SELECT 1 FROM car_blocks WHERE car_blocks.car_id = cars.id AND car_blocks.file_id IS NOT NULL ) THEN 'data' ELSE 'dag' END ) WHERE piece_type IS NULL OR piece_type = ''` } else { - // postgres/mysql support correlated subquery in CASE query = ` UPDATE cars c SET piece_type = CASE - WHEN EXISTS ( + WHEN c.num_of_files > 0 OR EXISTS ( SELECT 1 FROM car_blocks cb WHERE cb.car_id = c.id AND cb.file_id IS NOT NULL ) THEN 'data' ELSE 'dag' END WHERE c.piece_type IS NULL OR c.piece_type = ''` diff --git a/model/migrate_test.go b/model/migrate_test.go index bd9defef..62491bb5 100644 --- a/model/migrate_test.go +++ b/model/migrate_test.go @@ -76,3 +76,61 @@ func TestFKSetNullOnDelete(t *testing.T) { require.Nil(t, loadedCarBlock.FileID) }) } + +func TestInferPieceTypes(t *testing.T) { + testutil.All(t, func(ctx context.Context, t *testing.T, db *gorm.DB) { + prep := model.Preparation{Name: "test", MaxSize: 1024, PieceSize: 1024} + require.NoError(t, db.Create(&prep).Error) + + storage := model.Storage{Name: "test", Type: "local", Path: "/tmp"} + require.NoError(t, db.Create(&storage).Error) + + attachment := model.SourceAttachment{PreparationID: prep.ID, StorageID: storage.ID} + require.NoError(t, db.Create(&attachment).Error) + + file := model.File{Path: "test.txt", Size: 100, AttachmentID: &attachment.ID} + require.NoError(t, db.Create(&file).Error) + + // inline data piece: car_blocks reference files + inlineCar := model.Car{PieceSize: 1024, PreparationID: &prep.ID, AttachmentID: &attachment.ID} + require.NoError(t, db.Create(&inlineCar).Error) + cb := model.CarBlock{CarOffset: 0, CarID: &inlineCar.ID, FileID: &file.ID} + require.NoError(t, db.Create(&cb).Error) + + // non-inline data piece: no file refs in car_blocks, but num_of_files > 0 + nonInlineCar := model.Car{PieceSize: 1024, NumOfFiles: 5, PreparationID: &prep.ID, AttachmentID: &attachment.ID} + require.NoError(t, db.Create(&nonInlineCar).Error) + + // dag piece: no file refs, num_of_files == 0 + dagCar := model.Car{PieceSize: 1024, PreparationID: &prep.ID, AttachmentID: &attachment.ID} + require.NoError(t, db.Create(&dagCar).Error) + + // all should have empty piece_type + for _, id := range []model.CarID{inlineCar.ID, nonInlineCar.ID, dagCar.ID} { + var c model.Car + require.NoError(t, db.First(&c, id).Error) + require.Empty(t, c.PieceType) + } + + // run migration + require.NoError(t, model.AutoMigrate(db)) + + var c1, c2, c3 model.Car + + require.NoError(t, db.First(&c1, inlineCar.ID).Error) + require.Equal(t, model.DataPiece, c1.PieceType, "inline car with file refs should be data") + + require.NoError(t, db.First(&c2, nonInlineCar.ID).Error) + require.Equal(t, model.DataPiece, c2.PieceType, "non-inline car with num_of_files > 0 should be data") + + require.NoError(t, db.First(&c3, dagCar.ID).Error) + require.Equal(t, model.DagPiece, c3.PieceType, "car with no file refs and num_of_files == 0 should be dag") + + // idempotent: running again should not change anything + require.NoError(t, model.AutoMigrate(db)) + + var c4 model.Car + require.NoError(t, db.First(&c4, nonInlineCar.ID).Error) + require.Equal(t, model.DataPiece, c4.PieceType) + }) +}