From 510173c3a8353aca252bb0c049372e036cc65936 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 4 Dec 2025 14:00:40 +0200 Subject: [PATCH 001/101] Postgres: stream while snapshotting. --- .../src/api/PostgresRouteAPIAdapter.ts | 3 +- .../src/replication/PostgresSnapshotter.ts | 673 +++++++++++++++++ .../src/replication/WalStream.ts | 708 +++--------------- .../src/replication/replication-utils.ts | 65 +- .../module-postgres/test/src/pg_test.test.ts | 22 +- 5 files changed, 861 insertions(+), 610 deletions(-) create mode 100644 modules/module-postgres/src/replication/PostgresSnapshotter.ts diff --git a/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts b/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts index 6275676ba..9746df10c 100644 --- a/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts +++ b/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts @@ -314,7 +314,8 @@ LEFT JOIN ( AND NOT a.attisdropped AND has_column_privilege(tbl.quoted_name, a.attname, 'SELECT, INSERT, UPDATE, REFERENCES') ) -GROUP BY schemaname, tablename, quoted_name` +GROUP BY schemaname, tablename, quoted_name +ORDER BY schemaname, tablename;` ); await this.typeCache.fetchTypesForSchema(); const rows = pgwire.pgwireRows(results); diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts new file mode 100644 index 000000000..bb446bda5 --- /dev/null +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -0,0 +1,673 @@ +import { + container, + logger as defaultLogger, + Logger, + ReplicationAbortedError, + ReplicationAssertionError +} from '@powersync/lib-services-framework'; +import { + getUuidReplicaIdentityBson, + MetricsEngine, + RelationCache, + SourceEntityDescriptor, + SourceTable, + storage +} from '@powersync/service-core'; +import * as pgwire from '@powersync/service-jpgwire'; +import { + DatabaseInputRow, + SqliteInputRow, + SqliteRow, + SqlSyncRules, + TablePattern, + toSyncRulesRow +} from '@powersync/service-sync-rules'; + +import { ReplicationMetric } from '@powersync/service-types'; +import { PgManager } from './PgManager.js'; +import { + checkSourceConfiguration, + checkTableRls, + ensureStorageCompatibility, + getReplicationIdentityColumns +} from './replication-utils.js'; +import { + ChunkedSnapshotQuery, + IdSnapshotQuery, + PrimaryKeyValue, + SimpleSnapshotQuery, + SnapshotQuery +} from './SnapshotQuery.js'; +import { + MissingReplicationSlotError, + POSTGRES_DEFAULT_SCHEMA, + PUBLICATION_NAME, + sendKeepAlive, + WalStreamOptions, + ZERO_LSN +} from './WalStream.js'; +import * as timers from 'node:timers/promises'; +import pDefer, { DeferredPromise } from 'p-defer'; + +interface InitResult { + /** True if initial snapshot is not yet done. */ + needsInitialSync: boolean; + /** True if snapshot must be started from scratch with a new slot. 
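+   * When true, setupSlot() clears storage, drops any existing replication slot, and re-creates it before snapshotting.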
*/ + needsNewSlot: boolean; +} + +export class PostgresSnapshotter { + sync_rules: SqlSyncRules; + group_id: number; + + connection_id = 1; + + private logger: Logger; + + private readonly storage: storage.SyncRulesBucketStorage; + private readonly metrics: MetricsEngine; + private readonly slot_name: string; + + private connections: PgManager; + + private abortSignal: AbortSignal; + + private snapshotChunkLength: number; + + private relationCache = new RelationCache((relation: number | SourceTable) => { + if (typeof relation == 'number') { + return relation; + } + return relation.objectId!; + }); + + private queue = new Set(); + private initialSnapshotDone = pDefer(); + + constructor(options: WalStreamOptions) { + this.logger = options.logger ?? defaultLogger; + this.storage = options.storage; + this.metrics = options.metrics; + this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); + this.group_id = options.storage.group_id; + this.slot_name = options.storage.slot_name; + this.connections = options.connections; + this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000; + + this.abortSignal = options.abort_signal; + } + + async getQualifiedTableNames( + batch: storage.BucketStorageBatch, + db: pgwire.PgConnection, + tablePattern: TablePattern + ): Promise { + const schema = tablePattern.schema; + if (tablePattern.connectionTag != this.connections.connectionTag) { + return []; + } + + let tableRows: any[]; + const prefix = tablePattern.isWildcard ? tablePattern.tablePrefix : undefined; + + { + let query = ` + SELECT + c.oid AS relid, + c.relname AS table_name, + (SELECT + json_agg(DISTINCT a.atttypid) + FROM pg_attribute a + WHERE a.attnum > 0 AND NOT a.attisdropped AND a.attrelid = c.oid) + AS column_types + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = $1 + AND c.relkind = 'r'`; + + if (tablePattern.isWildcard) { + query += ' AND c.relname LIKE $2'; + } else { + query += ' AND c.relname = $2'; + } + + const result = await db.query({ + statement: query, + params: [ + { type: 'varchar', value: schema }, + { type: 'varchar', value: tablePattern.tablePattern } + ] + }); + + tableRows = pgwire.pgwireRows(result); + } + + let result: storage.SourceTable[] = []; + + for (let row of tableRows) { + const name = row.table_name as string; + if (typeof row.relid != 'bigint') { + throw new ReplicationAssertionError(`Missing relid for ${name}`); + } + const relid = Number(row.relid as bigint); + + if (prefix && !name.startsWith(prefix)) { + continue; + } + + const rs = await db.query({ + statement: `SELECT 1 FROM pg_publication_tables WHERE pubname = $1 AND schemaname = $2 AND tablename = $3`, + params: [ + { type: 'varchar', value: PUBLICATION_NAME }, + { type: 'varchar', value: tablePattern.schema }, + { type: 'varchar', value: name } + ] + }); + if (rs.rows.length == 0) { + this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`); + continue; + } + + try { + const result = await checkTableRls(db, relid); + if (!result.canRead) { + // We log the message, then continue anyway, since the check does not cover all cases. + this.logger.warn(result.message!); + } + } catch (e) { + // It's possible that we just don't have permission to access pg_roles - log the error and continue. 
+ this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e); + } + + const cresult = await getReplicationIdentityColumns(db, relid); + + const columnTypes = (JSON.parse(row.column_types) as string[]).map((e) => Number(e)); + const table = await this.handleRelation({ + batch, + descriptor: { + name, + schema, + objectId: relid, + replicaIdColumns: cresult.replicationColumns + } as SourceEntityDescriptor, + referencedTypeIds: columnTypes + }); + + result.push(table); + } + return result; + } + + async checkSlot(): Promise { + await checkSourceConfiguration(this.connections.pool, PUBLICATION_NAME); + await ensureStorageCompatibility(this.connections.pool, this.storage.factory); + + const slotName = this.slot_name; + + const status = await this.storage.getStatus(); + const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null; + if (snapshotDone) { + // Snapshot is done, but we still need to check the replication slot status + this.logger.info(`Initial replication already done`); + } + + // Check if replication slot exists + const slot = pgwire.pgwireRows( + await this.connections.pool.query({ + // We specifically want wal_status and invalidation_reason, but it's not available on older versions, + // so we just query *. + statement: 'SELECT * FROM pg_replication_slots WHERE slot_name = $1', + params: [{ type: 'varchar', value: slotName }] + }) + )[0]; + + // Previously we also used pg_catalog.pg_logical_slot_peek_binary_changes to confirm that we can query the slot. + // However, there were some edge cases where the query times out, repeating the query, ultimately + // causing high load on the source database and never recovering automatically. + // We now instead jump straight to replication if the wal_status is not "lost", rather detecting those + // errors during streaming replication, which is a little more robust. + + // We can have: + // 1. needsInitialSync: true, lost slot -> MissingReplicationSlotError (starts new sync rules version). + // Theoretically we could handle this the same as (2). + // 2. needsInitialSync: true, no slot -> create new slot + // 3. needsInitialSync: true, valid slot -> resume initial sync + // 4. needsInitialSync: false, lost slot -> MissingReplicationSlotError (starts new sync rules version) + // 5. needsInitialSync: false, no slot -> MissingReplicationSlotError (starts new sync rules version) + // 6. needsInitialSync: false, valid slot -> resume streaming replication + // The main advantage of MissingReplicationSlotError are: + // 1. If there was a complete snapshot already (cases 4/5), users can still sync from that snapshot while + // we do the reprocessing under a new slot name. + // 2. If there was a partial snapshot (case 1), we can start with the new slot faster by not waiting for + // the partial data to be cleared. + if (slot != null) { + // This checks that the slot is still valid + + // wal_status is present in postgres 13+ + // invalidation_reason is present in postgres 17+ + const lost = slot.wal_status == 'lost'; + if (lost) { + // Case 1 / 4 + throw new MissingReplicationSlotError( + `Replication slot ${slotName} is not valid anymore. invalidation_reason: ${slot.invalidation_reason ?? 
'unknown'}` + ); + } + // Case 3 / 6 + return { + needsInitialSync: !snapshotDone, + needsNewSlot: false + }; + } else { + if (snapshotDone) { + // Case 5 + // This will create a new slot, while keeping the current sync rules active + throw new MissingReplicationSlotError(`Replication slot ${slotName} is missing`); + } + // Case 2 + // This will clear data (if any) and re-create the same slot + return { needsInitialSync: true, needsNewSlot: true }; + } + } + + async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise { + const results = await db.query({ + statement: `SELECT reltuples::bigint AS estimate + FROM pg_class + WHERE oid = $1::regclass`, + params: [{ value: table.qualifiedName, type: 'varchar' }] + }); + const row = results.rows[0]; + if ((row?.[0] ?? -1n) == -1n) { + return -1; + } else { + return Number(row[0]); + } + } + + public async setupSlot(db: pgwire.PgConnection, status: InitResult) { + // If anything here errors, the entire replication process is aborted, + // and all connections are closed, including this one. + const slotName = this.slot_name; + + if (status.needsNewSlot) { + // This happens when there is no existing replication slot, or if the + // existing one is unhealthy. + // In those cases, we have to start replication from scratch. + // If there is an existing healthy slot, we can skip this and continue + // initial replication where we left off. + await this.storage.clear({ signal: this.abortSignal }); + + await db.query({ + statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1', + params: [{ type: 'varchar', value: slotName }] + }); + + // We use the replication connection here, not a pool. + // The replication slot must be created before we start snapshotting tables. + const initReplicationConnection = await this.connections.replicationConnection(); + try { + await initReplicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`); + } finally { + await initReplicationConnection.end(); + } + + this.logger.info(`Created replication slot ${slotName}`); + } + } + + async replicateTable(table: SourceTable) { + const db = await this.connections.snapshotConnection(); + try { + const flushResults = await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }, + async (batch) => { + await this.snapshotTableInTx(batch, db, table); + // This commit ensures we set keepalive_op. + // It may be better if that is automatically set when flushing. 
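+          // Committing at ZERO_LSN rather than at the current WAL LSN ensures that, if this
+          // snapshot is retried, changes applied before the snapshot are not skipped.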
+ await batch.commit(ZERO_LSN); + } + ); + this.logger.info(`Flushed snapshot at ${flushResults?.flushed_op}`); + } finally { + await db.end(); + } + } + + async waitForInitialSnapshot() { + await this.initialSnapshotDone.promise; + } + + async replicationLoop() { + try { + if (this.queue.size == 0) { + // Special case where we start with no tables to snapshot + await this.markSnapshotDone(); + } + while (!this.abortSignal.aborted) { + const table = this.queue.values().next().value; + if (table == null) { + this.initialSnapshotDone.resolve(); + await timers.setTimeout(500, { signal: this.abortSignal }); + continue; + } + + await this.replicateTable(table); + this.queue.delete(table); + if (this.queue.size == 0) { + await this.markSnapshotDone(); + } + } + throw new ReplicationAbortedError(`Replication loop aborted`, this.abortSignal.reason); + } catch (e) { + // If initial snapshot already completed, this has no effect + this.initialSnapshotDone.reject(e); + throw e; + } + } + + private async markSnapshotDone() { + const db = await this.connections.snapshotConnection(); + await using _ = { [Symbol.asyncDispose]: () => db.end() }; + + const flushResults = await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }, + async (batch) => { + const rs = await db.query(`select pg_current_wal_lsn() as lsn`); + const globalLsnNotBefore = rs.rows[0][0]; + await batch.markAllSnapshotDone(globalLsnNotBefore); + } + ); + /** + * Send a keepalive message after initial replication. + * In some edge cases we wait for a keepalive after the initial snapshot. + * If we don't explicitly check the contents of keepalive messages then a keepalive is detected + * rather quickly after initial replication - perhaps due to other WAL events. + * If we do explicitly check the contents of messages, we need an actual keepalive payload in order + * to advance the active sync rules LSN. + */ + await sendKeepAlive(db); + + const lastOp = flushResults?.flushed_op; + if (lastOp != null) { + // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. + // TODO: only run this after initial replication, not after each table. + await this.storage.populatePersistentChecksumCache({ + // No checkpoint yet, but we do have the opId. + maxOpId: lastOp, + signal: this.abortSignal + }); + } + } + + /** + * Start initial replication. + * + * If (partial) replication was done before on this slot, this clears the state + * and starts again from scratch. 
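+   * (The clearing itself happens in setupSlot(); this method resolves the source table patterns,
+   * estimates row counts, and adds the outstanding tables to the snapshot queue.)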
+ */ + async queueSnapshotTables(db: pgwire.PgConnection) { + const sourceTables = this.sync_rules.getSourceTables(); + + await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }, + async (batch) => { + for (let tablePattern of sourceTables) { + const tables = await this.getQualifiedTableNames(batch, db, tablePattern); + // Pre-get counts + for (let table of tables) { + if (table.snapshotComplete) { + this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); + continue; + } + const count = await this.estimatedCountNumber(db, table); + table = await batch.updateTableProgress(table, { totalEstimatedCount: count }); + this.relationCache.update(table); + + this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + + this.queue.add(table); + } + } + } + ); + } + + static *getQueryData(results: Iterable): Generator { + for (let row of results) { + yield toSyncRulesRow(row); + } + } + + public async queueSnapshot(batch: storage.BucketStorageBatch, table: storage.SourceTable) { + await batch.markTableSnapshotRequired(table); + this.queue.add(table); + } + + public async snapshotTableInTx( + batch: storage.BucketStorageBatch, + db: pgwire.PgConnection, + table: storage.SourceTable, + limited?: PrimaryKeyValue[] + ): Promise { + // Note: We use the default "Read Committed" isolation level here, not snapshot isolation. + // The data may change during the transaction, but that is compensated for in the streaming + // replication afterwards. + await db.query('BEGIN'); + try { + let tableLsnNotBefore: string; + await this.snapshotTable(batch, db, table, limited); + + // Get the current LSN. + // The data will only be consistent once incremental replication has passed that point. + // We have to get this LSN _after_ we have finished the table snapshot. + // + // There are basically two relevant LSNs here: + // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side, + // but it is implicitly recorded in the replication slot. + // B: The LSN after the table snapshot is complete, which is what we get here. + // When we do the snapshot queries, the data that we get back for each chunk could match the state + // anywhere between A and B. To actually have a consistent state on our side, we need to: + // 1. Complete the snapshot. + // 2. Wait until logical replication has caught up with all the change between A and B. + // Calling `markSnapshotDone(LSN B)` covers that. + const rs = await db.query(`select pg_current_wal_lsn() as lsn`); + tableLsnNotBefore = rs.rows[0][0]; + // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. + await db.query('COMMIT'); + this.logger.info(`Snapshot complete for table ${table.qualifiedName}, resume at ${tableLsnNotBefore}`); + const [resultTable] = await batch.markTableSnapshotDone([table], tableLsnNotBefore); + this.relationCache.update(resultTable); + return resultTable; + } catch (e) { + await db.query('ROLLBACK'); + throw e; + } + } + + private async snapshotTable( + batch: storage.BucketStorageBatch, + db: pgwire.PgConnection, + table: storage.SourceTable, + limited?: PrimaryKeyValue[] + ) { + let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount; + let at = table.snapshotStatus?.replicatedCount ?? 0; + let lastCountTime = 0; + let q: SnapshotQuery; + // We do streaming on two levels: + // 1. 
Coarse level: DELCARE CURSOR, FETCH 10000 at a time. + // 2. Fine level: Stream chunks from each fetch call. + if (limited) { + q = new IdSnapshotQuery(db, table, limited); + } else if (ChunkedSnapshotQuery.supports(table)) { + // Single primary key - we can use the primary key for chunking + const orderByKey = table.replicaIdColumns[0]; + q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null); + if (table.snapshotStatus?.lastKey != null) { + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}` + ); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`); + } + } else { + // Fallback case - query the entire table + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`); + q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength); + at = 0; + } + await q.initialize(); + + let columns: { i: number; name: string }[] = []; + let columnMap: Record = {}; + let hasRemainingData = true; + while (hasRemainingData) { + // Fetch 10k at a time. + // The balance here is between latency overhead per FETCH call, + // and not spending too much time on each FETCH call. + // We aim for a couple of seconds on each FETCH call. + const cursor = q.nextChunk(); + hasRemainingData = false; + // pgwire streams rows in chunks. + // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically. + // There are typically 100-200 rows per chunk. + for await (let chunk of cursor) { + if (chunk.tag == 'RowDescription') { + // We get a RowDescription for each FETCH call, but they should + // all be the same. + let i = 0; + columns = chunk.payload.map((c) => { + return { i: i++, name: c.name }; + }); + for (let column of chunk.payload) { + columnMap[column.name] = column.typeOid; + } + continue; + } + + const rows = chunk.rows.map((row) => { + let q: DatabaseInputRow = {}; + for (let c of columns) { + q[c.name] = row[c.i]; + } + return q; + }); + if (rows.length > 0) { + hasRemainingData = true; + } + + for (const inputRecord of PostgresSnapshotter.getQueryData(rows)) { + const record = this.syncRulesRecord(this.connections.types.constructRowRecord(columnMap, inputRecord)); + // This auto-flushes when the batch reaches its size limit + await batch.save({ + tag: storage.SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: record, + afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns) + }); + } + + at += rows.length; + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length); + + this.touch(); + } + + // Important: flush before marking progress + await batch.flush(); + if (limited == null) { + let lastKey: Uint8Array | undefined; + if (q instanceof ChunkedSnapshotQuery) { + lastKey = q.getLastKeySerialized(); + } + if (lastCountTime < performance.now() - 10 * 60 * 1000) { + // Even though we're doing the snapshot inside a transaction, the transaction uses + // the default "Read Committed" isolation level. This means we can get new data + // within the transaction, so we re-estimate the count every 10 minutes when replicating + // large tables. 
+ totalEstimatedCount = await this.estimatedCountNumber(db, table); + lastCountTime = performance.now(); + } + table = await batch.updateTableProgress(table, { + lastKey: lastKey, + replicatedCount: at, + totalEstimatedCount: totalEstimatedCount + }); + this.relationCache.update(table); + + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`); + } + + if (this.abortSignal.aborted) { + // We only abort after flushing + throw new ReplicationAbortedError(`Table snapshot interrupted`, this.abortSignal.reason); + } + } + } + + async handleRelation(options: { + batch: storage.BucketStorageBatch; + descriptor: SourceEntityDescriptor; + referencedTypeIds: number[]; + }) { + const { batch, descriptor, referencedTypeIds } = options; + + if (!descriptor.objectId && typeof descriptor.objectId != 'number') { + throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); + } + const result = await this.storage.resolveTable({ + group_id: this.group_id, + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor, + sync_rules: this.sync_rules + }); + this.relationCache.update(result.table); + + // Drop conflicting tables. This includes for example renamed tables. + await batch.drop(result.dropTables); + + // Ensure we have a description for custom types referenced in the table. + await this.connections.types.fetchTypes(referencedTypeIds); + + return result.table; + } + + private touch() { + container.probes.touch().catch((e) => { + this.logger.error(`Error touching probe`, e); + }); + } + + private syncRulesRecord(row: SqliteInputRow): SqliteRow; + private syncRulesRecord(row: SqliteInputRow | undefined): SqliteRow | undefined; + + private syncRulesRecord(row: SqliteInputRow | undefined): SqliteRow | undefined { + if (row == null) { + return undefined; + } + return this.sync_rules.applyRowContext(row); + } +} diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 84da6200b..9e3426d6a 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -1,9 +1,7 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { container, - DatabaseConnectionError, logger as defaultLogger, - ErrorCode, Logger, ReplicationAbortedError, ReplicationAssertionError @@ -22,28 +20,19 @@ import * as pgwire from '@powersync/service-jpgwire'; import { applyValueContext, CompatibilityContext, - DatabaseInputRow, SqliteInputRow, SqliteInputValue, SqliteRow, SqlSyncRules, - TablePattern, - ToastableSqliteRow, - toSyncRulesRow + ToastableSqliteRow } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; import { PgManager } from './PgManager.js'; import { getPgOutputRelation, getRelId, referencedColumnTypeIds } from './PgRelation.js'; -import { checkSourceConfiguration, checkTableRls, getReplicationIdentityColumns } from './replication-utils.js'; -import { - ChunkedSnapshotQuery, - IdSnapshotQuery, - MissingRow, - PrimaryKeyValue, - SimpleSnapshotQuery, - SnapshotQuery -} from './SnapshotQuery.js'; +import { PostgresSnapshotter } from './PostgresSnapshotter.js'; +import { ensureStorageCompatibility } from './replication-utils.js'; +import { IdSnapshotQuery, MissingRow, PrimaryKeyValue } from 
'./SnapshotQuery.js'; export interface WalStreamOptions { logger?: Logger; @@ -62,13 +51,6 @@ export interface WalStreamOptions { snapshotChunkLength?: number; } -interface InitResult { - /** True if initial snapshot is not yet done. */ - needsInitialSync: boolean; - /** True if snapshot must be started from scratch with a new slot. */ - needsNewSlot: boolean; -} - export const ZERO_LSN = '00000000/00000000'; export const PUBLICATION_NAME = 'powersync'; export const POSTGRES_DEFAULT_SCHEMA = 'public'; @@ -120,7 +102,11 @@ export class WalStream { private connections: PgManager; - private abort_signal: AbortSignal; + private abortController = new AbortController(); + private abortSignal: AbortSignal = this.abortController.signal; + + private initPromise: Promise | null = null; + private snapshotter: PostgresSnapshotter; private relationCache = new RelationCache((relation: number | SourceTable) => { if (typeof relation == 'number') { @@ -131,8 +117,6 @@ export class WalStream { private startedStreaming = false; - private snapshotChunkLength: number; - /** * Time of the oldest uncommitted change, according to the source db. * This is used to determine the replication lag. @@ -144,9 +128,7 @@ export class WalStream { */ private isStartingReplication = true; - private initialSnapshotPromise: Promise | null = null; - - constructor(options: WalStreamOptions) { + constructor(private options: WalStreamOptions) { this.logger = options.logger ?? defaultLogger; this.storage = options.storage; this.metrics = options.metrics; @@ -154,10 +136,17 @@ export class WalStream { this.group_id = options.storage.group_id; this.slot_name = options.storage.slot_name; this.connections = options.connections; - this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000; - this.abort_signal = options.abort_signal; - this.abort_signal.addEventListener( + // We wrap in our own abort controller so we can trigger abort internally. + options.abort_signal.addEventListener('abort', () => { + this.abortController.abort(options.abort_signal.reason); + }); + if (options.abort_signal.aborted) { + this.abortController.abort(options.abort_signal.reason); + } + + this.snapshotter = new PostgresSnapshotter({ ...options, abort_signal: this.abortSignal }); + this.abortSignal.addEventListener( 'abort', () => { if (this.startedStreaming) { @@ -179,470 +168,7 @@ export class WalStream { } get stopped() { - return this.abort_signal.aborted; - } - - async getQualifiedTableNames( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - tablePattern: TablePattern - ): Promise { - const schema = tablePattern.schema; - if (tablePattern.connectionTag != this.connections.connectionTag) { - return []; - } - - let tableRows: any[]; - const prefix = tablePattern.isWildcard ? 
tablePattern.tablePrefix : undefined; - - { - let query = ` - SELECT - c.oid AS relid, - c.relname AS table_name, - (SELECT - json_agg(DISTINCT a.atttypid) - FROM pg_attribute a - WHERE a.attnum > 0 AND NOT a.attisdropped AND a.attrelid = c.oid) - AS column_types - FROM pg_class c - JOIN pg_namespace n ON n.oid = c.relnamespace - WHERE n.nspname = $1 - AND c.relkind = 'r'`; - - if (tablePattern.isWildcard) { - query += ' AND c.relname LIKE $2'; - } else { - query += ' AND c.relname = $2'; - } - - const result = await db.query({ - statement: query, - params: [ - { type: 'varchar', value: schema }, - { type: 'varchar', value: tablePattern.tablePattern } - ] - }); - - tableRows = pgwire.pgwireRows(result); - } - - let result: storage.SourceTable[] = []; - - for (let row of tableRows) { - const name = row.table_name as string; - if (typeof row.relid != 'bigint') { - throw new ReplicationAssertionError(`Missing relid for ${name}`); - } - const relid = Number(row.relid as bigint); - - if (prefix && !name.startsWith(prefix)) { - continue; - } - - const rs = await db.query({ - statement: `SELECT 1 FROM pg_publication_tables WHERE pubname = $1 AND schemaname = $2 AND tablename = $3`, - params: [ - { type: 'varchar', value: PUBLICATION_NAME }, - { type: 'varchar', value: tablePattern.schema }, - { type: 'varchar', value: name } - ] - }); - if (rs.rows.length == 0) { - this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`); - continue; - } - - try { - const result = await checkTableRls(db, relid); - if (!result.canRead) { - // We log the message, then continue anyway, since the check does not cover all cases. - this.logger.warn(result.message!); - } - } catch (e) { - // It's possible that we just don't have permission to access pg_roles - log the error and continue. - this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e); - } - - const cresult = await getReplicationIdentityColumns(db, relid); - - const columnTypes = (JSON.parse(row.column_types) as string[]).map((e) => Number(e)); - const table = await this.handleRelation({ - batch, - descriptor: { - name, - schema, - objectId: relid, - replicaIdColumns: cresult.replicationColumns - } as SourceEntityDescriptor, - snapshot: false, - referencedTypeIds: columnTypes - }); - - result.push(table); - } - return result; - } - - async initSlot(): Promise { - await checkSourceConfiguration(this.connections.pool, PUBLICATION_NAME); - await this.ensureStorageCompatibility(); - - const slotName = this.slot_name; - - const status = await this.storage.getStatus(); - const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null; - if (snapshotDone) { - // Snapshot is done, but we still need to check the replication slot status - this.logger.info(`Initial replication already done`); - } - - // Check if replication slot exists - const slot = pgwire.pgwireRows( - await this.connections.pool.query({ - // We specifically want wal_status and invalidation_reason, but it's not available on older versions, - // so we just query *. - statement: 'SELECT * FROM pg_replication_slots WHERE slot_name = $1', - params: [{ type: 'varchar', value: slotName }] - }) - )[0]; - - // Previously we also used pg_catalog.pg_logical_slot_peek_binary_changes to confirm that we can query the slot. - // However, there were some edge cases where the query times out, repeating the query, ultimately - // causing high load on the source database and never recovering automatically. 
- // We now instead jump straight to replication if the wal_status is not "lost", rather detecting those - // errors during streaming replication, which is a little more robust. - - // We can have: - // 1. needsInitialSync: true, lost slot -> MissingReplicationSlotError (starts new sync rules version). - // Theoretically we could handle this the same as (2). - // 2. needsInitialSync: true, no slot -> create new slot - // 3. needsInitialSync: true, valid slot -> resume initial sync - // 4. needsInitialSync: false, lost slot -> MissingReplicationSlotError (starts new sync rules version) - // 5. needsInitialSync: false, no slot -> MissingReplicationSlotError (starts new sync rules version) - // 6. needsInitialSync: false, valid slot -> resume streaming replication - // The main advantage of MissingReplicationSlotError are: - // 1. If there was a complete snapshot already (cases 4/5), users can still sync from that snapshot while - // we do the reprocessing under a new slot name. - // 2. If there was a partial snapshot (case 1), we can start with the new slot faster by not waiting for - // the partial data to be cleared. - if (slot != null) { - // This checks that the slot is still valid - - // wal_status is present in postgres 13+ - // invalidation_reason is present in postgres 17+ - const lost = slot.wal_status == 'lost'; - if (lost) { - // Case 1 / 4 - throw new MissingReplicationSlotError( - `Replication slot ${slotName} is not valid anymore. invalidation_reason: ${slot.invalidation_reason ?? 'unknown'}` - ); - } - // Case 3 / 6 - return { - needsInitialSync: !snapshotDone, - needsNewSlot: false - }; - } else { - if (snapshotDone) { - // Case 5 - // This will create a new slot, while keeping the current sync rules active - throw new MissingReplicationSlotError(`Replication slot ${slotName} is missing`); - } - // Case 2 - // This will clear data (if any) and re-create the same slot - return { needsInitialSync: true, needsNewSlot: true }; - } - } - - async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise { - const results = await db.query({ - statement: `SELECT reltuples::bigint AS estimate -FROM pg_class -WHERE oid = $1::regclass`, - params: [{ value: table.qualifiedName, type: 'varchar' }] - }); - const row = results.rows[0]; - if ((row?.[0] ?? -1n) == -1n) { - return -1; - } else { - return Number(row[0]); - } - } - - /** - * Start initial replication. - * - * If (partial) replication was done before on this slot, this clears the state - * and starts again from scratch. - */ - async startInitialReplication(replicationConnection: pgwire.PgConnection, status: InitResult) { - // If anything here errors, the entire replication process is aborted, - // and all connections are closed, including this one. - const db = await this.connections.snapshotConnection(); - - const slotName = this.slot_name; - - if (status.needsNewSlot) { - // This happens when there is no existing replication slot, or if the - // existing one is unhealthy. - // In those cases, we have to start replication from scratch. - // If there is an existing healthy slot, we can skip this and continue - // initial replication where we left off. - await this.storage.clear({ signal: this.abort_signal }); - - await db.query({ - statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1', - params: [{ type: 'varchar', value: slotName }] - }); - - // We use the replication connection here, not a pool. 
- // The replication slot must be created before we start snapshotting tables. - await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`); - - this.logger.info(`Created replication slot ${slotName}`); - } - - await this.initialReplication(db); - } - - async initialReplication(db: pgwire.PgConnection) { - const sourceTables = this.sync_rules.getSourceTables(); - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: ZERO_LSN, - defaultSchema: POSTGRES_DEFAULT_SCHEMA, - storeCurrentData: true, - skipExistingRows: true - }, - async (batch) => { - let tablesWithStatus: SourceTable[] = []; - for (let tablePattern of sourceTables) { - const tables = await this.getQualifiedTableNames(batch, db, tablePattern); - // Pre-get counts - for (let table of tables) { - if (table.snapshotComplete) { - this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); - continue; - } - const count = await this.estimatedCountNumber(db, table); - table = await batch.updateTableProgress(table, { totalEstimatedCount: count }); - this.relationCache.update(table); - tablesWithStatus.push(table); - - this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - } - } - - for (let table of tablesWithStatus) { - await this.snapshotTableInTx(batch, db, table); - this.touch(); - } - - // Always commit the initial snapshot at zero. - // This makes sure we don't skip any changes applied before starting this snapshot, - // in the case of snapshot retries. - // We could alternatively commit at the replication slot LSN. - - // Get the current LSN for hte snapshot. - // We could also use the LSN from the last table snapshto. - const rs = await db.query(`select pg_current_wal_lsn() as lsn`); - const noCommitBefore = rs.rows[0][0]; - - await batch.markAllSnapshotDone(noCommitBefore); - await batch.commit(ZERO_LSN); - } - ); - /** - * Send a keepalive message after initial replication. - * In some edge cases we wait for a keepalive after the initial snapshot. - * If we don't explicitly check the contents of keepalive messages then a keepalive is detected - * rather quickly after initial replication - perhaps due to other WAL events. - * If we do explicitly check the contents of messages, we need an actual keepalive payload in order - * to advance the active sync rules LSN. - */ - await sendKeepAlive(db); - - const lastOp = flushResults?.flushed_op; - if (lastOp != null) { - // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. - await this.storage.populatePersistentChecksumCache({ - // No checkpoint yet, but we do have the opId. - maxOpId: lastOp, - signal: this.abort_signal - }); - } - } - - static *getQueryData(results: Iterable): Generator { - for (let row of results) { - yield toSyncRulesRow(row); - } - } - private async snapshotTableInTx( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - table: storage.SourceTable, - limited?: PrimaryKeyValue[] - ): Promise { - // Note: We use the default "Read Committed" isolation level here, not snapshot isolation. - // The data may change during the transaction, but that is compensated for in the streaming - // replication afterwards. - await db.query('BEGIN'); - try { - await this.snapshotTable(batch, db, table, limited); - - // Get the current LSN. - // The data will only be consistent once incremental replication has passed that point. - // We have to get this LSN _after_ we have finished the table snapshot. 
- // - // There are basically two relevant LSNs here: - // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side, - // but it is implicitly recorded in the replication slot. - // B: The LSN after the table snapshot is complete, which is what we get here. - // When we do the snapshot queries, the data that we get back for each chunk could match the state - // anywhere between A and B. To actually have a consistent state on our side, we need to: - // 1. Complete the snapshot. - // 2. Wait until logical replication has caught up with all the change between A and B. - // Calling `markSnapshotDone(LSN B)` covers that. - const rs = await db.query(`select pg_current_wal_lsn() as lsn`); - const tableLsnNotBefore = rs.rows[0][0]; - - // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. - await db.query('COMMIT'); - const [resultTable] = await batch.markTableSnapshotDone([table], tableLsnNotBefore); - this.relationCache.update(resultTable); - return resultTable; - } catch (e) { - await db.query('ROLLBACK'); - throw e; - } - } - - private async snapshotTable( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - table: storage.SourceTable, - limited?: PrimaryKeyValue[] - ) { - let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount; - let at = table.snapshotStatus?.replicatedCount ?? 0; - let lastCountTime = 0; - let q: SnapshotQuery; - // We do streaming on two levels: - // 1. Coarse level: DELCARE CURSOR, FETCH 10000 at a time. - // 2. Fine level: Stream chunks from each fetch call. - if (limited) { - q = new IdSnapshotQuery(db, table, limited); - } else if (ChunkedSnapshotQuery.supports(table)) { - // Single primary key - we can use the primary key for chunking - const orderByKey = table.replicaIdColumns[0]; - q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null); - if (table.snapshotStatus?.lastKey != null) { - this.logger.info( - `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}` - ); - } else { - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`); - } - } else { - // Fallback case - query the entire table - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`); - q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength); - at = 0; - } - await q.initialize(); - - let columns: { i: number; name: string }[] = []; - let columnMap: Record = {}; - let hasRemainingData = true; - while (hasRemainingData) { - // Fetch 10k at a time. - // The balance here is between latency overhead per FETCH call, - // and not spending too much time on each FETCH call. - // We aim for a couple of seconds on each FETCH call. - const cursor = q.nextChunk(); - hasRemainingData = false; - // pgwire streams rows in chunks. - // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically. - // There are typically 100-200 rows per chunk. - for await (let chunk of cursor) { - if (chunk.tag == 'RowDescription') { - // We get a RowDescription for each FETCH call, but they should - // all be the same. 
- let i = 0; - columns = chunk.payload.map((c) => { - return { i: i++, name: c.name }; - }); - for (let column of chunk.payload) { - columnMap[column.name] = column.typeOid; - } - continue; - } - - const rows = chunk.rows.map((row) => { - let q: DatabaseInputRow = {}; - for (let c of columns) { - q[c.name] = row[c.i]; - } - return q; - }); - if (rows.length > 0) { - hasRemainingData = true; - } - - for (const inputRecord of WalStream.getQueryData(rows)) { - const record = this.syncRulesRecord(this.connections.types.constructRowRecord(columnMap, inputRecord)); - // This auto-flushes when the batch reaches its size limit - await batch.save({ - tag: storage.SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: record, - afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns) - }); - } - - at += rows.length; - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length); - - this.touch(); - } - - // Important: flush before marking progress - await batch.flush(); - if (limited == null) { - let lastKey: Uint8Array | undefined; - if (q instanceof ChunkedSnapshotQuery) { - lastKey = q.getLastKeySerialized(); - } - if (lastCountTime < performance.now() - 10 * 60 * 1000) { - // Even though we're doing the snapshot inside a transaction, the transaction uses - // the default "Read Committed" isolation level. This means we can get new data - // within the transaction, so we re-estimate the count every 10 minutes when replicating - // large tables. - totalEstimatedCount = await this.estimatedCountNumber(db, table); - lastCountTime = performance.now(); - } - table = await batch.updateTableProgress(table, { - lastKey: lastKey, - replicatedCount: at, - totalEstimatedCount: totalEstimatedCount - }); - this.relationCache.update(table); - - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - } else { - this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`); - } - - if (this.abort_signal.aborted) { - // We only abort after flushing - throw new ReplicationAbortedError(`Initial replication interrupted`); - } - } + return this.abortSignal.aborted; } async handleRelation(options: { @@ -666,7 +192,10 @@ WHERE oid = $1::regclass`, this.relationCache.update(result.table); // Drop conflicting tables. This includes for example renamed tables. - await batch.drop(result.dropTables); + if (result.dropTables.length > 0) { + this.logger.info(`Dropping conflicting tables: ${result.dropTables.map((t) => t.qualifiedName).join(', ')}`); + await batch.drop(result.dropTables); + } // Ensure we have a description for custom types referenced in the table. await this.connections.types.fetchTypes(referencedTypeIds); @@ -678,22 +207,8 @@ WHERE oid = $1::regclass`, const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; if (shouldSnapshot) { - // Truncate this table, in case a previous snapshot was interrupted. - await batch.truncate([result.table]); - - // Start the snapshot inside a transaction. - // We use a dedicated connection for this. - const db = await this.connections.snapshotConnection(); - try { - const table = await this.snapshotTableInTx(batch, db, result.table); - // After the table snapshot, we wait for replication to catch up. - // To make sure there is actually something to replicate, we send a keepalive - // message. 
- await sendKeepAlive(db); - return table; - } finally { - await db.end(); - } + this.logger.info(`Queuing snapshot for new table ${result.table.qualifiedName}`); + await this.snapshotter.queueSnapshot(batch, result.table); } return result.table; @@ -720,7 +235,7 @@ WHERE oid = $1::regclass`, try { for (let rows of byTable.values()) { const table = rows[0].table; - await this.snapshotTableInTx( + await this.snapshotter.snapshotTableInTx( batch, db, table, @@ -822,73 +337,116 @@ WHERE oid = $1::regclass`, return null; } + /** + * Start replication loop, and continue until aborted or error. + */ async replicate() { + let streamPromise: Promise | null = null; + let loopPromise: Promise | null = null; try { - // If anything errors here, the entire replication process is halted, and - // all connections automatically closed, including this one. - this.initialSnapshotPromise = (async () => { - const initReplicationConnection = await this.connections.replicationConnection(); - await this.initReplication(initReplicationConnection); - await initReplicationConnection.end(); - })(); - - await this.initialSnapshotPromise; - - // At this point, the above connection has often timed out, so we start a new one - const streamReplicationConnection = await this.connections.replicationConnection(); - await this.streamChanges(streamReplicationConnection); - await streamReplicationConnection.end(); + this.initPromise = this.initReplication(); + await this.initPromise; + // These Promises are both expected to run until aborted or error. + streamPromise = this.streamChanges() + .then(() => { + throw new ReplicationAssertionError(`Replication stream exited unexpectedly`); + }) + .catch((e) => { + this.abortController.abort(e); + throw e; + }); + loopPromise = this.snapshotter + .replicationLoop() + .then(() => { + throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); + }) + .catch((e) => { + this.abortController.abort(e); + throw e; + }); + const results = await Promise.allSettled([loopPromise, streamPromise]); + // First, prioritize non-aborted errors + for (let result of results) { + if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) { + throw result.reason; + } + } + // Then include aborted errors + for (let result of results) { + if (result.status == 'rejected') { + throw result.reason; + } + } + + // If we get here, both Promises completed successfully, which is unexpected. + throw new ReplicationAssertionError(`Replication loop exited unexpectedly`); } catch (e) { await this.storage.reportError(e); throw e; + } finally { + // Just to make sure + this.abortController.abort(); } } /** - * After calling replicate(), call this to wait for the initial snapshot to complete. - * - * For tests only. + * For tests: Wait until the initial snapshot is complete. */ - async waitForInitialSnapshot() { - if (this.initialSnapshotPromise == null) { - throw new ReplicationAssertionError(`Initial snapshot not started yet`); + public async waitForInitialSnapshot() { + if (this.initPromise == null) { + throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()'); } - return this.initialSnapshotPromise; + await this.initPromise; + + await this.snapshotter.waitForInitialSnapshot(); } - async initReplication(replicationConnection: pgwire.PgConnection) { - const result = await this.initSlot(); - if (result.needsInitialSync) { - await this.startInitialReplication(replicationConnection, result); + /** + * Initialize replication. 
+ * Start replication loop, and continue until aborted, error or initial snapshot completed. + */ + private async initReplication() { + const result = await this.snapshotter.checkSlot(); + const db = await this.connections.snapshotConnection(); + try { + await this.snapshotter.setupSlot(db, result); + if (result.needsInitialSync) { + await this.snapshotter.queueSnapshotTables(db); + } + } finally { + await db.end(); } } - async streamChanges(replicationConnection: pgwire.PgConnection) { + private async streamChanges() { + const streamReplicationConnection = await this.connections.replicationConnection(); try { - await this.streamChangesInternal(replicationConnection); + await this.streamChangesInternal(streamReplicationConnection); } catch (e) { if (isReplicationSlotInvalidError(e)) { throw new MissingReplicationSlotError(e.message, e); } throw e; + } finally { + await streamReplicationConnection.end(); } } private async streamChangesInternal(replicationConnection: pgwire.PgConnection) { // When changing any logic here, check /docs/wal-lsns.md. - const { createEmptyCheckpoints } = await this.ensureStorageCompatibility(); + + // Viewing the contents of logical messages emitted with `pg_logical_emit_message` + // is only supported on Postgres >= 14.0. + // https://www.postgresql.org/docs/14/protocol-logical-replication.html + const { createEmptyCheckpoints, exposesLogicalMessages } = await ensureStorageCompatibility( + this.connections.pool, + this.storage.factory + ); const replicationOptions: Record = { proto_version: '1', publication_names: PUBLICATION_NAME }; - - /** - * Viewing the contents of logical messages emitted with `pg_logical_emit_message` - * is only supported on Postgres >= 14.0. - * https://www.postgresql.org/docs/14/protocol-logical-replication.html - */ - const exposesLogicalMessages = await this.checkLogicalMessageSupport(); if (exposesLogicalMessages) { /** * Only add this option if the Postgres server supports it. @@ -949,7 +507,7 @@ WHERE oid = $1::regclass`, for await (const chunk of replicationStream.pgoutputDecode()) { this.touch(); - if (this.abort_signal.aborted) { + if (this.abortSignal.aborted) { break; } @@ -1044,6 +602,7 @@ WHERE oid = $1::regclass`, // Big caveat: This _must not_ be used to skip individual messages, since this LSN // may be in the middle of the next transaction. // It must only be used to associate checkpoints with LSNs. + const didCommit = await batch.keepalive(chunkLastLsn); if (didCommit) { this.oldestUncommittedChange = null; @@ -1061,6 +620,8 @@ WHERE oid = $1::regclass`, } } ); + + throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason); } async ack(lsn: string, replicationStream: pgwire.ReplicationStream) { @@ -1071,55 +632,6 @@ WHERE oid = $1::regclass`, replicationStream.ack(lsn); } - /** - * Ensures that the storage is compatible with the replication connection. - * @throws {DatabaseConnectionError} If the storage is not compatible with the replication connection. - */ - protected async ensureStorageCompatibility(): Promise { - const supportsLogicalMessages = await this.checkLogicalMessageSupport(); - - const storageIdentifier = await this.storage.factory.getSystemIdentifier(); - if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) { - return { - // Keep the same behaviour as before allowing Postgres storage. 
- createEmptyCheckpoints: true, - oldestUncommittedChange: null - }; - } - - const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id); - /** - * Check if the same server is being used for both the sync bucket storage and the logical replication. - */ - const replicationIdentifier = await lib_postgres.utils.queryPostgresSystemIdentifier(this.connections.pool); - - if (!supportsLogicalMessages && replicationIdentifier.server_id == parsedStorageIdentifier.server_id) { - throw new DatabaseConnectionError( - ErrorCode.PSYNC_S1144, - `Separate Postgres servers are required for the replication source and sync bucket storage when using Postgres versions below 14.0.`, - new Error('Postgres version is below 14') - ); - } - - return { - /** - * Don't create empty checkpoints if the same Postgres database is used for the data source - * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops. - */ - createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name, - oldestUncommittedChange: null - }; - } - - /** - * Check if the replication connection Postgres server supports - * viewing the contents of logical replication messages. - */ - protected async checkLogicalMessageSupport() { - const version = await this.connections.getServerVersion(); - return version ? version.compareMain('14.0.0') >= 0 : false; - } - async getReplicationLagMillis(): Promise { if (this.oldestUncommittedChange == null) { if (this.isStartingReplication) { diff --git a/modules/module-postgres/src/replication/replication-utils.ts b/modules/module-postgres/src/replication/replication-utils.ts index 893f2ba8c..48f627222 100644 --- a/modules/module-postgres/src/replication/replication-utils.ts +++ b/modules/module-postgres/src/replication/replication-utils.ts @@ -1,11 +1,18 @@ import * as pgwire from '@powersync/service-jpgwire'; import * as lib_postgres from '@powersync/lib-service-postgres'; -import { ErrorCode, logger, ServiceAssertionError, ServiceError } from '@powersync/lib-services-framework'; -import { PatternResult, storage } from '@powersync/service-core'; +import { + DatabaseConnectionError, + ErrorCode, + logger, + ServiceAssertionError, + ServiceError +} from '@powersync/lib-services-framework'; +import { BucketStorageFactory, PatternResult, storage } from '@powersync/service-core'; import * as sync_rules from '@powersync/service-sync-rules'; import * as service_types from '@powersync/service-types'; import { ReplicationIdentity } from './PgRelation.js'; +import { getServerVersion } from '../utils/postgres_version.js'; export interface ReplicaIdentityResult { replicationColumns: storage.ColumnDescriptor[]; @@ -396,3 +403,57 @@ export async function cleanUpReplicationSlot(slotName: string, db: pgwire.PgClie params: [{ type: 'varchar', value: slotName }] }); } + +/** + * Ensures that the storage is compatible with the replication connection. + * @throws {DatabaseConnectionError} If the storage is not compatible with the replication connection. + */ +export async function ensureStorageCompatibility( + db: pgwire.PgClient, + factory: BucketStorageFactory +): Promise { + const supportsLogicalMessages = await checkLogicalMessageSupport(db); + + const storageIdentifier = await factory.getSystemIdentifier(); + if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) { + return { + // Keep the same behaviour as before allowing Postgres storage. 
+ createEmptyCheckpoints: true, + oldestUncommittedChange: null, + exposesLogicalMessages: supportsLogicalMessages + }; + } + + const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id); + /** + * Check if the same server is being used for both the sync bucket storage and the logical replication. + */ + const replicationIdentifier = await lib_postgres.utils.queryPostgresSystemIdentifier(db); + + if (!supportsLogicalMessages && replicationIdentifier.server_id == parsedStorageIdentifier.server_id) { + throw new DatabaseConnectionError( + ErrorCode.PSYNC_S1144, + `Separate Postgres servers are required for the replication source and sync bucket storage when using Postgres versions below 14.0.`, + new Error('Postgres version is below 14') + ); + } + + return { + /** + * Don't create empty checkpoints if the same Postgres database is used for the data source + * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops. + */ + createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name, + oldestUncommittedChange: null, + exposesLogicalMessages: supportsLogicalMessages + }; +} + +/** + * Check if the replication connection Postgres server supports + * viewing the contents of logical replication messages. + */ +export async function checkLogicalMessageSupport(db: pgwire.PgClient) { + const version = await getServerVersion(db); + return version ? version.compareMain('14.0.0') >= 0 : false; +} diff --git a/modules/module-postgres/test/src/pg_test.test.ts b/modules/module-postgres/test/src/pg_test.test.ts index 9d4a517cf..a4fa4f7d0 100644 --- a/modules/module-postgres/test/src/pg_test.test.ts +++ b/modules/module-postgres/test/src/pg_test.test.ts @@ -1,16 +1,18 @@ -import { WalStream } from '@module/replication/WalStream.js'; -import { PostgresTypeResolver } from '@module/types/resolver.js'; import * as pgwire from '@powersync/service-jpgwire'; import { applyRowContext, CompatibilityContext, - CompatibilityEdition, - DateTimeValue, SqliteInputRow, - TimeValue + DateTimeValue, + TimeValue, + CompatibilityEdition } from '@powersync/service-sync-rules'; import { describe, expect, test } from 'vitest'; import { clearTestDb, connectPgPool, connectPgWire, TEST_URI } from './util.js'; +import { WalStream } from '@module/replication/WalStream.js'; +import { PostgresTypeResolver } from '@module/types/resolver.js'; +import { CustomTypeRegistry } from '@module/types/registry.js'; +import { PostgresSnapshotter } from '@module/replication/PostgresSnapshotter.js'; describe('pg data types', () => { async function setupTable(db: pgwire.PgClient) { @@ -302,7 +304,7 @@ VALUES(10, ARRAY['null']::TEXT[]); await insert(db); const transformed = [ - ...WalStream.getQueryData(pgwire.pgwireRows(await db.query(`SELECT * FROM test_data ORDER BY id`))) + ...PostgresSnapshotter.getQueryData(pgwire.pgwireRows(await db.query(`SELECT * FROM test_data ORDER BY id`))) ]; checkResults(transformed); @@ -321,7 +323,7 @@ VALUES(10, ARRAY['null']::TEXT[]); await insert(db); const transformed = [ - ...WalStream.getQueryData( + ...PostgresSnapshotter.getQueryData( pgwire.pgwireRows( await db.query({ statement: `SELECT * FROM test_data WHERE $1 ORDER BY id`, @@ -345,7 +347,9 @@ VALUES(10, ARRAY['null']::TEXT[]); await insertArrays(db); const transformed = [ - ...WalStream.getQueryData(pgwire.pgwireRows(await db.query(`SELECT * FROM test_data_arrays ORDER BY id`))) + ...PostgresSnapshotter.getQueryData( + pgwire.pgwireRows(await 
db.query(`SELECT * FROM test_data_arrays ORDER BY id`)) + ) ].map((e) => applyRowContext(e, CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY)); checkResultArrays(transformed); @@ -448,7 +452,7 @@ INSERT INTO test_data(id, time, timestamp, timestamptz) VALUES (1, '17:42:01.12' `); const [row] = [ - ...WalStream.getQueryData( + ...PostgresSnapshotter.getQueryData( pgwire.pgwireRows(await db.query(`SELECT time, timestamp, timestamptz FROM test_data`)) ) ]; From 689dcee82bdcb126cda4aaa451d83a9ffcca4f10 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 7 Jan 2026 10:26:45 +0200 Subject: [PATCH 002/101] Fix error code check and improve test stability. --- .../src/storage/batch/PostgresBucketBatch.ts | 2 +- .../test/src/slow_tests.test.ts | 26 +++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts index ad9191768..53705621f 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts @@ -1115,7 +1115,7 @@ export class PostgresBucketBatch return await callback(db); }); } catch (err) { - const code = err[Symbol.for('pg.ErrorCode')]; + const code = err.cause?.code; if ((code == '40001' || code == '40P01') && Date.now() < lastTry) { // Serialization (lock) failure, retry this.logger.warn(`Serialization failure during replication transaction, retrying: ${err.message}`); diff --git a/modules/module-postgres/test/src/slow_tests.test.ts b/modules/module-postgres/test/src/slow_tests.test.ts index 1d2e9a424..cf5b371c6 100644 --- a/modules/module-postgres/test/src/slow_tests.test.ts +++ b/modules/module-postgres/test/src/slow_tests.test.ts @@ -19,7 +19,9 @@ import { createCoreReplicationMetrics, initializeCoreReplicationMetrics, reduceBucket, - storage + settledPromise, + storage, + unsettledPromise } from '@powersync/service-core'; import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import * as mongo_storage from '@powersync/service-module-mongodb-storage'; @@ -39,7 +41,7 @@ function defineSlowTests(config: storage.TestStorageConfig) { let walStream: WalStream | undefined; let connections: PgManager | undefined; let abortController: AbortController | undefined; - let streamPromise: Promise | undefined; + let streamPromise: Promise> | undefined; beforeAll(async () => { createCoreReplicationMetrics(METRICS_HELPER.metricsEngine); @@ -50,7 +52,7 @@ function defineSlowTests(config: storage.TestStorageConfig) { // This cleans up, similar to WalStreamTestContext.dispose(). // These tests are a little more complex than what is supported by WalStreamTestContext. 
abortController?.abort(); - await streamPromise?.catch((_) => {}); + await streamPromise; streamPromise = undefined; connections?.destroy(); @@ -105,9 +107,11 @@ bucket_definitions: await pool.query(`ALTER TABLE test_data REPLICA IDENTITY FULL`); let abort = false; - streamPromise = walStream.replicate().finally(() => { - abort = true; - }); + streamPromise = settledPromise( + walStream.replicate().finally(() => { + abort = true; + }) + ); await walStream.waitForInitialSnapshot(); const start = Date.now(); @@ -298,7 +302,7 @@ bucket_definitions: } abortController.abort(); - await streamPromise.catch((e) => { + await unsettledPromise(streamPromise).catch((e) => { if (e instanceof ReplicationAbortedError) { // Ignore } else { @@ -361,7 +365,7 @@ bucket_definitions: // 3. Start replication, but don't wait for it let initialReplicationDone = false; - streamPromise = walStream.replicate(); + streamPromise = settledPromise(walStream.replicate()); walStream .waitForInitialSnapshot() .catch((_) => {}) @@ -409,7 +413,7 @@ bucket_definitions: } abortController.abort(); - await streamPromise.catch((e) => { + await unsettledPromise(streamPromise).catch((e) => { if (e instanceof ReplicationAbortedError) { // Ignore } else { @@ -479,7 +483,7 @@ bucket_definitions: // 3. Start replication, but don't wait for it let initialReplicationDone = false; - streamPromise = context.replicateSnapshot().finally(() => { + streamPromise = settledPromise(context.replicateSnapshot()).finally(() => { initialReplicationDone = true; }); @@ -501,7 +505,7 @@ bucket_definitions: await new Promise((resolve) => setTimeout(resolve, Math.random() * 10)); } - await streamPromise; + await unsettledPromise(streamPromise); // 5. Once initial replication is done, wait for the streaming changes to complete syncing. const data = await context.getBucketData('global[]', 0n); From f08bb44a8bfffb861c13c9ec7b52fd161a30498a Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 7 Jan 2026 13:16:36 +0200 Subject: [PATCH 003/101] Initial implementation for MongoDB. 
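
Snapshotting for MongoDB now runs concurrently with change streaming, mirroring the Postgres change earlier in this series: snapshot and table-resolution logic moves out of ChangeStream into a new MongoSnapshotter, which keeps a queue of tables to snapshot and resolves an initial-snapshot promise once the queue drains. The change-stream error mapping is extracted into ChangeStreamErrors.ts so both classes can share it, and ChangeStream.replicate() now drives both loops behind an internal AbortController so that a failure in either one aborts the other. A rough standalone sketch of that coordination pattern follows (hypothetical names, not the actual classes):

    // Illustrative sketch only: two long-running tasks share an AbortController so
    // that a failure in one stops the other, similar in spirit to how replicate()
    // coordinates streamChanges() and the snapshotter's replicationLoop().
    async function runConcurrently(
      streamChanges: (signal: AbortSignal) => Promise<void>,
      snapshotLoop: (signal: AbortSignal) => Promise<void>
    ): Promise<never> {
      const controller = new AbortController();
      const wrap = (task: (signal: AbortSignal) => Promise<void>) =>
        task(controller.signal).catch((e) => {
          // Propagate the failure to the other task before re-throwing.
          controller.abort(e);
          throw e;
        });

      const results = await Promise.allSettled([wrap(streamChanges), wrap(snapshotLoop)]);

      // Surface the failure that triggered the abort first; the other task usually
      // only rejects with a secondary abort-related error.
      for (const result of results) {
        if (result.status == 'rejected' && result.reason === controller.signal.reason) {
          throw result.reason;
        }
      }
      for (const result of results) {
        if (result.status == 'rejected') {
          throw result.reason;
        }
      }

      // Both tasks are expected to run until aborted, so reaching this point is a bug.
      throw new Error('Replication tasks exited unexpectedly');
    }

The real implementation additionally distinguishes ReplicationAbortedError when deciding which rejection to report, so genuine failures take priority over abort-induced ones.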
--- modules/module-mongodb/package.json | 3 +- .../src/replication/ChangeStream.ts | 514 +++---------- .../src/replication/ChangeStreamErrors.ts | 38 + .../src/replication/MongoSnapshotter.ts | 706 ++++++++++++++++++ .../test/src/change_stream_utils.ts | 35 +- pnpm-lock.yaml | 3 + 6 files changed, 860 insertions(+), 439 deletions(-) create mode 100644 modules/module-mongodb/src/replication/ChangeStreamErrors.ts create mode 100644 modules/module-mongodb/src/replication/MongoSnapshotter.ts diff --git a/modules/module-mongodb/package.json b/modules/module-mongodb/package.json index 9a5b5a5b7..b56e157c3 100644 --- a/modules/module-mongodb/package.json +++ b/modules/module-mongodb/package.json @@ -35,6 +35,7 @@ "@powersync/service-sync-rules": "workspace:*", "@powersync/service-types": "workspace:*", "bson": "^6.10.4", + "p-defer": "^4.0.1", "ts-codec": "^1.3.0", "uuid": "^11.1.0" }, @@ -43,4 +44,4 @@ "@powersync/service-module-mongodb-storage": "workspace:*", "@powersync/service-module-postgres-storage": "workspace:*" } -} \ No newline at end of file +} diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 8bd4e67de..a366a60b9 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -1,7 +1,6 @@ -import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersync/lib-service-mongodb'; +import { mongo } from '@powersync/lib-service-mongodb'; import { container, - DatabaseConnectionError, logger as defaultLogger, ErrorCode, Logger, @@ -17,13 +16,7 @@ import { SourceTable, storage } from '@powersync/service-core'; -import { - DatabaseInputRow, - SqliteInputRow, - SqliteRow, - HydratedSyncRules, - TablePattern -} from '@powersync/service-sync-rules'; +import { DatabaseInputRow, SqliteInputRow, SqliteRow, HydratedSyncRules } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; import { MongoLSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; @@ -36,8 +29,9 @@ import { getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js'; -import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js'; import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js'; +import { ChangeStreamInvalidatedError, mapChangeStreamError } from './ChangeStreamErrors.js'; +import { MongoSnapshotter } from './MongoSnapshotter.js'; export interface ChangeStreamOptions { connections: MongoManager; @@ -60,25 +54,6 @@ export interface ChangeStreamOptions { logger?: Logger; } -interface InitResult { - needsInitialSync: boolean; - snapshotLsn: string | null; -} - -/** - * Thrown when the change stream is not valid anymore, and replication - * must be restarted. - * - * Possible reasons: - * * Some change stream documents do not have postImages. - * * startAfter/resumeToken is not valid anymore. 
- */ -export class ChangeStreamInvalidatedError extends DatabaseConnectionError { - constructor(message: string, cause: any) { - super(ErrorCode.PSYNC_S1344, message, cause); - } -} - export class ChangeStream { sync_rules: HydratedSyncRules; group_id: number; @@ -94,7 +69,11 @@ export class ChangeStream { private readonly maxAwaitTimeMS: number; - private abort_signal: AbortSignal; + private abortController = new AbortController(); + private abortSignal: AbortSignal = this.abortController.signal; + + private initPromise: Promise | null = null; + private snapshotter: MongoSnapshotter; private relationCache = new RelationCache(getCacheIdentifier); @@ -113,8 +92,6 @@ export class ChangeStream { private logger: Logger; - private snapshotChunkLength: number; - private changeStreamTimeout: number; constructor(options: ChangeStreamOptions) { @@ -123,7 +100,6 @@ export class ChangeStream { this.group_id = options.storage.group_id; this.connections = options.connections; this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000; - this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000; this.client = this.connections.client; this.defaultDb = this.connections.db; this.sync_rules = options.storage.getParsedSyncRules({ @@ -133,20 +109,25 @@ export class ChangeStream { // so we use 90% of the socket timeout value. this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9); - this.abort_signal = options.abort_signal; - this.abort_signal.addEventListener( - 'abort', - () => { - // TODO: Fast abort? - }, - { once: true } - ); - this.logger = options.logger ?? defaultLogger; + this.snapshotter = new MongoSnapshotter({ + ...options, + abort_signal: this.abortSignal, + logger: this.logger, + checkpointStreamId: this.checkpointStreamId + }); + + // We wrap in our own abort controller so we can trigger abort internally. + options.abort_signal.addEventListener('abort', () => { + this.abortController.abort(options.abort_signal.reason); + }); + if (options.abort_signal.aborted) { + this.abortController.abort(options.abort_signal.reason); + } } get stopped() { - return this.abort_signal.aborted; + return this.abortSignal.aborted; } private get usePostImages() { @@ -157,267 +138,6 @@ export class ChangeStream { return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; } - /** - * This resolves a pattern, persists the related metadata, and returns - * the resulting SourceTables. - * - * This implicitly checks the collection postImage configuration. 
- */ - async resolveQualifiedTableNames( - batch: storage.BucketStorageBatch, - tablePattern: TablePattern - ): Promise { - const schema = tablePattern.schema; - if (tablePattern.connectionTag != this.connections.connectionTag) { - return []; - } - - let nameFilter: RegExp | string; - if (tablePattern.isWildcard) { - nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)); - } else { - nameFilter = tablePattern.name; - } - let result: storage.SourceTable[] = []; - - // Check if the collection exists - const collections = await this.client - .db(schema) - .listCollections( - { - name: nameFilter - }, - { nameOnly: false } - ) - .toArray(); - - if (!tablePattern.isWildcard && collections.length == 0) { - this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`); - } - - for (let collection of collections) { - const table = await this.handleRelation( - batch, - getMongoRelation({ db: schema, coll: collection.name }), - // This is done as part of the initial setup - snapshot is handled elsewhere - { snapshot: false, collectionInfo: collection } - ); - - result.push(table); - } - - return result; - } - - async initSlot(): Promise { - const status = await this.storage.getStatus(); - if (status.snapshot_done && status.checkpoint_lsn) { - this.logger.info(`Initial replication already done`); - return { needsInitialSync: false, snapshotLsn: null }; - } - - return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn }; - } - - async estimatedCount(table: storage.SourceTable): Promise { - const count = await this.estimatedCountNumber(table); - return `~${count}`; - } - - async estimatedCountNumber(table: storage.SourceTable): Promise { - const db = this.client.db(table.schema); - return await db.collection(table.name).estimatedDocumentCount(); - } - - /** - * This gets a LSN before starting a snapshot, which we can resume streaming from after the snapshot. - * - * This LSN can survive initial replication restarts. - */ - private async getSnapshotLsn(): Promise { - const hello = await this.defaultDb.command({ hello: 1 }); - // Basic sanity check - if (hello.msg == 'isdbgrid') { - throw new ServiceError( - ErrorCode.PSYNC_S1341, - 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).' - ); - } else if (hello.setName == null) { - throw new ServiceError( - ErrorCode.PSYNC_S1342, - 'Standalone MongoDB instances are not supported - use a replicaset.' - ); - } - - // Open a change stream just to get a resume token for later use. - // We could use clusterTime from the hello command, but that won't tell us if the - // snapshot isn't valid anymore. - // If we just use the first resumeToken from the stream, we get two potential issues: - // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes - // in source db or other stream issues. - // 2. The first actual change we get may have the same clusterTime, causing us to incorrect - // skip that event. - // Instead, we create a new checkpoint document, and wait until we get that document back in the stream. - // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document - // periodically until the timeout is reached. 
- - const LSN_TIMEOUT_SECONDS = 60; - const LSN_CREATE_INTERVAL_SECONDS = 1; - - await using streamManager = this.openChangeStream({ lsn: null, maxAwaitTimeMs: 0 }); - const { stream } = streamManager; - const startTime = performance.now(); - let lastCheckpointCreated = -10_000; - let eventsSeen = 0; - - while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) { - if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) { - await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); - lastCheckpointCreated = performance.now(); - } - - // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream - const changeDocument = await stream.tryNext().catch((e) => { - throw mapChangeStreamError(e); - }); - if (changeDocument == null) { - continue; - } - - const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined; - - if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) { - const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId; - if (!this.checkpointStreamId.equals(checkpointId)) { - continue; - } - const { comparable: lsn } = new MongoLSN({ - timestamp: changeDocument.clusterTime!, - resume_token: changeDocument._id - }); - return lsn; - } - - eventsSeen += 1; - } - - // Could happen if there is a very large replication lag? - throw new ServiceError( - ErrorCode.PSYNC_S1301, - `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}` - ); - } - - /** - * Given a snapshot LSN, validate that we can read from it, by opening a change stream. - */ - private async validateSnapshotLsn(lsn: string) { - await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 }); - const { stream } = streamManager; - try { - // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream - await stream.tryNext(); - } catch (e) { - // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though - // we possibly cannot recover from it. - throw mapChangeStreamError(e); - } - } - - async initialReplication(snapshotLsn: string | null) { - const sourceTables = this.sync_rules.getSourceTables(); - await this.client.connect(); - - const flushResult = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, - defaultSchema: this.defaultDb.databaseName, - storeCurrentData: false, - skipExistingRows: true - }, - async (batch) => { - if (snapshotLsn == null) { - // First replication attempt - get a snapshot and store the timestamp - snapshotLsn = await this.getSnapshotLsn(); - await batch.setResumeLsn(snapshotLsn); - this.logger.info(`Marking snapshot at ${snapshotLsn}`); - } else { - this.logger.info(`Resuming snapshot at ${snapshotLsn}`); - // Check that the snapshot is still valid. - await this.validateSnapshotLsn(snapshotLsn); - } - - // Start by resolving all tables. - // This checks postImage configuration, and that should fail as - // early as possible. 
- let allSourceTables: SourceTable[] = []; - for (let tablePattern of sourceTables) { - const tables = await this.resolveQualifiedTableNames(batch, tablePattern); - allSourceTables.push(...tables); - } - - let tablesWithStatus: SourceTable[] = []; - for (let table of allSourceTables) { - if (table.snapshotComplete) { - this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); - continue; - } - let count = await this.estimatedCountNumber(table); - const updated = await batch.updateTableProgress(table, { - totalEstimatedCount: count - }); - tablesWithStatus.push(updated); - this.relationCache.update(updated); - this.logger.info( - `To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}` - ); - } - - for (let table of tablesWithStatus) { - await this.snapshotTable(batch, table); - await batch.markTableSnapshotDone([table]); - - this.touch(); - } - - // The checkpoint here is a marker - we need to replicate up to at least this - // point before the data can be considered consistent. - // We could do this for each individual table, but may as well just do it once for the entire snapshot. - const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - await batch.markAllSnapshotDone(checkpoint); - - // This will not create a consistent checkpoint yet, but will persist the op. - // Actual checkpoint will be created when streaming replication caught up. - await batch.commit(snapshotLsn); - - this.logger.info(`Snapshot done. Need to replicate from ${snapshotLsn} to ${checkpoint} to be consistent`); - } - ); - return { lastOpId: flushResult?.flushed_op }; - } - - private async setupCheckpointsCollection() { - const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION); - if (collection == null) { - await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { - changeStreamPreAndPostImages: { enabled: true } - }); - } else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) { - // Drop + create requires less permissions than collMod, - // and we don't care about the data in this collection. - await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION); - await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { - changeStreamPreAndPostImages: { enabled: true } - }); - } else { - // Clear the collection on startup, to keep it clean - // We never query this collection directly, and don't want to keep the data around. - // We only use this to get data into the oplog/changestream. - await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({}); - } - } - private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } { const sourceTables = this.sync_rules.getSourceTables(); @@ -475,76 +195,6 @@ export class ChangeStream { } } - private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) { - const totalEstimatedCount = await this.estimatedCountNumber(table); - let at = table.snapshotStatus?.replicatedCount ?? 
0; - const db = this.client.db(table.schema); - const collection = db.collection(table.name); - await using query = new ChunkedSnapshotQuery({ - collection, - key: table.snapshotStatus?.lastKey, - batchSize: this.snapshotChunkLength - }); - if (query.lastKey != null) { - this.logger.info( - `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}` - ); - } else { - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - } - - let lastBatch = performance.now(); - let nextChunkPromise = query.nextChunk(); - while (true) { - const { docs: docBatch, lastKey } = await nextChunkPromise; - if (docBatch.length == 0) { - // No more data - stop iterating - break; - } - - if (this.abort_signal.aborted) { - throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason); - } - - // Pre-fetch next batch, so that we can read and write concurrently - nextChunkPromise = query.nextChunk(); - for (let document of docBatch) { - const record = this.constructAfterRecord(document); - - // This auto-flushes when the batch reaches its size limit - await batch.save({ - tag: SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: record, - afterReplicaId: document._id - }); - } - - // Important: flush before marking progress - await batch.flush(); - at += docBatch.length; - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length); - - table = await batch.updateTableProgress(table, { - lastKey, - replicatedCount: at, - totalEstimatedCount: totalEstimatedCount - }); - this.relationCache.update(table); - - const duration = performance.now() - lastBatch; - lastBatch = performance.now(); - this.logger.info( - `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms` - ); - this.touch(); - } - // In case the loop was interrupted, make sure we await the last promise. - await nextChunkPromise; - } - private async getRelation( batch: storage.BucketStorageBatch, descriptor: SourceEntityDescriptor, @@ -634,14 +284,7 @@ export class ChangeStream { const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; if (shouldSnapshot) { this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); - // Truncate this table, in case a previous snapshot was interrupted. - await batch.truncate([result.table]); - - await this.snapshotTable(batch, result.table); - const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - - const [table] = await batch.markTableSnapshotDone([result.table], no_checkpoint_before_lsn); - return table; + await this.snapshotter.queueSnapshot(batch, result.table); } return result.table; @@ -705,38 +348,79 @@ export class ChangeStream { } async replicate() { + let streamPromise: Promise | null = null; + let loopPromise: Promise | null = null; try { // If anything errors here, the entire replication process is halted, and // all connections automatically closed, including this one. 
- await this.initReplication(); - await this.streamChanges(); + this.initPromise = this.initReplication(); + await this.initPromise; + streamPromise = this.streamChanges() + .then(() => { + throw new ReplicationAssertionError(`Replication stream exited unexpectedly`); + }) + .catch((e) => { + this.abortController.abort(e); + throw e; + }); + loopPromise = this.snapshotter + .replicationLoop() + .then(() => { + throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); + }) + .catch((e) => { + this.abortController.abort(e); + throw e; + }); + const results = await Promise.allSettled([loopPromise, streamPromise]); + // First, prioritize non-aborted errors + for (let result of results) { + if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) { + throw result.reason; + } + } + // Then include aborted errors + for (let result of results) { + if (result.status == 'rejected') { + throw result.reason; + } + } + + // If we get here, both Promises completed successfully, which is unexpected. + throw new ReplicationAssertionError(`Replication loop exited unexpectedly`); } catch (e) { await this.storage.reportError(e); throw e; + } finally { + // Just to make sure + this.abortController.abort(); + } + } + + /** + * For tests: Wait until the initial snapshot is complete. + */ + public async waitForInitialSnapshot() { + if (this.initPromise == null) { + throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()'); } + await this.initPromise; + await this.snapshotter.waitForInitialSnapshot(); } - async initReplication() { - const result = await this.initSlot(); - await this.setupCheckpointsCollection(); + private async initReplication() { + const result = await this.snapshotter.checkSlot(); + await this.snapshotter.setupCheckpointsCollection(); if (result.needsInitialSync) { if (result.snapshotLsn == null) { // Snapshot LSN is not present, so we need to start replication from scratch. - await this.storage.clear({ signal: this.abort_signal }); - } - const { lastOpId } = await this.initialReplication(result.snapshotLsn); - if (lastOpId != null) { - // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. - await this.storage.populatePersistentChecksumCache({ - signal: this.abort_signal, - // No checkpoint yet, but we do have the opId. 
- maxOpId: lastOpId - }); + await this.storage.clear({ signal: this.abortSignal }); } + await this.snapshotter.queueSnapshotTables(result.snapshotLsn); } } - async streamChanges() { + private async streamChanges() { try { await this.streamChangesInternal(); } catch (e) { @@ -802,7 +486,7 @@ export class ChangeStream { stream = this.defaultDb.watch(pipeline, streamOptions); } - this.abort_signal.addEventListener('abort', () => { + this.abortSignal.addEventListener('abort', () => { stream.close(); }); @@ -815,7 +499,7 @@ export class ChangeStream { }; } - async streamChangesInternal() { + private async streamChangesInternal() { await this.storage.startBatch( { logger: this.logger, @@ -840,7 +524,7 @@ export class ChangeStream { await using streamManager = this.openChangeStream({ lsn: resumeFromLsn }); const { stream, filters } = streamManager; - if (this.abort_signal.aborted) { + if (this.abortSignal.aborted) { await stream.close(); return; } @@ -862,7 +546,7 @@ export class ChangeStream { let lastEmptyResume = performance.now(); while (true) { - if (this.abort_signal.aborted) { + if (this.abortSignal.aborted) { break; } @@ -874,7 +558,7 @@ export class ChangeStream { break; } - if (this.abort_signal.aborted) { + if (this.abortSignal.aborted) { break; } @@ -1098,6 +782,8 @@ export class ChangeStream { } } ); + + throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason); } async getReplicationLagMillis(): Promise { @@ -1126,24 +812,4 @@ export class ChangeStream { } } -function mapChangeStreamError(e: any) { - if (isMongoNetworkTimeoutError(e)) { - // This typically has an unhelpful message like "connection 2 to 159.41.94.47:27017 timed out". - // We wrap the error to make it more useful. - throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e); - } else if (isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') { - // maxTimeMS was reached. Example message: - // MongoServerError: Executor error during aggregate command on namespace: powersync_test_data.$cmd.aggregate :: caused by :: operation exceeded time limit - throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e); - } else if ( - isMongoServerError(e) && - e.codeName == 'NoMatchingDocument' && - e.errmsg?.includes('post-image was not found') - ) { - throw new ChangeStreamInvalidatedError(e.errmsg, e); - } else if (isMongoServerError(e) && e.hasErrorLabel('NonResumableChangeStreamError')) { - throw new ChangeStreamInvalidatedError(e.message, e); - } else { - throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e); - } -} +export { ChangeStreamInvalidatedError }; diff --git a/modules/module-mongodb/src/replication/ChangeStreamErrors.ts b/modules/module-mongodb/src/replication/ChangeStreamErrors.ts new file mode 100644 index 000000000..44fc40cd3 --- /dev/null +++ b/modules/module-mongodb/src/replication/ChangeStreamErrors.ts @@ -0,0 +1,38 @@ +import { isMongoNetworkTimeoutError, isMongoServerError } from '@powersync/lib-service-mongodb'; +import { DatabaseConnectionError, ErrorCode } from '@powersync/lib-services-framework'; + +/** + * Thrown when the change stream is not valid anymore, and replication + * must be restarted. + * + * Possible reasons: + * * Some change stream documents do not have postImages. + * * startAfter/resumeToken is not valid anymore. 
+ */ +export class ChangeStreamInvalidatedError extends DatabaseConnectionError { + constructor(message: string, cause: any) { + super(ErrorCode.PSYNC_S1344, message, cause); + } +} + +export function mapChangeStreamError(e: any) { + if (isMongoNetworkTimeoutError(e)) { + // This typically has an unhelpful message like "connection 2 to 159.41.94.47:27017 timed out". + // We wrap the error to make it more useful. + throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e); + } else if (isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') { + // maxTimeMS was reached. Example message: + // MongoServerError: Executor error during aggregate command on namespace: powersync_test_data.$cmd.aggregate :: caused by :: operation exceeded time limit + throw new DatabaseConnectionError(ErrorCode.PSYNC_S1345, `Timeout while reading MongoDB ChangeStream`, e); + } else if ( + isMongoServerError(e) && + e.codeName == 'NoMatchingDocument' && + e.errmsg?.includes('post-image was not found') + ) { + throw new ChangeStreamInvalidatedError(e.errmsg, e); + } else if (isMongoServerError(e) && e.hasErrorLabel('NonResumableChangeStreamError')) { + throw new ChangeStreamInvalidatedError(e.message, e); + } else { + throw new DatabaseConnectionError(ErrorCode.PSYNC_S1346, `Error reading MongoDB ChangeStream`, e); + } +} diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts new file mode 100644 index 000000000..55c0b030b --- /dev/null +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -0,0 +1,706 @@ +import { mongo } from '@powersync/lib-service-mongodb'; +import { + container, + ErrorCode, + logger as defaultLogger, + Logger, + ReplicationAbortedError, + ServiceError +} from '@powersync/lib-services-framework'; +import { + MetricsEngine, + RelationCache, + SaveOperationTag, + SourceEntityDescriptor, + SourceTable, + InternalOpId, + storage +} from '@powersync/service-core'; +import { + DatabaseInputRow, + SqliteInputRow, + SqliteRow, + HydratedSyncRules, + TablePattern +} from '@powersync/service-sync-rules'; +import { ReplicationMetric } from '@powersync/service-types'; +import * as timers from 'node:timers/promises'; +import pDefer from 'p-defer'; +import { MongoLSN } from '../common/MongoLSN.js'; +import { PostImagesOption } from '../types/types.js'; +import { escapeRegExp } from '../utils.js'; +import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js'; +import { + constructAfterRecord, + createCheckpoint, + getCacheIdentifier, + getMongoRelation, + STANDALONE_CHECKPOINT_ID +} from './MongoRelation.js'; +import { MongoManager } from './MongoManager.js'; +import { mapChangeStreamError } from './ChangeStreamErrors.js'; +import { CHECKPOINTS_COLLECTION } from './replication-utils.js'; + +export interface MongoSnapshotterOptions { + connections: MongoManager; + storage: storage.SyncRulesBucketStorage; + metrics: MetricsEngine; + abort_signal: AbortSignal; + /** + * Override maxAwaitTimeMS for testing. + */ + maxAwaitTimeMS?: number; + /** + * Override snapshotChunkLength for testing. 
+ */ + snapshotChunkLength?: number; + logger?: Logger; + checkpointStreamId: mongo.ObjectId; +} + +interface InitResult { + needsInitialSync: boolean; + snapshotLsn: string | null; +} + +export class MongoSnapshotter { + sync_rules: HydratedSyncRules; + group_id: number; + + connection_id = 1; + + private readonly storage: storage.SyncRulesBucketStorage; + private readonly metrics: MetricsEngine; + + private connections: MongoManager; + private readonly client: mongo.MongoClient; + private readonly defaultDb: mongo.Db; + + private readonly maxAwaitTimeMS: number; + private readonly snapshotChunkLength: number; + + private abortSignal: AbortSignal; + + private relationCache = new RelationCache(getCacheIdentifier); + + private logger: Logger; + + private checkpointStreamId: mongo.ObjectId; + private changeStreamTimeout: number; + + private queue = new Set(); + private initialSnapshotDone = pDefer(); + private lastSnapshotOpId: InternalOpId | null = null; + + constructor(options: MongoSnapshotterOptions) { + this.storage = options.storage; + this.metrics = options.metrics; + this.group_id = options.storage.group_id; + this.connections = options.connections; + this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000; + this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000; + this.client = this.connections.client; + this.defaultDb = this.connections.db; + this.sync_rules = options.storage.getParsedSyncRules({ + defaultSchema: this.defaultDb.databaseName + }); + this.abortSignal = options.abort_signal; + this.logger = options.logger ?? defaultLogger; + this.checkpointStreamId = options.checkpointStreamId; + this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9); + } + + private get usePostImages() { + return this.connections.options.postImages != PostImagesOption.OFF; + } + + private get configurePostImages() { + return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; + } + + async checkSlot(): Promise { + const status = await this.storage.getStatus(); + if (status.snapshot_done && status.checkpoint_lsn) { + this.logger.info(`Initial replication already done`); + return { needsInitialSync: false, snapshotLsn: null }; + } + + return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn }; + } + + async setupCheckpointsCollection() { + const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION); + if (collection == null) { + await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { + changeStreamPreAndPostImages: { enabled: true } + }); + } else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) { + // Drop + create requires less permissions than collMod, + // and we don't care about the data in this collection. + await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION); + await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { + changeStreamPreAndPostImages: { enabled: true } + }); + } else { + // Clear the collection on startup, to keep it clean + // We never query this collection directly, and don't want to keep the data around. + // We only use this to get data into the oplog/changestream. 
+ await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({}); + } + } + + async queueSnapshotTables(snapshotLsn: string | null) { + const sourceTables = this.sync_rules.getSourceTables(); + await this.client.connect(); + + await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: MongoLSN.ZERO.comparable, + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false, + skipExistingRows: true + }, + async (batch) => { + if (snapshotLsn == null) { + // First replication attempt - get a snapshot and store the timestamp + snapshotLsn = await this.getSnapshotLsn(); + await batch.setResumeLsn(snapshotLsn); + this.logger.info(`Marking snapshot at ${snapshotLsn}`); + } else { + this.logger.info(`Resuming snapshot at ${snapshotLsn}`); + // Check that the snapshot is still valid. + await this.validateSnapshotLsn(snapshotLsn); + } + + // Start by resolving all tables. + // This checks postImage configuration, and that should fail as + // early as possible. + let allSourceTables: SourceTable[] = []; + for (let tablePattern of sourceTables) { + const tables = await this.resolveQualifiedTableNames(batch, tablePattern); + allSourceTables.push(...tables); + } + + let tablesWithStatus: SourceTable[] = []; + for (let table of allSourceTables) { + if (table.snapshotComplete) { + this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); + continue; + } + const count = await this.estimatedCountNumber(table); + const updated = await batch.updateTableProgress(table, { + totalEstimatedCount: count + }); + tablesWithStatus.push(updated); + this.relationCache.update(updated); + this.logger.info( + `To replicate: ${updated.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}` + ); + } + + for (let table of tablesWithStatus) { + this.queue.add(table); + } + } + ); + } + + async waitForInitialSnapshot() { + await this.initialSnapshotDone.promise; + } + + async replicationLoop() { + try { + if (this.queue.size == 0) { + // Special case where we start with no tables to snapshot + await this.markSnapshotDone(); + } + while (!this.abortSignal.aborted) { + const table = this.queue.values().next().value; + if (table == null) { + this.initialSnapshotDone.resolve(); + await timers.setTimeout(500, { signal: this.abortSignal }); + continue; + } + + await this.replicateTable(table); + this.queue.delete(table); + if (this.queue.size == 0) { + await this.markSnapshotDone(); + } + } + throw new ReplicationAbortedError(`Replication loop aborted`, this.abortSignal.reason); + } catch (e) { + // If initial snapshot already completed, this has no effect + this.initialSnapshotDone.reject(e); + throw e; + } + } + + private async markSnapshotDone() { + const flushResults = await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: MongoLSN.ZERO.comparable, + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false, + skipExistingRows: true + }, + async (batch) => { + // The checkpoint here is a marker - we need to replicate up to at least this + // point before the data can be considered consistent. + const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); + await batch.markAllSnapshotDone(checkpoint); + } + ); + + const lastOp = flushResults?.flushed_op ?? this.lastSnapshotOpId; + if (lastOp != null) { + // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. + // TODO: only run this after initial replication, not after each table. 
+ await this.storage.populatePersistentChecksumCache({ + // No checkpoint yet, but we do have the opId. + maxOpId: lastOp, + signal: this.abortSignal + }); + } + } + + private async replicateTable(table: SourceTable) { + const flushResults = await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: MongoLSN.ZERO.comparable, + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false, + skipExistingRows: true + }, + async (batch) => { + await this.snapshotTable(batch, table); + + const noCheckpointBefore = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); + await batch.markTableSnapshotDone([table], noCheckpointBefore); + + // This commit ensures we set keepalive_op. + const resumeLsn = batch.resumeFromLsn ?? MongoLSN.ZERO.comparable; + await batch.commit(resumeLsn); + } + ); + if (flushResults?.flushed_op != null) { + this.lastSnapshotOpId = flushResults.flushed_op; + } + this.logger.info(`Flushed snapshot at ${flushResults?.flushed_op}`); + } + + async queueSnapshot(batch: storage.BucketStorageBatch, table: storage.SourceTable) { + await batch.markTableSnapshotRequired(table); + this.queue.add(table); + } + + async estimatedCount(table: storage.SourceTable): Promise { + const count = await this.estimatedCountNumber(table); + return `~${count}`; + } + + async estimatedCountNumber(table: storage.SourceTable): Promise { + const db = this.client.db(table.schema); + return await db.collection(table.name).estimatedDocumentCount(); + } + + async resolveQualifiedTableNames( + batch: storage.BucketStorageBatch, + tablePattern: TablePattern + ): Promise { + const schema = tablePattern.schema; + if (tablePattern.connectionTag != this.connections.connectionTag) { + return []; + } + + let nameFilter: RegExp | string; + if (tablePattern.isWildcard) { + nameFilter = new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)); + } else { + nameFilter = tablePattern.name; + } + let result: storage.SourceTable[] = []; + + // Check if the collection exists + const collections = await this.client + .db(schema) + .listCollections( + { + name: nameFilter + }, + { nameOnly: false } + ) + .toArray(); + + if (!tablePattern.isWildcard && collections.length == 0) { + this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`); + } + + for (let collection of collections) { + const table = await this.handleRelation(batch, getMongoRelation({ db: schema, coll: collection.name }), { + collectionInfo: collection + }); + + result.push(table); + } + + return result; + } + + private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) { + const totalEstimatedCount = await this.estimatedCountNumber(table); + let at = table.snapshotStatus?.replicatedCount ?? 
0; + const db = this.client.db(table.schema); + const collection = db.collection(table.name); + await using query = new ChunkedSnapshotQuery({ + collection, + key: table.snapshotStatus?.lastKey, + batchSize: this.snapshotChunkLength + }); + if (query.lastKey != null) { + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}` + ); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + } + + let lastBatch = performance.now(); + let nextChunkPromise = query.nextChunk(); + while (true) { + const { docs: docBatch, lastKey } = await nextChunkPromise; + if (docBatch.length == 0) { + // No more data - stop iterating + break; + } + + if (this.abortSignal.aborted) { + throw new ReplicationAbortedError(`Aborted initial replication`, this.abortSignal.reason); + } + + // Pre-fetch next batch, so that we can read and write concurrently + nextChunkPromise = query.nextChunk(); + for (let document of docBatch) { + const record = this.constructAfterRecord(document); + + // This auto-flushes when the batch reaches its size limit + await batch.save({ + tag: SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: record, + afterReplicaId: document._id + }); + } + + // Important: flush before marking progress + await batch.flush(); + at += docBatch.length; + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length); + + table = await batch.updateTableProgress(table, { + lastKey, + replicatedCount: at, + totalEstimatedCount: totalEstimatedCount + }); + this.relationCache.update(table); + + const duration = performance.now() - lastBatch; + lastBatch = performance.now(); + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms` + ); + this.touch(); + } + // In case the loop was interrupted, make sure we await the last promise. + await nextChunkPromise; + } + + private constructAfterRecord(document: mongo.Document): SqliteRow { + const inputRow = constructAfterRecord(document); + return this.sync_rules.applyRowContext(inputRow); + } + + private async getCollectionInfo(db: string, name: string): Promise { + const collection = ( + await this.client + .db(db) + .listCollections( + { + name: name + }, + { nameOnly: false } + ) + .toArray() + )[0]; + return collection; + } + + private async checkPostImages(db: string, collectionInfo: mongo.CollectionInfo) { + if (!this.usePostImages) { + // Nothing to check + return; + } + + const enabled = collectionInfo.options?.changeStreamPreAndPostImages?.enabled == true; + + if (!enabled && this.configurePostImages) { + await this.client.db(db).command({ + collMod: collectionInfo.name, + changeStreamPreAndPostImages: { enabled: true } + }); + this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`); + } else if (!enabled) { + throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`); + } + } + + private async handleRelation( + batch: storage.BucketStorageBatch, + descriptor: SourceEntityDescriptor, + options: { collectionInfo: mongo.CollectionInfo | undefined } + ) { + if (options.collectionInfo != null) { + await this.checkPostImages(descriptor.schema, options.collectionInfo); + } else { + // If collectionInfo is null, the collection may have been dropped. + // Ignore the postImages check in this case. 
+ } + + const result = await this.storage.resolveTable({ + group_id: this.group_id, + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor, + sync_rules: this.sync_rules + }); + this.relationCache.update(result.table); + + // Drop conflicting collections. + // This is generally not expected for MongoDB source dbs, so we log an error. + if (result.dropTables.length > 0) { + this.logger.error( + `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` + ); + await batch.drop(result.dropTables); + } + + return result.table; + } + + private async getSnapshotLsn(): Promise { + const hello = await this.defaultDb.command({ hello: 1 }); + // Basic sanity check + if (hello.msg == 'isdbgrid') { + throw new ServiceError( + ErrorCode.PSYNC_S1341, + 'Sharded MongoDB Clusters are not supported yet (including MongoDB Serverless instances).' + ); + } else if (hello.setName == null) { + throw new ServiceError( + ErrorCode.PSYNC_S1342, + 'Standalone MongoDB instances are not supported - use a replicaset.' + ); + } + + // Open a change stream just to get a resume token for later use. + // We could use clusterTime from the hello command, but that won't tell us if the + // snapshot isn't valid anymore. + // If we just use the first resumeToken from the stream, we get two potential issues: + // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes + // in source db or other stream issues. + // 2. The first actual change we get may have the same clusterTime, causing us to incorrect + // skip that event. + // Instead, we create a new checkpoint document, and wait until we get that document back in the stream. + // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document + // periodically until the timeout is reached. + + const LSN_TIMEOUT_SECONDS = 60; + const LSN_CREATE_INTERVAL_SECONDS = 1; + + await using streamManager = this.openChangeStream({ lsn: null, maxAwaitTimeMs: 0 }); + const { stream } = streamManager; + const startTime = performance.now(); + let lastCheckpointCreated = -10_000; + let eventsSeen = 0; + + while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) { + if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) { + await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); + lastCheckpointCreated = performance.now(); + } + + // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream + const changeDocument = await stream.tryNext().catch((e) => { + throw mapChangeStreamError(e); + }); + if (changeDocument == null) { + continue; + } + + const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined; + + if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) { + const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId; + if (!this.checkpointStreamId.equals(checkpointId)) { + continue; + } + const { comparable: lsn } = new MongoLSN({ + timestamp: changeDocument.clusterTime!, + resume_token: changeDocument._id + }); + return lsn; + } + + eventsSeen += 1; + } + + // Could happen if there is a very large replication lag? + throw new ServiceError( + ErrorCode.PSYNC_S1301, + `Timeout after while waiting for checkpoint document for ${LSN_TIMEOUT_SECONDS}s. 
Streamed events = ${eventsSeen}` + ); + } + + /** + * Given a snapshot LSN, validate that we can read from it, by opening a change stream. + */ + private async validateSnapshotLsn(lsn: string) { + await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 }); + const { stream } = streamManager; + try { + // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream + await stream.tryNext(); + } catch (e) { + // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though + // we possibly cannot recover from it. + throw mapChangeStreamError(e); + } + } + + private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } { + const sourceTables = this.sync_rules.getSourceTables(); + + let $inFilters: { db: string; coll: string }[] = [ + { db: this.defaultDb.databaseName, coll: CHECKPOINTS_COLLECTION } + ]; + let $refilters: { 'ns.db': string; 'ns.coll': RegExp }[] = []; + let multipleDatabases = false; + for (let tablePattern of sourceTables) { + if (tablePattern.connectionTag != this.connections.connectionTag) { + continue; + } + + if (tablePattern.schema != this.defaultDb.databaseName) { + multipleDatabases = true; + } + + if (tablePattern.isWildcard) { + $refilters.push({ + 'ns.db': tablePattern.schema, + 'ns.coll': new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)) + }); + } else { + $inFilters.push({ + db: tablePattern.schema, + coll: tablePattern.name + }); + } + } + + const nsFilter = multipleDatabases + ? { ns: { $in: $inFilters } } + : { 'ns.coll': { $in: $inFilters.map((ns) => ns.coll) } }; + if ($refilters.length > 0) { + return { $match: { $or: [nsFilter, ...$refilters] }, multipleDatabases }; + } + return { $match: nsFilter, multipleDatabases }; + } + + static *getQueryData(results: Iterable): Generator { + for (let row of results) { + yield constructAfterRecord(row); + } + } + + private openChangeStream(options: { lsn: string | null; maxAwaitTimeMs?: number }) { + const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null; + const startAfter = lastLsn?.timestamp; + const resumeAfter = lastLsn?.resumeToken; + + const filters = this.getSourceNamespaceFilters(); + + const pipeline: mongo.Document[] = [ + { + $match: filters.$match + }, + { $changeStreamSplitLargeEvent: {} } + ]; + + let fullDocument: 'required' | 'updateLookup'; + + if (this.usePostImages) { + // 'read_only' or 'auto_configure' + // Configuration happens during snapshot, or when we see new + // collections. + fullDocument = 'required'; + } else { + fullDocument = 'updateLookup'; + } + const streamOptions: mongo.ChangeStreamOptions = { + showExpandedEvents: true, + maxAwaitTimeMS: options.maxAwaitTimeMs ?? this.maxAwaitTimeMS, + fullDocument: fullDocument, + maxTimeMS: this.changeStreamTimeout + }; + + /** + * Only one of these options can be supplied at a time. + */ + if (resumeAfter) { + streamOptions.resumeAfter = resumeAfter; + } else { + // Legacy: We don't persist lsns without resumeTokens anymore, but we do still handle the + // case if we have an old one. 
+ streamOptions.startAtOperationTime = startAfter; + } + + let stream: mongo.ChangeStream; + if (filters.multipleDatabases) { + // Requires readAnyDatabase@admin on Atlas + stream = this.client.watch(pipeline, streamOptions); + } else { + // Same general result, but requires less permissions than the above + stream = this.defaultDb.watch(pipeline, streamOptions); + } + + this.abortSignal.addEventListener('abort', () => { + stream.close(); + }); + + return { + stream, + filters, + [Symbol.asyncDispose]: async () => { + return stream.close(); + } + }; + } + + private lastTouchedAt = performance.now(); + + private touch() { + if (performance.now() - this.lastTouchedAt > 1_000) { + this.lastTouchedAt = performance.now(); + // Update the probes, but don't wait for it + container.probes.touch().catch((e) => { + this.logger.error(`Failed to touch the container probe: ${e.message}`, e); + }); + } + } +} diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index 1f54a7810..2851c01ee 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -7,8 +7,10 @@ import { OplogEntry, ProtocolOpId, ReplicationCheckpoint, + settledPromise, SyncRulesBucketStorage, - TestStorageOptions + TestStorageOptions, + unsettledPromise } from '@powersync/service-core'; import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; @@ -18,11 +20,12 @@ import { createCheckpoint, STANDALONE_CHECKPOINT_ID } from '@module/replication/ import { NormalizedMongoConnectionConfig } from '@module/types/types.js'; import { clearTestDb, TEST_CONNECTION_OPTIONS } from './util.js'; +import { ReplicationAbortedError } from '@powersync/lib-services-framework'; export class ChangeStreamTestContext { private _walStream?: ChangeStream; private abortController = new AbortController(); - private streamPromise?: Promise>; + private settledReplicationPromise?: Promise>; public storage?: SyncRulesBucketStorage; /** @@ -66,7 +69,7 @@ export class ChangeStreamTestContext { async dispose() { this.abort(); - await this.streamPromise?.catch((e) => e); + await this.settledReplicationPromise; await this.factory[Symbol.asyncDispose](); await this.connectionManager.end(); } @@ -125,7 +128,18 @@ export class ChangeStreamTestContext { } async replicateSnapshot() { - await this.streamer.initReplication(); + // Use a settledPromise to avoid unhandled rejections + this.settledReplicationPromise ??= settledPromise(this.streamer.replicate()); + try { + await Promise.race([unsettledPromise(this.settledReplicationPromise), this.streamer.waitForInitialSnapshot()]); + } catch (e) { + if (e instanceof ReplicationAbortedError && e.cause != null) { + // Edge case for tests: replicate() can throw an error, but we'd receive the ReplicationAbortedError from + // waitForInitialSnapshot() first. In that case, prioritize the cause. 
+ throw e.cause; + } + throw e; + } } /** @@ -143,21 +157,14 @@ export class ChangeStreamTestContext { } startStreaming() { - this.streamPromise = this.streamer - .streamChanges() - .then(() => ({ status: 'fulfilled', value: undefined }) satisfies PromiseFulfilledResult) - .catch((reason) => ({ status: 'rejected', reason }) satisfies PromiseRejectedResult); - return this.streamPromise; + this.settledReplicationPromise ??= settledPromise(this.streamer.replicate()); + return this.settledReplicationPromise; } async getCheckpoint(options?: { timeout?: number }) { let checkpoint = await Promise.race([ getClientCheckpoint(this.client, this.db, this.factory, { timeout: options?.timeout ?? 15_000 }), - this.streamPromise?.then((e) => { - if (e.status == 'rejected') { - throw e.reason; - } - }) + unsettledPromise(this.settledReplicationPromise!) ]); if (checkpoint == null) { // This indicates an issue with the test setup - streamingPromise completed instead diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f5b6507fc..5e0fab3e9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -198,6 +198,9 @@ importers: bson: specifier: ^6.10.4 version: 6.10.4 + p-defer: + specifier: ^4.0.1 + version: 4.0.1 ts-codec: specifier: ^1.3.0 version: 1.3.0 From eae4aef9090f4d01be9d45bf9374ded6872909e9 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 7 Jan 2026 15:44:41 +0200 Subject: [PATCH 004/101] Fix tests to use the new structure. --- .../src/replication/MongoSnapshotter.ts | 7 +- .../test/src/change_stream.test.ts | 79 ++++++++----------- .../test/src/change_stream_utils.ts | 24 ++++-- .../test/src/chunked_snapshot.test.ts | 15 +++- .../module-mongodb/test/src/resume.test.ts | 6 +- .../test/src/resuming_snapshots.test.ts | 26 +++--- .../test/src/slow_tests.test.ts | 10 +-- 7 files changed, 87 insertions(+), 80 deletions(-) diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index 55c0b030b..660307c10 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -267,7 +267,7 @@ export class MongoSnapshotter { } } - private async replicateTable(table: SourceTable) { + private async replicateTable(tableRequest: SourceTable) { const flushResults = await this.storage.startBatch( { logger: this.logger, @@ -277,6 +277,11 @@ export class MongoSnapshotter { skipExistingRows: true }, async (batch) => { + // Get fresh table info, in case it was updated while queuing + const table = await this.handleRelation(batch, tableRequest, { collectionInfo: undefined }); + if (table.snapshotComplete) { + return; + } await this.snapshotTable(batch, table); const noCheckpointBefore = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); diff --git a/modules/module-mongodb/test/src/change_stream.test.ts b/modules/module-mongodb/test/src/change_stream.test.ts index b9375c935..e3d66e3b5 100644 --- a/modules/module-mongodb/test/src/change_stream.test.ts +++ b/modules/module-mongodb/test/src/change_stream.test.ts @@ -40,9 +40,7 @@ bucket_definitions: }); const collection = db.collection('test_data'); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const result = await collection.insertOne({ description: 'test1', num: 1152921504606846976n }); const test_id = result.insertedId; @@ -77,9 +75,7 @@ bucket_definitions: const result = await collection.insertOne({ description: 'test1', num: 
1152921504606846976n }); const test_id = result.insertedId; - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); await setTimeout(30); await collection.updateOne({ _id: test_id }, { $set: { description: 'test2' } }); @@ -108,8 +104,7 @@ bucket_definitions: }); const collection = db.collection('test_data'); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const session = client.startSession(); let test_id: mongo.ObjectId | undefined; @@ -155,9 +150,7 @@ bucket_definitions: }); const collection = db.collection('test_data'); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const session = client.startSession(); let test_id: mongo.ObjectId | undefined; @@ -202,9 +195,7 @@ bucket_definitions: }); const collection = db.collection('test_data'); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const session = client.startSession(); let test_id: mongo.ObjectId | undefined; @@ -242,9 +233,7 @@ bucket_definitions: `); await db.createCollection('test_DATA'); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const collection = db.collection('test_DATA'); const result = await collection.insertOne({ description: 'test1' }); @@ -266,8 +255,7 @@ bucket_definitions: `); await db.createCollection('test_data'); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const largeDescription = crypto.randomBytes(20_000).toString('hex'); @@ -299,8 +287,7 @@ bucket_definitions: data: [] `; await context.updateSyncRules(syncRuleContent); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const collection = db.collection('test_data'); const result = await collection.insertOne({ description: 'test1' }); @@ -327,8 +314,7 @@ bucket_definitions: - SELECT _id as id, description FROM "test_data2" `; await context.updateSyncRules(syncRuleContent); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const collection = db.collection('test_data1'); const result = await collection.insertOne({ description: 'test1' }); @@ -354,11 +340,10 @@ bucket_definitions: const result = await collection.insertOne({ description: 'test1' }); const test_id = result.insertedId.toHexString(); - await context.replicateSnapshot(); + await context.initializeReplication(); // Note: snapshot is only consistent some time into the streaming request. // At the point that we get the first acknowledged checkpoint, as is required // for getBucketData(), the data should be consistent. 
- context.startStreaming(); const data = await context.getBucketData('global[]'); expect(data).toMatchObject([test_utils.putOp('test_data', { id: test_id, description: 'test1' })]); @@ -380,7 +365,7 @@ bucket_definitions: await db.createCollection('test_data'); - await context.replicateSnapshot(); + await context.initializeReplication(); const collection = db.collection('test_data'); const result = await collection.insertOne({ name: 't1' }); @@ -395,7 +380,6 @@ bucket_definitions: const largeDescription = crypto.randomBytes(12000000 / 2).toString('hex'); await collection.updateOne({ _id: test_id }, { $set: { description: largeDescription } }); - context.startStreaming(); const data = await context.getBucketData('global[]'); expect(data.length).toEqual(2); @@ -424,9 +408,7 @@ bucket_definitions: const { db } = context; await context.updateSyncRules(BASIC_SYNC_RULES); - await context.replicateSnapshot(); - - context.startStreaming(); + await context.initializeReplication(); const collection = db.collection('test_donotsync'); const result = await collection.insertOne({ description: 'test' }); @@ -447,7 +429,7 @@ bucket_definitions: data: - SELECT _id as id, description FROM "test_%"`); - await context.replicateSnapshot(); + await context.initializeReplication(); await db.createCollection('test_data', { // enabled: true here - everything should work @@ -458,15 +440,21 @@ bucket_definitions: const test_id = result.insertedId; await collection.updateOne({ _id: test_id }, { $set: { description: 'test2' } }); - context.startStreaming(); - const data = await context.getBucketData('global[]'); - expect(data).toMatchObject([ - // An extra op here, since this triggers a snapshot in addition to getting the event. - test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }), - test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test1' }), - test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }) - ]); + // Either case is valid here + if (data.length == 3) { + expect(data).toMatchObject([ + // An extra op here, since this triggers a snapshot in addition to getting the event. + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }), + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test1' }), + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }) + ]); + } else { + expect(data).toMatchObject([ + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test1' }), + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }) + ]); + } }); test('postImages - new collection with postImages disabled', async () => { @@ -480,7 +468,7 @@ bucket_definitions: data: - SELECT _id as id, description FROM "test_data%"`); - await context.replicateSnapshot(); + await context.initializeReplication(); await db.createCollection('test_data', { // enabled: false here, but autoConfigure will enable it. 
@@ -492,8 +480,6 @@ bucket_definitions: const test_id = result.insertedId; await collection.updateOne({ _id: test_id }, { $set: { description: 'test2' } }); - context.startStreaming(); - await expect(() => context.getBucketData('global[]')).rejects.toMatchObject({ message: expect.stringContaining('stream was configured to require a post-image for all update events') }); @@ -515,8 +501,8 @@ bucket_definitions: const collection = db.collection('test_data'); await collection.insertOne({ description: 'test1', num: 1152921504606846976n }); - await context.replicateSnapshot(); - await context.markSnapshotConsistent(); + // Initialize + await context.initializeReplication(); // Simulate an error await context.storage!.reportError(new Error('simulated error')); @@ -524,10 +510,9 @@ bucket_definitions: expect(syncRules).toBeTruthy(); expect(syncRules?.last_fatal_error).toEqual('simulated error'); - // startStreaming() should automatically clear the error. - context.startStreaming(); + // The new checkpoint should clear the error + await context.getCheckpoint(); - // getBucketData() creates a checkpoint that clears the error, so we don't do that // Just wait, and check that the error is cleared automatically. await vi.waitUntil( async () => { diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index 2851c01ee..81a17cd18 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -63,12 +63,12 @@ export class ChangeStreamTestContext { /** * Abort snapshot and/or replication, without actively closing connections. */ - abort() { - this.abortController.abort(); + abort(cause?: Error) { + this.abortController.abort(cause); } async dispose() { - this.abort(); + this.abort(new Error('Disposing test context')); await this.settledReplicationPromise; await this.factory[Symbol.asyncDispose](); await this.connectionManager.end(); @@ -118,6 +118,7 @@ export class ChangeStreamTestContext { metrics: METRICS_HELPER.metricsEngine, connections: this.connectionManager, abort_signal: this.abortController.signal, + logger: this.streamOptions?.logger, // Specifically reduce this from the default for tests on MongoDB <= 6.0, otherwise it can take // a long time to abort the stream. maxAwaitTimeMS: this.streamOptions?.maxAwaitTimeMS ?? 200, @@ -127,6 +128,18 @@ export class ChangeStreamTestContext { return this._walStream!; } + /** + * Replicate a snapshot, start streaming, and wait for a consistent checkpoint. + */ + async initializeReplication() { + await this.replicateSnapshot(); + // Make sure we're up to date + await this.getCheckpoint(); + } + + /** + * Replicate the initial snapshot, and start streaming. + */ async replicateSnapshot() { // Use a settledPromise to avoid unhandled rejections this.settledReplicationPromise ??= settledPromise(this.streamer.replicate()); @@ -156,11 +169,6 @@ export class ChangeStreamTestContext { }); } - startStreaming() { - this.settledReplicationPromise ??= settledPromise(this.streamer.replicate()); - return this.settledReplicationPromise; - } - async getCheckpoint(options?: { timeout?: number }) { let checkpoint = await Promise.race([ getClientCheckpoint(this.client, this.db, this.factory, { timeout: options?.timeout ?? 
15_000 }), diff --git a/modules/module-mongodb/test/src/chunked_snapshot.test.ts b/modules/module-mongodb/test/src/chunked_snapshot.test.ts index 930c82e9c..26508b8f7 100644 --- a/modules/module-mongodb/test/src/chunked_snapshot.test.ts +++ b/modules/module-mongodb/test/src/chunked_snapshot.test.ts @@ -1,5 +1,11 @@ import { mongo } from '@powersync/lib-service-mongodb'; -import { reduceBucket, TestStorageConfig, TestStorageFactory } from '@powersync/service-core'; +import { + reduceBucket, + settledPromise, + TestStorageConfig, + TestStorageFactory, + unsettledPromise +} from '@powersync/service-core'; import { METRICS_HELPER } from '@powersync/service-core-tests'; import { JSONBig } from '@powersync/service-jsonbig'; import { SqliteJsonValue } from '@powersync/service-sync-rules'; @@ -116,7 +122,7 @@ function defineBatchTests(config: TestStorageConfig) { // 2. Replicate one batch of rows // Our "stopping point" here is not quite deterministic. - const p = context.replicateSnapshot(); + const p = settledPromise(context.initializeReplication()); const stopAfter = 100; const startRowCount = (await METRICS_HELPER.getMetricValueForTests('powersync_rows_replicated_total')) ?? 0; @@ -146,9 +152,10 @@ function defineBatchTests(config: TestStorageConfig) { await db.collection('test_data').insertOne({ _id: idD, description: 'new' }); // 4. Replicate the rest of the table. - await p; + await unsettledPromise(p); - context.startStreaming(); + // FIXME: only start streaming at this point: + // context.startStreaming(); const data = await context.getBucketData('global[]'); const reduced = reduceBucket(data); diff --git a/modules/module-mongodb/test/src/resume.test.ts b/modules/module-mongodb/test/src/resume.test.ts index a58bd8f4b..5e5dba9a9 100644 --- a/modules/module-mongodb/test/src/resume.test.ts +++ b/modules/module-mongodb/test/src/resume.test.ts @@ -1,7 +1,7 @@ import { ChangeStreamInvalidatedError } from '@module/replication/ChangeStream.js'; import { MongoManager } from '@module/replication/MongoManager.js'; import { normalizeConnectionConfig } from '@module/types/types.js'; -import { TestStorageConfig } from '@powersync/service-core'; +import { settledPromise, TestStorageConfig } from '@powersync/service-core'; import { describe, expect, test } from 'vitest'; import { ChangeStreamTestContext } from './change_stream_utils.js'; import { env } from './env.js'; @@ -26,8 +26,6 @@ function defineResumeTest(config: TestStorageConfig) { await context.replicateSnapshot(); - context.startStreaming(); - const collection = db.collection('test_data'); await collection.insertOne({ description: 'test1', num: 1152921504606846976n }); @@ -60,7 +58,7 @@ function defineResumeTest(config: TestStorageConfig) { context2.storage = factory.getInstance(activeContent!); // If this test times out, it likely didn't throw the expected error here. 
- const result = await context2.startStreaming(); + const result = await settledPromise(context2.initializeReplication()); // The ChangeStreamReplicationJob will detect this and throw a ChangeStreamInvalidatedError expect(result.status).toEqual('rejected'); expect((result as PromiseRejectedResult).reason).toBeInstanceOf(ChangeStreamInvalidatedError); diff --git a/modules/module-mongodb/test/src/resuming_snapshots.test.ts b/modules/module-mongodb/test/src/resuming_snapshots.test.ts index 302f5cc7b..24bd2b3c9 100644 --- a/modules/module-mongodb/test/src/resuming_snapshots.test.ts +++ b/modules/module-mongodb/test/src/resuming_snapshots.test.ts @@ -6,6 +6,7 @@ import { describe, expect, test } from 'vitest'; import { ChangeStreamTestContext } from './change_stream_utils.js'; import { env } from './env.js'; import { describeWithStorage } from './util.js'; +import { logger } from '@powersync/lib-services-framework'; describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { describeWithStorage({ timeout: 240_000 }, function (config) { @@ -35,7 +36,9 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n let startRowCount: number; { - await using context = await ChangeStreamTestContext.open(factory, { streamOptions: { snapshotChunkLength: 1000 } }); + await using context = await ChangeStreamTestContext.open(factory, { + streamOptions: { snapshotChunkLength: 1000, logger: logger.child({ prefix: '[context1] ' }) } + }); await context.updateSyncRules(`bucket_definitions: global: @@ -87,7 +90,7 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n // Bypass the usual "clear db on factory open" step. await using context2 = await ChangeStreamTestContext.open(factory, { doNotClear: true, - streamOptions: { snapshotChunkLength: 1000 } + streamOptions: { snapshotChunkLength: 1000, logger: logger.child({ prefix: '[context2] ' }) } }); const { db } = context2; @@ -98,9 +101,8 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n await db.collection('test_data2').insertOne({ _id: 10001 as any, description: 'insert1' }); await context2.loadNextSyncRules(); - await context2.replicateSnapshot(); + await context2.initializeReplication(); - context2.startStreaming(); const data = await context2.getBucketData('global[]', undefined, {}); const deletedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === '1'); @@ -122,26 +124,30 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n // We only test the final version. expect(JSON.parse(updatedRowOps[1].data as string).description).toEqual('update1'); - expect(insertedRowOps.length).toEqual(2); expect(JSON.parse(insertedRowOps[0].data as string).description).toEqual('insert1'); - expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1'); + if (insertedRowOps.length != 1) { + // Also valid + expect(insertedRowOps.length).toEqual(2); + expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1'); + } // 1000 of test_data1 during first replication attempt. // N >= 1000 of test_data2 during first replication attempt. // 10000 - N - 1 + 1 of test_data2 during second replication attempt. // An additional update during streaming replication (2x total for this row). - // An additional insert during streaming replication (2x total for this row). + // An additional insert during streaming replication (1x or 2x total for this row). 
// If the deleted row was part of the first replication batch, it's removed by streaming replication. // This adds 2 ops. // We expect this to be 11002 for stopAfter: 2000, and 11004 for stopAfter: 8000. // However, this is not deterministic. - const expectedCount = 11002 + deletedRowOps.length; + const expectedCount = 11000 + deletedRowOps.length + insertedRowOps.length; expect(data.length).toEqual(expectedCount); const replicatedCount = ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount; - // With resumable replication, there should be no need to re-replicate anything. - expect(replicatedCount).toEqual(expectedCount); + // With resumable replication, there should be no need to re-replicate anything, apart from the newly-inserted row + expect(replicatedCount).toBeGreaterThanOrEqual(expectedCount); + expect(replicatedCount).toBeLessThanOrEqual(expectedCount + 1); } } diff --git a/modules/module-mongodb/test/src/slow_tests.test.ts b/modules/module-mongodb/test/src/slow_tests.test.ts index df575ef39..8acf8d88d 100644 --- a/modules/module-mongodb/test/src/slow_tests.test.ts +++ b/modules/module-mongodb/test/src/slow_tests.test.ts @@ -2,7 +2,7 @@ import { setTimeout } from 'node:timers/promises'; import { describe, expect, test } from 'vitest'; import { mongo } from '@powersync/lib-service-mongodb'; -import { storage } from '@powersync/service-core'; +import { settledPromise, storage, unsettledPromise } from '@powersync/service-core'; import { ChangeStreamTestContext, setSnapshotHistorySeconds } from './change_stream_utils.js'; import { env } from './env.js'; @@ -41,8 +41,7 @@ bucket_definitions: await collection1.bulkWrite(operations); await collection2.bulkWrite(operations); - await context.replicateSnapshot(); - context.startStreaming(); + await context.initializeReplication(); const checksum = await context.getChecksum('global[]'); expect(checksum).toMatchObject({ count: 20_000 @@ -71,7 +70,7 @@ bucket_definitions: } await collection.bulkWrite(operations); - const snapshotPromise = context.replicateSnapshot(); + const snapshotPromise = settledPromise(context.initializeReplication()); for (let i = 49; i >= 0; i--) { await collection.updateMany( @@ -81,8 +80,7 @@ bucket_definitions: await setTimeout(20); } - await snapshotPromise; - context.startStreaming(); + await unsettledPromise(snapshotPromise); const data = await context.getBucketData('global[]'); From 96cb151fcba2076d8a837ea800b011bf502d840a Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 7 Jan 2026 15:59:16 +0200 Subject: [PATCH 005/101] Fix race conditions with table snapshot state. 
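A queued snapshot request only identifies the table; by the time the queue entry is processed, the stored state for that table (replica identity columns, column types, snapshot progress) may already have changed. The snapshot worker now re-resolves the table inside its own storage batch before copying rows, instead of trusting the SourceTable captured at queue time. A rough sketch of the pattern, with simplified option shapes; batchOptions is a stand-in for the real startBatch() options, and the early return mirrors the MongoDB variant from the previous patch:

    // Sketch only - batchOptions stands in for the real startBatch() options.
    async replicateTable(requestTable: SourceTable) {
      const db = await this.connections.snapshotConnection();
      await this.storage.startBatch(batchOptions, async (batch) => {
        // Re-resolve the table against the source db inside the batch, so we act
        // on the current state rather than the state captured when queued.
        const table = await this.handleRelation({
          batch,
          db,
          name: requestTable.name,
          schema: requestTable.schema,
          relId: requestTable.objectId as number
        });
        if (table.snapshotComplete) {
          // Nothing left to do; another pass already completed this table.
          return;
        }
        await this.snapshotTableInTx(batch, db, table);
      });
    }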
--- .../src/replication/PostgresSnapshotter.ts | 67 ++++++++++--------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index d45dc1997..31b13ded4 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -9,7 +9,6 @@ import { getUuidReplicaIdentityBson, MetricsEngine, RelationCache, - SourceEntityDescriptor, SourceTable, storage } from '@powersync/service-core'; @@ -19,7 +18,6 @@ import { HydratedSyncRules, SqliteInputRow, SqliteInputValue, - SqlSyncRules, TablePattern, toSyncRulesRow, toSyncRulesValue @@ -117,12 +115,7 @@ export class PostgresSnapshotter { let query = ` SELECT c.oid AS relid, - c.relname AS table_name, - (SELECT - json_agg(DISTINCT a.atttypid) - FROM pg_attribute a - WHERE a.attnum > 0 AND NOT a.attisdropped AND a.attrelid = c.oid) - AS column_types + c.relname AS table_name FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = $1 @@ -182,18 +175,12 @@ export class PostgresSnapshotter { this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e); } - const cresult = await getReplicationIdentityColumns(db, relid); - - const columnTypes = (JSON.parse(row.column_types) as string[]).map((e) => Number(e)); const table = await this.handleRelation({ batch, - descriptor: { - name, - schema, - objectId: relid, - replicaIdColumns: cresult.replicationColumns - } as SourceEntityDescriptor, - referencedTypeIds: columnTypes + db, + name, + schema, + relId: relid }); result.push(table); @@ -314,7 +301,7 @@ export class PostgresSnapshotter { } } - async replicateTable(table: SourceTable) { + async replicateTable(requestTable: SourceTable) { const db = await this.connections.snapshotConnection(); try { const flushResults = await this.storage.startBatch( @@ -326,6 +313,14 @@ export class PostgresSnapshotter { skipExistingRows: true }, async (batch) => { + // Get fresh table info, in case it was updated while queuing + const table = await this.handleRelation({ + batch, + db: db, + name: requestTable.name, + schema: requestTable.schema, + relId: requestTable.objectId as number + }); await this.snapshotTableInTx(batch, db, table); // This commit ensures we set keepalive_op. // It may be better if that is automatically set when flushing. @@ -535,8 +530,6 @@ export class PostgresSnapshotter { } await q.initialize(); - let columns: { i: number; name: string }[] = []; - let columnMap: Record = {}; let hasRemainingData = true; while (hasRemainingData) { // Fetch 10k at a time. 
@@ -613,21 +606,35 @@ export class PostgresSnapshotter { } } - async handleRelation(options: { + private async handleRelation(options: { batch: storage.BucketStorageBatch; - descriptor: SourceEntityDescriptor; - referencedTypeIds: number[]; + db: pgwire.PgConnection; + name: string; + schema: string; + relId: number; }) { - const { batch, descriptor, referencedTypeIds } = options; + const { batch, db, name, schema, relId } = options; + + const cresult = await getReplicationIdentityColumns(db, relId); + const columnTypesResult = await db.query({ + statement: `SELECT DISTINCT atttypid + FROM pg_attribute + WHERE attnum > 0 AND NOT attisdropped AND attrelid = $1`, + params: [{ type: 'int4', value: relId }] + }); + + const columnTypes = columnTypesResult.rows.map((row) => Number(row.decodeWithoutCustomTypes(0))); - if (!descriptor.objectId && typeof descriptor.objectId != 'number') { - throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); - } const result = await this.storage.resolveTable({ group_id: this.group_id, connection_id: this.connection_id, connection_tag: this.connections.connectionTag, - entity_descriptor: descriptor, + entity_descriptor: { + name, + schema, + objectId: relId, + replicaIdColumns: cresult.replicationColumns + }, sync_rules: this.sync_rules }); this.relationCache.update(result.table); @@ -636,7 +643,7 @@ export class PostgresSnapshotter { await batch.drop(result.dropTables); // Ensure we have a description for custom types referenced in the table. - await this.connections.types.fetchTypes(referencedTypeIds); + await this.connections.types.fetchTypes(columnTypes); return result.table; } From 53efacbc183484c5ed24d3f915ed403aef892fc5 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 7 Jan 2026 16:32:23 +0200 Subject: [PATCH 006/101] Fix race condition on completing snapshots. --- modules/module-mongodb/src/replication/MongoSnapshotter.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index 660307c10..eeac96a03 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -252,6 +252,10 @@ export class MongoSnapshotter { // point before the data can be considered consistent. const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); await batch.markAllSnapshotDone(checkpoint); + // KLUDGE: We need to create an extra checkpoint _after_ marking the snapshot done, to fix + // issues with order of processing commits(). This is picked up by tests on postgres storage, + // the issue may be specific to that storage engine. + await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); } ); From a845758c0f4b96ee3441a2c33734860dcdde84fd Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 8 Jan 2026 14:05:52 +0200 Subject: [PATCH 007/101] Refactor ChangeStream implementation. Allows multiplexing multiple sync rule versions onto one change stream. 
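Instead of binding the change stream to a single storage writer, the ChangeStream now owns a list of SubStream instances. Each SubStream carries its own sync rules, relation cache, snapshotter and storage writer, while all of them share one MongoDB change stream. Roughly, the event loop fans each event out to every writer; this is a sketch only, where isCheckpoint, lsnOf and relationOf are hypothetical helpers standing in for the real checkpoint and resume-token handling:

    // Sketch of the fan-out shape (error handling, keepalives and the relation
    // cache are omitted).
    const writers = await Promise.all(substreams.map((s) => s.createWriter()));
    for await (const change of changeStream) {
      if (isCheckpoint(change)) {
        // Commits go to every writer so that all sync rule versions advance.
        for (const writer of writers) {
          await writer.commit(lsnOf(change));
        }
        continue;
      }
      // Data changes are routed to each substream, which resolves the table
      // against its own sync rules before writing.
      for (let i = 0; i < substreams.length; i++) {
        const table = await substreams[i].getRelation(writers[i], relationOf(change), { snapshot: true });
        if (table.syncAny) {
          await substreams[i].writeChange(writers[i], table, change);
        }
      }
    }

This keeps a single change stream open against the source cluster while an old and a new sync rules version are processed side by side.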
--- .../implementation/MongoBucketBatch.ts | 4 + .../implementation/MongoSyncBucketStorage.ts | 15 +- .../src/replication/ChangeStream.ts | 952 ++++++++++-------- .../src/storage/PostgresSyncRulesStorage.ts | 13 +- .../src/storage/batch/PostgresBucketBatch.ts | 4 + .../src/storage/BucketStorageBatch.ts | 5 + .../src/storage/SyncRulesBucketStorage.ts | 12 + 7 files changed, 555 insertions(+), 450 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 6fab32240..078f671ed 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -662,6 +662,10 @@ export class MongoBucketBatch } async [Symbol.asyncDispose]() { + await this.dispose(); + } + + async dispose(): Promise { await this.session.endSession(); super.clearListeners(); } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 07b971580..62d6fa404 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -154,10 +154,7 @@ export class MongoSyncBucketStorage }); } - async startBatch( - options: storage.StartBatchOptions, - callback: (batch: storage.BucketStorageBatch) => Promise - ): Promise { + async createWriter(options: storage.StartBatchOptions): Promise { const doc = await this.db.sync_rules.findOne( { _id: this.group_id @@ -166,7 +163,7 @@ export class MongoSyncBucketStorage ); const checkpoint_lsn = doc?.last_checkpoint_lsn ?? 
null; - await using batch = new MongoBucketBatch({ + const batch = new MongoBucketBatch({ logger: options.logger, db: this.db, syncRules: this.sync_rules.parsed(options).hydratedSyncRules(), @@ -180,6 +177,14 @@ export class MongoSyncBucketStorage markRecordUnavailable: options.markRecordUnavailable }); this.iterateListeners((cb) => cb.batchStarted?.(batch)); + return batch; + } + + async startBatch( + options: storage.StartBatchOptions, + callback: (batch: storage.BucketStorageBatch) => Promise + ): Promise { + await using batch = await this.createWriter(options); await callback(batch); await batch.flush(); diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index a366a60b9..ddbf5927b 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -32,6 +32,7 @@ import { import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js'; import { ChangeStreamInvalidatedError, mapChangeStreamError } from './ChangeStreamErrors.js'; import { MongoSnapshotter } from './MongoSnapshotter.js'; +import { MongoBucketBatch } from '../../../module-mongodb-storage/src/index.js'; export interface ChangeStreamOptions { connections: MongoManager; @@ -54,80 +55,49 @@ export interface ChangeStreamOptions { logger?: Logger; } -export class ChangeStream { - sync_rules: HydratedSyncRules; - group_id: number; - - connection_id = 1; - - private readonly storage: storage.SyncRulesBucketStorage; +interface SubStreamOptions { + connections: MongoManager; + storage: storage.SyncRulesBucketStorage; + logger: Logger; + abortSignal: AbortSignal; + checkpointStreamId: mongo.ObjectId; + snapshotChunkLength?: number; + metrics: MetricsEngine; + maxAwaitTimeMS: number; +} - private connections: MongoManager; +class SubStream { + public readonly relationCache = new RelationCache(getCacheIdentifier); + private readonly connection_id = 1; + private readonly connections: MongoManager; + public readonly storage: storage.SyncRulesBucketStorage; + public readonly syncRules: HydratedSyncRules; + private readonly logger: Logger; + public readonly snapshotter: MongoSnapshotter; private readonly client: mongo.MongoClient; - private readonly defaultDb: mongo.Db; private readonly metrics: MetricsEngine; + private readonly abortSignal: AbortSignal; - private readonly maxAwaitTimeMS: number; - - private abortController = new AbortController(); - private abortSignal: AbortSignal = this.abortController.signal; - - private initPromise: Promise | null = null; - private snapshotter: MongoSnapshotter; - - private relationCache = new RelationCache(getCacheIdentifier); - - /** - * Time of the oldest uncommitted change, according to the source db. - * This is used to determine the replication lag. - */ - private oldestUncommittedChange: Date | null = null; - /** - * Keep track of whether we have done a commit or keepalive yet. - * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present. - */ - private isStartingReplication = true; - - private checkpointStreamId = new mongo.ObjectId(); - - private logger: Logger; - - private changeStreamTimeout: number; - - constructor(options: ChangeStreamOptions) { - this.storage = options.storage; - this.metrics = options.metrics; - this.group_id = options.storage.group_id; + constructor(options: SubStreamOptions) { this.connections = options.connections; - this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 
10_000; this.client = this.connections.client; - this.defaultDb = this.connections.db; - this.sync_rules = options.storage.getParsedSyncRules({ - defaultSchema: this.defaultDb.databaseName + this.storage = options.storage; + this.logger = options.logger; + this.metrics = options.metrics; + this.abortSignal = options.abortSignal; + this.syncRules = this.storage.getParsedSyncRules({ + defaultSchema: this.connections.db.databaseName }); - // The change stream aggregation command should timeout before the socket times out, - // so we use 90% of the socket timeout value. - this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9); - - this.logger = options.logger ?? defaultLogger; this.snapshotter = new MongoSnapshotter({ - ...options, - abort_signal: this.abortSignal, + abort_signal: options.abortSignal, + checkpointStreamId: options.checkpointStreamId, + connections: this.connections, + storage: this.storage, logger: this.logger, - checkpointStreamId: this.checkpointStreamId - }); - - // We wrap in our own abort controller so we can trigger abort internally. - options.abort_signal.addEventListener('abort', () => { - this.abortController.abort(options.abort_signal.reason); + snapshotChunkLength: options.snapshotChunkLength, + metrics: options.metrics, + maxAwaitTimeMS: options.maxAwaitTimeMS }); - if (options.abort_signal.aborted) { - this.abortController.abort(options.abort_signal.reason); - } - } - - get stopped() { - return this.abortSignal.aborted; } private get usePostImages() { @@ -138,64 +108,79 @@ export class ChangeStream { return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; } - private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } { - const sourceTables = this.sync_rules.getSourceTables(); - - let $inFilters: { db: string; coll: string }[] = [ - { db: this.defaultDb.databaseName, coll: CHECKPOINTS_COLLECTION } - ]; - let $refilters: { 'ns.db': string; 'ns.coll': RegExp }[] = []; - let multipleDatabases = false; - for (let tablePattern of sourceTables) { - if (tablePattern.connectionTag != this.connections.connectionTag) { - continue; + async initReplication() { + const result = await this.snapshotter.checkSlot(); + // FIXME: This should be done once, not per sub-stream + await this.snapshotter.setupCheckpointsCollection(); + if (result.needsInitialSync) { + if (result.snapshotLsn == null) { + // Snapshot LSN is not present, so we need to start replication from scratch. + await this.storage.clear({ signal: this.abortSignal }); } + await this.snapshotter.queueSnapshotTables(result.snapshotLsn); + } + } - if (tablePattern.schema != this.defaultDb.databaseName) { - multipleDatabases = true; - } + async createWriter(): Promise { + return this.storage.createWriter({ + logger: this.logger, + zeroLSN: MongoLSN.ZERO.comparable, + defaultSchema: this.connections.db.databaseName, + // We get a complete postimage for every change, so we don't need to store the current data. 
+ storeCurrentData: false + }); + } - if (tablePattern.isWildcard) { - $refilters.push({ - 'ns.db': tablePattern.schema, - 'ns.coll': new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)) - }); - } else { - $inFilters.push({ - db: tablePattern.schema, - coll: tablePattern.name - }); - } + async handleRelation( + batch: storage.BucketStorageBatch, + descriptor: SourceEntityDescriptor, + options: { snapshot: boolean; collectionInfo: mongo.CollectionInfo | undefined } + ) { + if (options.collectionInfo != null) { + await this.checkPostImages(descriptor.schema, options.collectionInfo); + } else { + // If collectionInfo is null, the collection may have been dropped. + // Ignore the postImages check in this case. } - // When we have a large number of collections, the performance of the pipeline - // depends a lot on how the filters here are specified. - // Currently, only the multipleDatabases == false case is optimized, and the - // wildcard matching version is not tested (but we assume that will be more - // limited in the number of them). - // Specifically, the `ns: {$in: [...]}` version can lead to PSYNC_S1345 timeouts in - // some cases when we have a large number of collections. - // For details, see: - // https://github.com/powersync-ja/powersync-service/pull/417 - // https://jira.mongodb.org/browse/SERVER-114532 - const nsFilter = multipleDatabases - ? // cluster-level: filter on the entire namespace - { ns: { $in: $inFilters } } - : // collection-level: filter on coll only - { 'ns.coll': { $in: $inFilters.map((ns) => ns.coll) } }; - if ($refilters.length > 0) { - return { $match: { $or: [nsFilter, ...$refilters] }, multipleDatabases }; + const snapshot = options.snapshot; + const result = await this.storage.resolveTable({ + group_id: this.storage.group_id, + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor, + sync_rules: this.syncRules + }); + this.relationCache.update(result.table); + + // Drop conflicting collections. + // This is generally not expected for MongoDB source dbs, so we log an error. + if (result.dropTables.length > 0) { + this.logger.error( + `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` + ); + await batch.drop(result.dropTables); } - return { $match: nsFilter, multipleDatabases }; - } - static *getQueryData(results: Iterable): Generator { - for (let row of results) { - yield constructAfterRecord(row); + // Snapshot if: + // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) + // 2. Snapshot is not already done, AND: + // 3. The table is used in sync rules. 
+ const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; + if (shouldSnapshot) { + this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); + await this.snapshotter.queueSnapshot(batch, result.table); } + + return result.table; + } + + private constructAfterRecord(document: mongo.Document): SqliteRow { + const inputRow = constructAfterRecord(document); + return this.syncRules.applyRowContext(inputRow); } - private async getRelation( + public async getRelation( batch: storage.BucketStorageBatch, descriptor: SourceEntityDescriptor, options: { snapshot: boolean } @@ -212,7 +197,7 @@ export class ChangeStream { return this.handleRelation(batch, descriptor, { snapshot: options.snapshot, collectionInfo: collection }); } - private async getCollectionInfo(db: string, name: string): Promise { + public async getCollectionInfo(db: string, name: string): Promise { const collection = ( await this.client .db(db) @@ -246,55 +231,6 @@ export class ChangeStream { } } - async handleRelation( - batch: storage.BucketStorageBatch, - descriptor: SourceEntityDescriptor, - options: { snapshot: boolean; collectionInfo: mongo.CollectionInfo | undefined } - ) { - if (options.collectionInfo != null) { - await this.checkPostImages(descriptor.schema, options.collectionInfo); - } else { - // If collectionInfo is null, the collection may have been dropped. - // Ignore the postImages check in this case. - } - - const snapshot = options.snapshot; - const result = await this.storage.resolveTable({ - group_id: this.group_id, - connection_id: this.connection_id, - connection_tag: this.connections.connectionTag, - entity_descriptor: descriptor, - sync_rules: this.sync_rules - }); - this.relationCache.update(result.table); - - // Drop conflicting collections. - // This is generally not expected for MongoDB source dbs, so we log an error. - if (result.dropTables.length > 0) { - this.logger.error( - `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` - ); - await batch.drop(result.dropTables); - } - - // Snapshot if: - // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not already done, AND: - // 3. The table is used in sync rules. 
- const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; - if (shouldSnapshot) { - this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); - await this.snapshotter.queueSnapshot(batch, result.table); - } - - return result.table; - } - - private constructAfterRecord(document: mongo.Document): SqliteRow { - const inputRow = constructAfterRecord(document); - return this.sync_rules.applyRowContext(inputRow); - } - async writeChange( batch: storage.BucketStorageBatch, table: storage.SourceTable, @@ -346,10 +282,143 @@ export class ChangeStream { throw new ReplicationAssertionError(`Unsupported operation: ${change.operationType}`); } } +} + +export class ChangeStream { + substreams: SubStream[] = []; + + connection_id = 1; + + private connections: MongoManager; + private readonly client: mongo.MongoClient; + private readonly defaultDb: mongo.Db; + private readonly metrics: MetricsEngine; + + private readonly maxAwaitTimeMS: number; + + private abortController = new AbortController(); + private abortSignal: AbortSignal = this.abortController.signal; + + private initPromise: Promise | null = null; + + /** + * Time of the oldest uncommitted change, according to the source db. + * This is used to determine the replication lag. + */ + private oldestUncommittedChange: Date | null = null; + /** + * Keep track of whether we have done a commit or keepalive yet. + * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present. + */ + private isStartingReplication = true; + + private checkpointStreamId = new mongo.ObjectId(); + + private logger: Logger; + + private changeStreamTimeout: number; + + constructor(options: ChangeStreamOptions) { + this.metrics = options.metrics; + this.connections = options.connections; + this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000; + this.client = this.connections.client; + this.defaultDb = this.connections.db; + // The change stream aggregation command should timeout before the socket times out, + // so we use 90% of the socket timeout value. + this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9); + + this.logger = options.logger ?? defaultLogger; + + const substream = new SubStream({ + abortSignal: options.abort_signal, + checkpointStreamId: this.checkpointStreamId, + connections: this.connections, + storage: options.storage, + logger: this.logger, + snapshotChunkLength: options.snapshotChunkLength, + maxAwaitTimeMS: this.maxAwaitTimeMS, + metrics: this.metrics + }); + this.substreams.push(substream); + + // We wrap in our own abort controller so we can trigger abort internally. 
+ options.abort_signal.addEventListener('abort', () => { + this.abortController.abort(options.abort_signal.reason); + }); + if (options.abort_signal.aborted) { + this.abortController.abort(options.abort_signal.reason); + } + } + + private get usePostImages() { + return this.connections.options.postImages != PostImagesOption.OFF; + } + + get stopped() { + return this.abortSignal.aborted; + } + + private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } { + const sourceTables = this.substreams.flatMap((s) => s.syncRules.getSourceTables()); + + let $inFilters: { db: string; coll: string }[] = [ + { db: this.defaultDb.databaseName, coll: CHECKPOINTS_COLLECTION } + ]; + let $refilters: { 'ns.db': string; 'ns.coll': RegExp }[] = []; + let multipleDatabases = false; + for (let tablePattern of sourceTables) { + if (tablePattern.connectionTag != this.connections.connectionTag) { + continue; + } + + if (tablePattern.schema != this.defaultDb.databaseName) { + multipleDatabases = true; + } + + if (tablePattern.isWildcard) { + $refilters.push({ + 'ns.db': tablePattern.schema, + 'ns.coll': new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)) + }); + } else { + $inFilters.push({ + db: tablePattern.schema, + coll: tablePattern.name + }); + } + } + + // When we have a large number of collections, the performance of the pipeline + // depends a lot on how the filters here are specified. + // Currently, only the multipleDatabases == false case is optimized, and the + // wildcard matching version is not tested (but we assume that will be more + // limited in the number of them). + // Specifically, the `ns: {$in: [...]}` version can lead to PSYNC_S1345 timeouts in + // some cases when we have a large number of collections. + // For details, see: + // https://github.com/powersync-ja/powersync-service/pull/417 + // https://jira.mongodb.org/browse/SERVER-114532 + const nsFilter = multipleDatabases + ? // cluster-level: filter on the entire namespace + { ns: { $in: $inFilters } } + : // collection-level: filter on coll only + { 'ns.coll': { $in: $inFilters.map((ns) => ns.coll) } }; + if ($refilters.length > 0) { + return { $match: { $or: [nsFilter, ...$refilters] }, multipleDatabases }; + } + return { $match: nsFilter, multipleDatabases }; + } + + static *getQueryData(results: Iterable): Generator { + for (let row of results) { + yield constructAfterRecord(row); + } + } async replicate() { let streamPromise: Promise | null = null; - let loopPromise: Promise | null = null; + let loopPromises: Promise[] = []; try { // If anything errors here, the entire replication process is halted, and // all connections automatically closed, including this one. 
@@ -359,20 +428,28 @@ export class ChangeStream { .then(() => { throw new ReplicationAssertionError(`Replication stream exited unexpectedly`); }) - .catch((e) => { - this.abortController.abort(e); - throw e; - }); - loopPromise = this.snapshotter - .replicationLoop() - .then(() => { - throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); - }) - .catch((e) => { + .catch(async (e) => { + // Report stream errors to all substreams + for (let substream of this.substreams) { + await substream.storage.reportError(e); + } + this.abortController.abort(e); throw e; }); - const results = await Promise.allSettled([loopPromise, streamPromise]); + loopPromises = this.substreams.map((s) => + s.snapshotter + .replicationLoop() + .then(() => { + throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); + }) + .catch(async (e) => { + await s.storage.reportError(e); + this.abortController.abort(e); + throw e; + }) + ); + const results = await Promise.allSettled([...loopPromises, streamPromise]); // First, prioritize non-aborted errors for (let result of results) { if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) { @@ -388,9 +465,6 @@ export class ChangeStream { // If we get here, both Promises completed successfully, which is unexpected. throw new ReplicationAssertionError(`Replication loop exited unexpectedly`); - } catch (e) { - await this.storage.reportError(e); - throw e; } finally { // Just to make sure this.abortController.abort(); @@ -405,19 +479,11 @@ export class ChangeStream { throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()'); } await this.initPromise; - await this.snapshotter.waitForInitialSnapshot(); + await Promise.all(this.substreams.map((s) => s.snapshotter.waitForInitialSnapshot())); } private async initReplication() { - const result = await this.snapshotter.checkSlot(); - await this.snapshotter.setupCheckpointsCollection(); - if (result.needsInitialSync) { - if (result.snapshotLsn == null) { - // Snapshot LSN is not present, so we need to start replication from scratch. - await this.storage.clear({ signal: this.abortSignal }); - } - await this.snapshotter.queueSnapshotTables(result.snapshotLsn); - } + await Promise.all(this.substreams.map((substream) => substream.initReplication())); } private async streamChanges() { @@ -500,288 +566,292 @@ export class ChangeStream { } private async streamChangesInternal() { - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, - defaultSchema: this.defaultDb.databaseName, - // We get a complete postimage for every change, so we don't need to store the current data. - storeCurrentData: false - }, - async (batch) => { - const { resumeFromLsn } = batch; - if (resumeFromLsn == null) { - throw new ReplicationAssertionError(`No LSN found to resume from`); - } - const lastLsn = MongoLSN.fromSerialized(resumeFromLsn); - const startAfter = lastLsn?.timestamp; + const writers = await Promise.all(this.substreams.map((s) => s.createWriter())); - // It is normal for this to be a minute or two old when there is a low volume - // of ChangeStream events. - const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000); + // FIXME: Proper resumeFromLsn implementation for multiple writers + // We should probably use the active sync rules for this, or alternatively the minimum from the writers. 
+ const { resumeFromLsn } = writers[0]; + if (resumeFromLsn == null) { + throw new ReplicationAssertionError(`No LSN found to resume from`); + } + const lastLsn = MongoLSN.fromSerialized(resumeFromLsn); + const startAfter = lastLsn?.timestamp; - this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`); + // It is normal for this to be a minute or two old when there is a low volume + // of ChangeStream events. + const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000); - await using streamManager = this.openChangeStream({ lsn: resumeFromLsn }); - const { stream, filters } = streamManager; - if (this.abortSignal.aborted) { - await stream.close(); - return; - } + this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`); - // Always start with a checkpoint. - // This helps us to clear errors when restarting, even if there is - // no data to replicate. - let waitForCheckpointLsn: string | null = await createCheckpoint( - this.client, - this.defaultDb, - this.checkpointStreamId - ); + await using streamManager = this.openChangeStream({ lsn: resumeFromLsn }); + const { stream, filters } = streamManager; + if (this.abortSignal.aborted) { + await stream.close(); + return; + } - let splitDocument: mongo.ChangeStreamDocument | null = null; + // Always start with a checkpoint. + // This helps us to clear errors when restarting, even if there is + // no data to replicate. + let waitForCheckpointLsn: string | null = await createCheckpoint( + this.client, + this.defaultDb, + this.checkpointStreamId + ); - let flexDbNameWorkaroundLogged = false; - let changesSinceLastCheckpoint = 0; + let splitDocument: mongo.ChangeStreamDocument | null = null; - let lastEmptyResume = performance.now(); + let flexDbNameWorkaroundLogged = false; + let changesSinceLastCheckpoint = 0; - while (true) { - if (this.abortSignal.aborted) { - break; - } + let lastEmptyResume = performance.now(); - const originalChangeDocument = await stream.tryNext().catch((e) => { - throw mapChangeStreamError(e); - }); - // The stream was closed, we will only ever receive `null` from it - if (!originalChangeDocument && stream.closed) { - break; - } + while (true) { + if (this.abortSignal.aborted) { + break; + } - if (this.abortSignal.aborted) { - break; - } + const originalChangeDocument = await stream.tryNext().catch((e) => { + throw mapChangeStreamError(e); + }); + // The stream was closed, we will only ever receive `null` from it + if (!originalChangeDocument && stream.closed) { + break; + } - if (originalChangeDocument == null) { - // We get a new null document after `maxAwaitTimeMS` if there were no other events. - // In this case, stream.resumeToken is the resume token associated with the last response. - // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next() - // does update it. - // From observed behavior, the actual resumeToken changes around once every 10 seconds. - // If we don't update it on empty events, we do keep consistency, but resuming the stream - // with old tokens may cause connection timeouts. - // We throttle this further by only persisting a keepalive once a minute. - // We add an additional check for waitForCheckpointLsn == null, to make sure we're not - // doing a keepalive in the middle of a transaction. 
- if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) { - const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken); - await batch.keepalive(lsn); - this.touch(); - lastEmptyResume = performance.now(); - // Log the token update. This helps as a general "replication is still active" message in the logs. - // This token would typically be around 10s behind. - this.logger.info( - `Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}` - ); - this.isStartingReplication = false; - } - continue; - } + if (this.abortSignal.aborted) { + break; + } + if (originalChangeDocument == null) { + // We get a new null document after `maxAwaitTimeMS` if there were no other events. + // In this case, stream.resumeToken is the resume token associated with the last response. + // stream.resumeToken is not updated if stream.tryNext() returns data, while stream.next() + // does update it. + // From observed behavior, the actual resumeToken changes around once every 10 seconds. + // If we don't update it on empty events, we do keep consistency, but resuming the stream + // with old tokens may cause connection timeouts. + // We throttle this further by only persisting a keepalive once a minute. + // We add an additional check for waitForCheckpointLsn == null, to make sure we're not + // doing a keepalive in the middle of a transaction. + if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) { + const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken); + await Promise.all(writers.map((batch) => batch.keepalive(lsn))); this.touch(); + lastEmptyResume = performance.now(); + // Log the token update. This helps as a general "replication is still active" message in the logs. + // This token would typically be around 10s behind. + this.logger.info(`Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`); + this.isStartingReplication = false; + } + continue; + } - if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) { - continue; - } - - let changeDocument = originalChangeDocument; - if (originalChangeDocument?.splitEvent != null) { - // Handle split events from $changeStreamSplitLargeEvent. - // This is only relevant for very large update operations. - const splitEvent = originalChangeDocument?.splitEvent; - - if (splitDocument == null) { - splitDocument = originalChangeDocument; - } else { - splitDocument = Object.assign(splitDocument, originalChangeDocument); - } + this.touch(); - if (splitEvent.fragment == splitEvent.of) { - // Got all fragments - changeDocument = splitDocument; - splitDocument = null; - } else { - // Wait for more fragments - continue; - } - } else if (splitDocument != null) { - // We were waiting for fragments, but got a different event - throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`); - } + if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) { + continue; + } - if ( - !filters.multipleDatabases && - 'ns' in changeDocument && - changeDocument.ns.db != this.defaultDb.databaseName && - changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`) - ) { - // When all of the following conditions are met: - // 1. We're replicating from an Atlas Flex instance. - // 2. There were changestream events recorded while the PowerSync service is paused. - // 3. We're only replicating from a single database. 
- // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'}, - // instead of the expected {db: 'ps'}. - // We correct this. - changeDocument.ns.db = this.defaultDb.databaseName; - - if (!flexDbNameWorkaroundLogged) { - flexDbNameWorkaroundLogged = true; - this.logger.warn( - `Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.` - ); - } - } + let changeDocument = originalChangeDocument; + if (originalChangeDocument?.splitEvent != null) { + // Handle split events from $changeStreamSplitLargeEvent. + // This is only relevant for very large update operations. + const splitEvent = originalChangeDocument?.splitEvent; - const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined; - - if (ns?.coll == CHECKPOINTS_COLLECTION) { - /** - * Dropping the database does not provide an `invalidate` event. - * We typically would receive `drop` events for the collection which we - * would process below. - * - * However we don't commit the LSN after collections are dropped. - * The prevents the `startAfter` or `resumeToken` from advancing past the drop events. - * The stream also closes after the drop events. - * This causes an infinite loop of processing the collection drop events. - * - * This check here invalidates the change stream if our `_checkpoints` collection - * is dropped. This allows for detecting when the DB is dropped. - */ - if (changeDocument.operationType == 'drop') { - throw new ChangeStreamInvalidatedError( - 'Internal collections have been dropped', - new Error('_checkpoints collection was dropped') - ); - } + if (splitDocument == null) { + splitDocument = originalChangeDocument; + } else { + splitDocument = Object.assign(splitDocument, originalChangeDocument); + } - if ( - !( - changeDocument.operationType == 'insert' || - changeDocument.operationType == 'update' || - changeDocument.operationType == 'replace' - ) - ) { - continue; - } + if (splitEvent.fragment == splitEvent.of) { + // Got all fragments + changeDocument = splitDocument; + splitDocument = null; + } else { + // Wait for more fragments + continue; + } + } else if (splitDocument != null) { + // We were waiting for fragments, but got a different event + throw new ReplicationAssertionError(`Incomplete splitEvent: ${JSON.stringify(splitDocument.splitEvent)}`); + } - // We handle two types of checkpoint events: - // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these - // immediately, regardless of where they were created. - // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate - // limiting of commits, so we specifically want to exclude checkpoints from other streams. - // - // It may be useful to also throttle commits due to standalone checkpoints in the future. - // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now. - - const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId; - if (!(checkpointId == STANDALONE_CHECKPOINT_ID || this.checkpointStreamId.equals(checkpointId))) { - continue; - } - const { comparable: lsn } = new MongoLSN({ - timestamp: changeDocument.clusterTime!, - resume_token: changeDocument._id - }); - if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) { - // Checkpoint out of order - should never happen with MongoDB. - // If it does happen, we throw an error to stop the replication - restarting should recover. 
- // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop. - // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042. - // This has been fixed in the driver in the meantime, but we still keep this as a safety-check. - throw new ReplicationAssertionError( - `Change resumeToken ${(changeDocument._id as any)._data} (${timestampToDate(changeDocument.clusterTime!).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.` - ); - } + if ( + !filters.multipleDatabases && + 'ns' in changeDocument && + changeDocument.ns.db != this.defaultDb.databaseName && + changeDocument.ns.db.endsWith(`_${this.defaultDb.databaseName}`) + ) { + // When all of the following conditions are met: + // 1. We're replicating from an Atlas Flex instance. + // 2. There were changestream events recorded while the PowerSync service is paused. + // 3. We're only replicating from a single database. + // Then we've observed an ns with for example {db: '67b83e86cd20730f1e766dde_ps'}, + // instead of the expected {db: 'ps'}. + // We correct this. + changeDocument.ns.db = this.defaultDb.databaseName; + + if (!flexDbNameWorkaroundLogged) { + flexDbNameWorkaroundLogged = true; + this.logger.warn( + `Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.` + ); + } + } - if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) { - waitForCheckpointLsn = null; - } - const didCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined; + + if (ns?.coll == CHECKPOINTS_COLLECTION) { + /** + * Dropping the database does not provide an `invalidate` event. + * We typically would receive `drop` events for the collection which we + * would process below. + * + * However we don't commit the LSN after collections are dropped. + * The prevents the `startAfter` or `resumeToken` from advancing past the drop events. + * The stream also closes after the drop events. + * This causes an infinite loop of processing the collection drop events. + * + * This check here invalidates the change stream if our `_checkpoints` collection + * is dropped. This allows for detecting when the DB is dropped. + */ + if (changeDocument.operationType == 'drop') { + throw new ChangeStreamInvalidatedError( + 'Internal collections have been dropped', + new Error('_checkpoints collection was dropped') + ); + } - if (didCommit) { - this.oldestUncommittedChange = null; - this.isStartingReplication = false; - changesSinceLastCheckpoint = 0; - } - } else if ( + if ( + !( changeDocument.operationType == 'insert' || changeDocument.operationType == 'update' || - changeDocument.operationType == 'replace' || - changeDocument.operationType == 'delete' - ) { - if (waitForCheckpointLsn == null) { - waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); - } - const rel = getMongoRelation(changeDocument.ns); - const table = await this.getRelation(batch, rel, { - // In most cases, we should not need to snapshot this. But if this is the first time we see the collection - // for whatever reason, then we do need to snapshot it. - // This may result in some duplicate operations when a collection is created for the first time after - // sync rules was deployed. 
- snapshot: true - }); - if (table.syncAny) { - if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) { - this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime); - } - const flushResult = await this.writeChange(batch, table, changeDocument); - changesSinceLastCheckpoint += 1; - if (flushResult != null && changesSinceLastCheckpoint >= 20_000) { - // When we are catching up replication after an initial snapshot, there may be a very long delay - // before we do a commit(). In that case, we need to periodically persist the resume LSN, so - // we don't restart from scratch if we restart replication. - // The same could apply if we need to catch up on replication after some downtime. - const { comparable: lsn } = new MongoLSN({ - timestamp: changeDocument.clusterTime!, - resume_token: changeDocument._id - }); - this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`); - await batch.setResumeLsn(lsn); - changesSinceLastCheckpoint = 0; - } - } - } else if (changeDocument.operationType == 'drop') { - const rel = getMongoRelation(changeDocument.ns); - const table = await this.getRelation(batch, rel, { - // We're "dropping" this collection, so never snapshot it. - snapshot: false - }); - if (table.syncAny) { - await batch.drop([table]); - this.relationCache.delete(table); + changeDocument.operationType == 'replace' + ) + ) { + continue; + } + + // We handle two types of checkpoint events: + // 1. "Standalone" checkpoints, typically write checkpoints. We want to process these + // immediately, regardless of where they were created. + // 2. "Batch" checkpoints for the current stream. This is used as a form of dynamic rate + // limiting of commits, so we specifically want to exclude checkpoints from other streams. + // + // It may be useful to also throttle commits due to standalone checkpoints in the future. + // However, these typically have a much lower rate than batch checkpoints, so we don't do that for now. + + const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId; + if (!(checkpointId == STANDALONE_CHECKPOINT_ID || this.checkpointStreamId.equals(checkpointId))) { + continue; + } + const { comparable: lsn } = new MongoLSN({ + timestamp: changeDocument.clusterTime!, + resume_token: changeDocument._id + }); + // FIXME: Implement this check again. We can't rely on batch.lastCheckpointLsn anymore. + // if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) { + // // Checkpoint out of order - should never happen with MongoDB. + // // If it does happen, we throw an error to stop the replication - restarting should recover. + // // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop. + // // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042. + // // This has been fixed in the driver in the meantime, but we still keep this as a safety-check. + // throw new ReplicationAssertionError( + // `Change resumeToken ${(changeDocument._id as any)._data} (${timestampToDate(changeDocument.clusterTime!).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. 
Restarting replication.` + // ); + // } + + if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) { + waitForCheckpointLsn = null; + } + let didCommit = false; + for (let batch of writers) { + didCommit ||= await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + } + + if (didCommit) { + // TODO: Re-check this logic + this.oldestUncommittedChange = null; + this.isStartingReplication = false; + changesSinceLastCheckpoint = 0; + } + + continue; + } + + for (let i = 0; i < this.substreams.length; i++) { + const batch = writers[i]; + const substream = this.substreams[i]; + if ( + changeDocument.operationType == 'insert' || + changeDocument.operationType == 'update' || + changeDocument.operationType == 'replace' || + changeDocument.operationType == 'delete' + ) { + if (waitForCheckpointLsn == null) { + waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); + } + const rel = getMongoRelation(changeDocument.ns); + const table = await substream.getRelation(batch, rel, { + // In most cases, we should not need to snapshot this. But if this is the first time we see the collection + // for whatever reason, then we do need to snapshot it. + // This may result in some duplicate operations when a collection is created for the first time after + // sync rules was deployed. + snapshot: true + }); + if (table.syncAny) { + if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) { + this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime); } - } else if (changeDocument.operationType == 'rename') { - const relFrom = getMongoRelation(changeDocument.ns); - const relTo = getMongoRelation(changeDocument.to); - const tableFrom = await this.getRelation(batch, relFrom, { - // We're "dropping" this collection, so never snapshot it. - snapshot: false - }); - if (tableFrom.syncAny) { - await batch.drop([tableFrom]); - this.relationCache.delete(relFrom); + const flushResult = await substream.writeChange(batch, table, changeDocument); + changesSinceLastCheckpoint += 1; + if (flushResult != null && changesSinceLastCheckpoint >= 20_000) { + // When we are catching up replication after an initial snapshot, there may be a very long delay + // before we do a commit(). In that case, we need to periodically persist the resume LSN, so + // we don't restart from scratch if we restart replication. + // The same could apply if we need to catch up on replication after some downtime. + const { comparable: lsn } = new MongoLSN({ + timestamp: changeDocument.clusterTime!, + resume_token: changeDocument._id + }); + this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`); + await batch.setResumeLsn(lsn); + changesSinceLastCheckpoint = 0; } - // Here we do need to snapshot the new table - const collection = await this.getCollectionInfo(relTo.schema, relTo.name); - await this.handleRelation(batch, relTo, { - // This is a new (renamed) collection, so always snapshot it. - snapshot: true, - collectionInfo: collection - }); } + } else if (changeDocument.operationType == 'drop') { + const rel = getMongoRelation(changeDocument.ns); + const table = await substream.getRelation(batch, rel, { + // We're "dropping" this collection, so never snapshot it. 
+ snapshot: false + }); + if (table.syncAny) { + await batch.drop([table]); + substream.relationCache.delete(table); + } + } else if (changeDocument.operationType == 'rename') { + const relFrom = getMongoRelation(changeDocument.ns); + const relTo = getMongoRelation(changeDocument.to); + const tableFrom = await substream.getRelation(batch, relFrom, { + // We're "dropping" this collection, so never snapshot it. + snapshot: false + }); + if (tableFrom.syncAny) { + await batch.drop([tableFrom]); + substream.relationCache.delete(relFrom); + } + // Here we do need to snapshot the new table + const collection = await substream.getCollectionInfo(relTo.schema, relTo.name); + await substream.handleRelation(batch, relTo, { + // This is a new (renamed) collection, so always snapshot it. + snapshot: true, + collectionInfo: collection + }); } } - ); + } throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason); } diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 004d10ed0..12c676f79 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -327,10 +327,7 @@ export class PostgresSyncRulesStorage }); } - async startBatch( - options: storage.StartBatchOptions, - callback: (batch: storage.BucketStorageBatch) => Promise - ): Promise { + async createWriter(options: storage.StartBatchOptions): Promise { const syncRules = await this.db.sql` SELECT last_checkpoint_lsn, @@ -362,6 +359,14 @@ export class PostgresSyncRulesStorage markRecordUnavailable: options.markRecordUnavailable }); this.iterateListeners((cb) => cb.batchStarted?.(batch)); + return batch; + } + + async startBatch( + options: storage.StartBatchOptions, + callback: (batch: storage.BucketStorageBatch) => Promise + ): Promise { + const batch = await this.createWriter(options); await callback(batch); await batch.flush(); diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts index 53705621f..1a48766c2 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts @@ -112,6 +112,10 @@ export class PostgresBucketBatch } async [Symbol.asyncDispose]() { + await this.dispose(); + } + + async dispose(): Promise { super.clearListeners(); } diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index e35a83395..8c6323754 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -12,6 +12,11 @@ export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptio }; export interface BucketStorageBatch extends ObserverClient, AsyncDisposable { + /** + * Alias for [Symbol.asyncDispose] + */ + dispose(): Promise; + /** * Save an op, and potentially flush. 
* diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 175427449..eaf1696ff 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -26,12 +26,24 @@ export interface SyncRulesBucketStorage /** * Use this to get access to update storage data. + * + * @deprecated Use `createWriter` instead. */ startBatch( options: StartBatchOptions, callback: (batch: BucketStorageBatch) => Promise ): Promise; + /** + * Create a new writer - an alternative to `startBatch`. + * + * The writer is stateful. It is not safe to use the same writer concurrently from multiple places, + * but different writers can be used concurrently. + * + * The writer must be flushed and disposed when done. + */ + createWriter(options: StartBatchOptions): Promise; + getParsedSyncRules(options: ParseSyncRulesOptions): HydratedSyncRules; /** From 8e28411ebbb4e0600c296f8660f42a453dea85a5 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 8 Jan 2026 14:31:40 +0200 Subject: [PATCH 008/101] Fixes. --- modules/module-mongodb/src/replication/ChangeStream.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index ddbf5927b..4e1a25af3 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -331,7 +331,7 @@ export class ChangeStream { this.logger = options.logger ?? defaultLogger; const substream = new SubStream({ - abortSignal: options.abort_signal, + abortSignal: this.abortSignal, checkpointStreamId: this.checkpointStreamId, connections: this.connections, storage: options.storage, @@ -567,6 +567,11 @@ export class ChangeStream { private async streamChangesInternal() { const writers = await Promise.all(this.substreams.map((s) => s.createWriter())); + await using _ = { + [Symbol.asyncDispose]: async () => { + await Promise.all(writers.map((w) => w[Symbol.asyncDispose]())); + } + }; // FIXME: Proper resumeFromLsn implementation for multiple writers // We should probably use the active sync rules for this, or alternatively the minimum from the writers. From f2b0b6c46865a68620da708c40c4da5268cbd2b3 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 8 Jan 2026 16:19:23 +0200 Subject: [PATCH 009/101] Fix potential race condition leading to locks held for longer than needed. 
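
The change in this patch threads the caller's MongoDB ClientSession through to the lock acquisition, so the lock write participates in the same transaction as the surrounding sync-rules update instead of racing it. A minimal sketch of the idea follows, assuming a simplified sync_rules document shape, a hypothetical 60-second lease and an illustrative acquire condition; the real filter, expiry handling and error reporting live in MongoSyncRulesLock.

    import { randomBytes } from 'node:crypto';
    import { ClientSession, Collection } from 'mongodb';

    interface SyncRuleLockDoc {
      _id: number;
      lock?: { id: string; expires_at: Date } | null;
    }

    // Acquire a lease-style lock on a sync rules document. Passing the caller's
    // session makes the lock write part of the surrounding transaction, so an
    // aborted transaction cannot leave a dangling lock behind.
    async function acquireLock(
      syncRules: Collection<SyncRuleLockDoc>,
      syncRulesId: number,
      session?: ClientSession
    ): Promise<string | null> {
      const lockId = randomBytes(8).toString('hex');
      const now = new Date();
      const expiresAt = new Date(now.getTime() + 60_000); // hypothetical 60s lease
      // Only take the lock if it is free or expired (illustrative condition).
      const previous = await syncRules.findOneAndUpdate(
        { _id: syncRulesId, $or: [{ lock: null }, { 'lock.expires_at': { $lt: now } }] },
        { $set: { lock: { id: lockId, expires_at: expiresAt } } },
        { projection: { lock: 1 }, returnDocument: 'before', session }
      );
      // With driver v6, findOneAndUpdate resolves to the matched document or null.
      // null here means no document matched, i.e. another process holds a live lock.
      return previous == null ? null : lockId;
    }

A matching release path would clear the lock only while `lock.id` still equals the `lockId` returned here, so an expired-and-reacquired lock is never released by the old holder.
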
--- .../src/storage/MongoBucketStorage.ts | 2 +- .../implementation/MongoPersistedSyncRulesContent.ts | 4 ++-- .../src/storage/implementation/MongoSyncRulesLock.ts | 8 ++++++-- modules/module-mongodb/src/replication/ChangeStream.ts | 7 +++---- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index aad07740d..8faaafe35 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -225,7 +225,7 @@ export class MongoBucketStorage await this.db.notifyCheckpoint(); rules = new MongoPersistedSyncRulesContent(this.db, doc); if (options.lock) { - const lock = await rules.lock(); + await rules.lock(this.session); } }); diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts index a843d9a00..409d659ea 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts @@ -45,8 +45,8 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule ); } - async lock() { - const lock = await MongoSyncRulesLock.createLock(this.db, this); + async lock(session: mongo.ClientSession | undefined = undefined): Promise { + const lock = await MongoSyncRulesLock.createLock(this.db, this, session); this.current_lock = lock; return lock; } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncRulesLock.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncRulesLock.ts index ddd53cb47..922b1de94 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncRulesLock.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncRulesLock.ts @@ -3,6 +3,7 @@ import crypto from 'crypto'; import { ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { storage } from '@powersync/service-core'; import { PowerSyncMongo } from './db.js'; +import { mongo } from '@powersync/lib-service-mongodb'; /** * Manages a lock on a sync rules document, so that only one process @@ -13,7 +14,8 @@ export class MongoSyncRulesLock implements storage.ReplicationLock { static async createLock( db: PowerSyncMongo, - sync_rules: storage.PersistedSyncRulesContent + sync_rules: storage.PersistedSyncRulesContent, + session?: mongo.ClientSession ): Promise { const lockId = crypto.randomBytes(8).toString('hex'); const doc = await db.sync_rules.findOneAndUpdate( @@ -28,12 +30,14 @@ export class MongoSyncRulesLock implements storage.ReplicationLock { }, { projection: { lock: 1 }, - returnDocument: 'before' + returnDocument: 'before', + session } ); if (doc == null) { // Query the existing lock to get the expiration time (best effort - it may have been released in the meantime). + // We don't use the session here - we want to see the latest state. 
const heldLock = await db.sync_rules.findOne({ _id: sync_rules.id }, { projection: { lock: 1 } }); if (heldLock?.lock?.expires_at) { throw new ServiceError( diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 4e1a25af3..aab8660ee 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -16,11 +16,12 @@ import { SourceTable, storage } from '@powersync/service-core'; -import { DatabaseInputRow, SqliteInputRow, SqliteRow, HydratedSyncRules } from '@powersync/service-sync-rules'; +import { DatabaseInputRow, HydratedSyncRules, SqliteInputRow, SqliteRow } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; import { MongoLSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; import { escapeRegExp } from '../utils.js'; +import { ChangeStreamInvalidatedError, mapChangeStreamError } from './ChangeStreamErrors.js'; import { MongoManager } from './MongoManager.js'; import { constructAfterRecord, @@ -29,10 +30,8 @@ import { getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js'; -import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js'; -import { ChangeStreamInvalidatedError, mapChangeStreamError } from './ChangeStreamErrors.js'; import { MongoSnapshotter } from './MongoSnapshotter.js'; -import { MongoBucketBatch } from '../../../module-mongodb-storage/src/index.js'; +import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js'; export interface ChangeStreamOptions { connections: MongoManager; From ffe752f4b3b0ee5bfc4bbf3a51358ddbb03cad85 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Fri, 9 Jan 2026 12:18:13 +0200 Subject: [PATCH 010/101] Implement actual multiplexing for mongodb change streams. 
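
With this patch a single MongoDB change stream fans out to one substream (and one storage writer) per active sync rules version: relations, snapshots and row writes are handled per substream, while each checkpoint LSN is committed to every writer. One subtle detail in the ChangeStream diff below is the two-step commit fold; a compressed sketch of why, with Writer standing in for the real storage batch interface:

    interface Writer {
      commit(lsn: string): Promise<boolean>;
    }

    // Commit one checkpoint LSN to every writer and report whether any committed.
    // Note that `didCommit ||= await writer.commit(lsn)` would be wrong here:
    // `||=` short-circuits once the flag is true, so later writers would silently
    // be skipped. Await the commit first, then fold the result in.
    async function commitAll(writers: Writer[], lsn: string): Promise<boolean> {
      let didCommit = false;
      for (const writer of writers) {
        const didWriterCommit = await writer.commit(lsn);
        didCommit ||= didWriterCommit;
      }
      return didCommit;
    }
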
--- .../implementation/MongoPersistedSyncRules.ts | 2 +- .../src/replication/ChangeStream.ts | 29 +++---- .../replication/ChangeStreamReplicationJob.ts | 47 +++++++++++- .../src/replication/ChangeStreamReplicator.ts | 76 ++++++++++++++++--- .../src/replication/CDCReplicationJob.ts | 4 + .../src/replication/BinLogReplicationJob.ts | 4 + .../PostgresPersistedSyncRulesContent.ts | 2 +- .../replication/WalStreamReplicationJob.ts | 4 + .../src/test-utils/general-utils.ts | 2 +- .../src/replication/AbstractReplicationJob.ts | 6 +- .../src/replication/AbstractReplicator.ts | 10 ++- .../test/src/sync/BucketChecksumState.test.ts | 2 +- packages/sync-rules/src/index.ts | 1 + 13 files changed, 151 insertions(+), 38 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts index 77796b143..30e460bf1 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts @@ -1,7 +1,7 @@ import { SqlSyncRules, HydratedSyncRules } from '@powersync/service-sync-rules'; import { storage } from '@powersync/service-core'; -import { versionedHydrationState } from '@powersync/service-sync-rules/src/HydrationState.js'; +import { versionedHydrationState } from '@powersync/service-sync-rules'; export class MongoPersistedSyncRules implements storage.PersistedSyncRules { public readonly slot_name: string; diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index aab8660ee..45c12b722 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -32,10 +32,11 @@ import { } from './MongoRelation.js'; import { MongoSnapshotter } from './MongoSnapshotter.js'; import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js'; +import { ReplicationStreamConfig } from './ChangeStreamReplicationJob.js'; export interface ChangeStreamOptions { connections: MongoManager; - storage: storage.SyncRulesBucketStorage; + streams: ReplicationStreamConfig[]; metrics: MetricsEngine; abort_signal: AbortSignal; /** @@ -92,7 +93,7 @@ class SubStream { checkpointStreamId: options.checkpointStreamId, connections: this.connections, storage: this.storage, - logger: this.logger, + logger: this.logger.child({ prefix: `[powersync_${this.storage.group_id}_snapshot] ` }), snapshotChunkLength: options.snapshotChunkLength, metrics: options.metrics, maxAwaitTimeMS: options.maxAwaitTimeMS @@ -329,17 +330,18 @@ export class ChangeStream { this.logger = options.logger ?? 
defaultLogger; - const substream = new SubStream({ - abortSignal: this.abortSignal, - checkpointStreamId: this.checkpointStreamId, - connections: this.connections, - storage: options.storage, - logger: this.logger, - snapshotChunkLength: options.snapshotChunkLength, - maxAwaitTimeMS: this.maxAwaitTimeMS, - metrics: this.metrics + this.substreams = options.streams.map((config) => { + return new SubStream({ + abortSignal: this.abortSignal, + checkpointStreamId: this.checkpointStreamId, + connections: this.connections, + storage: config.storage, + logger: this.logger.child({ prefix: `[powersync_${config.lock.sync_rules_id}] ` }), + snapshotChunkLength: options.snapshotChunkLength, + maxAwaitTimeMS: this.maxAwaitTimeMS, + metrics: this.metrics + }); }); - this.substreams.push(substream); // We wrap in our own abort controller so we can trigger abort internally. options.abort_signal.addEventListener('abort', () => { @@ -772,7 +774,8 @@ export class ChangeStream { } let didCommit = false; for (let batch of writers) { - didCommit ||= await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + const didWriterCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + didCommit ||= didWriterCommit; } if (didCommit) { diff --git a/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts b/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts index 216138096..5f4f41017 100644 --- a/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts +++ b/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts @@ -1,22 +1,39 @@ import { container, logger as defaultLogger } from '@powersync/lib-services-framework'; -import { replication } from '@powersync/service-core'; +import { + PersistedSyncRulesContent, + replication, + ReplicationLock, + SyncRulesBucketStorage +} from '@powersync/service-core'; import { ChangeStream, ChangeStreamInvalidatedError } from './ChangeStream.js'; import { ConnectionManagerFactory } from './ConnectionManagerFactory.js'; export interface ChangeStreamReplicationJobOptions extends replication.AbstractReplicationJobOptions { connectionFactory: ConnectionManagerFactory; + streams: ReplicationStreamConfig[]; +} + +export interface ReplicationStreamConfig { + syncRules: PersistedSyncRulesContent; + storage: SyncRulesBucketStorage; + lock: ReplicationLock; } export class ChangeStreamReplicationJob extends replication.AbstractReplicationJob { private connectionFactory: ConnectionManagerFactory; private lastStream: ChangeStream | null = null; + private readonly streams: ReplicationStreamConfig[]; + constructor(options: ChangeStreamReplicationJobOptions) { super(options); this.connectionFactory = options.connectionFactory; + this.streams = options.streams; // We use a custom formatter to process the prefix - this.logger = defaultLogger.child({ prefix: `[powersync_${this.storage.group_id}] ` }); + this.logger = defaultLogger.child({ + prefix: `[powersync-${this.streams.map((stream) => stream.syncRules.id).join(',')}] ` + }); } async cleanUp(): Promise { @@ -27,6 +44,21 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ // Nothing needed here } + isDifferent(syncRules: PersistedSyncRulesContent[]): boolean { + if (syncRules.length != this.streams.length) { + return true; + } + + for (let rules of syncRules) { + const existing = this.streams.find((stream) => stream.syncRules.id === rules.id); + if (existing == null) { + return true; + } + } + + return 
false; + } + async replicate() { try { await this.replicateOnce(); @@ -47,12 +79,19 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ if (e instanceof ChangeStreamInvalidatedError) { // This stops replication and restarts with a new instance - await this.options.storage.factory.restartReplication(this.storage.group_id); + // FIXME: check this logic with multiple streams + for (let { storage } of Object.values(this.streams)) { + await storage.factory.restartReplication(storage.group_id); + } } // No need to rethrow - the error is already logged, and retry behavior is the same on error } finally { this.abortController.abort(); + + for (let { lock } of this.streams) { + await lock.release(); + } } } @@ -68,7 +107,7 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ } const stream = new ChangeStream({ abort_signal: this.abortController.signal, - storage: this.options.storage, + streams: this.streams, metrics: this.options.metrics, connections: connectionManager, logger: this.logger diff --git a/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts b/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts index 2fca7aec3..123a30224 100644 --- a/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts +++ b/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts @@ -1,10 +1,11 @@ import { storage, replication } from '@powersync/service-core'; -import { ChangeStreamReplicationJob } from './ChangeStreamReplicationJob.js'; +import { ChangeStreamReplicationJob, ReplicationStreamConfig } from './ChangeStreamReplicationJob.js'; import { ConnectionManagerFactory } from './ConnectionManagerFactory.js'; import { MongoErrorRateLimiter } from './MongoErrorRateLimiter.js'; import { MongoModule } from '../module/MongoModule.js'; import { MongoLSN } from '../common/MongoLSN.js'; import { timestampToDate } from './replication-utils.js'; +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; export interface ChangeStreamReplicatorOptions extends replication.AbstractReplicatorOptions { connectionFactory: ConnectionManagerFactory; @@ -12,6 +13,7 @@ export interface ChangeStreamReplicatorOptions extends replication.AbstractRepli export class ChangeStreamReplicator extends replication.AbstractReplicator { private readonly connectionFactory: ConnectionManagerFactory; + private job: ChangeStreamReplicationJob | null = null; constructor(options: ChangeStreamReplicatorOptions) { super(options); @@ -19,22 +21,78 @@ export class ChangeStreamReplicator extends replication.AbstractReplicator { // TODO: Implement anything? 
} + async refresh(options?: { configured_lock?: storage.ReplicationLock }) { + if (this.stopped) { + return; + } + + let configuredLock = options?.configured_lock; + + const replicatingSyncRules = await this.storage.getReplicatingSyncRules(); + + if (this.job?.isStopped) { + this.job = null; + } + + if (this.job != null && !this.job?.isDifferent(replicatingSyncRules)) { + // No changes + return; + } + + // Stop existing job, if any + await this.job?.stop(); + this.job = null; + if (replicatingSyncRules.length === 0) { + // No active replication + return; + } + + let streamConfig: ReplicationStreamConfig[] = []; + try { + for (let rules of replicatingSyncRules) { + let lock: storage.ReplicationLock; + if (configuredLock?.sync_rules_id == rules.id) { + lock = configuredLock; + } else { + lock = await rules.lock(); + } + streamConfig.push({ lock, syncRules: rules, storage: this.storage.getInstance(rules) }); + } + } catch (e) { + // Release any acquired locks + for (let { lock } of streamConfig) { + try { + await lock.release(); + } catch (ex) { + this.logger.warn('Failed to release replication lock after acquisition failure', ex); + } + } + throw e; + } + + const newJob = new ChangeStreamReplicationJob({ + id: this.createJobId(replicatingSyncRules[0].id), // FIXME: check the id + storage: streamConfig[0].storage, // FIXME: multi-stream logic + lock: streamConfig[0].lock, // FIXME: multi-stream logic + streams: streamConfig, + metrics: this.metrics, + connectionFactory: this.connectionFactory, + rateLimiter: new MongoErrorRateLimiter() + }); + this.job = newJob; + await newJob.start(); + } + async stop(): Promise { await super.stop(); + await this.job?.stop(); await this.connectionFactory.shutdown(); } diff --git a/modules/module-mssql/src/replication/CDCReplicationJob.ts b/modules/module-mssql/src/replication/CDCReplicationJob.ts index 120649544..5545848a3 100644 --- a/modules/module-mssql/src/replication/CDCReplicationJob.ts +++ b/modules/module-mssql/src/replication/CDCReplicationJob.ts @@ -21,6 +21,10 @@ export class CDCReplicationJob extends replication.AbstractReplicationJob { this.cdcReplicationJobOptions = options; } + public get storage() { + return this.options.storage; + } + async keepAlive() { // TODO Might need to leverage checkpoints table as a keepAlive } diff --git a/modules/module-mysql/src/replication/BinLogReplicationJob.ts b/modules/module-mysql/src/replication/BinLogReplicationJob.ts index bf72ca728..6c77fd89c 100644 --- a/modules/module-mysql/src/replication/BinLogReplicationJob.ts +++ b/modules/module-mysql/src/replication/BinLogReplicationJob.ts @@ -17,6 +17,10 @@ export class BinLogReplicationJob extends replication.AbstractReplicationJob { this.connectionFactory = options.connectionFactory; } + public get storage() { + return this.options.storage; + } + get slot_name() { return this.options.storage.slot_name; } diff --git a/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts b/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts index 2548b03b7..53db43dfa 100644 --- a/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts +++ b/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts @@ -4,7 +4,7 @@ import { storage } from '@powersync/service-core'; import { SqlSyncRules } from '@powersync/service-sync-rules'; import { models } from '../../types/types.js'; -import { versionedHydrationState } from 
'@powersync/service-sync-rules/src/HydrationState.js'; +import { versionedHydrationState } from '@powersync/service-sync-rules'; export class PostgresPersistedSyncRulesContent implements storage.PersistedSyncRulesContent { public readonly slot_name: string; diff --git a/modules/module-postgres/src/replication/WalStreamReplicationJob.ts b/modules/module-postgres/src/replication/WalStreamReplicationJob.ts index 340af22b9..737bb97c4 100644 --- a/modules/module-postgres/src/replication/WalStreamReplicationJob.ts +++ b/modules/module-postgres/src/replication/WalStreamReplicationJob.ts @@ -21,6 +21,10 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob this.connectionFactory = options.connectionFactory; } + public get storage() { + return this.options.storage; + } + /** * Postgres on RDS writes performs a WAL checkpoint every 5 minutes by default, which creates a new 64MB file. * diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 6e94d829d..fec74712d 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -1,6 +1,6 @@ import { storage, utils } from '@powersync/service-core'; import { GetQuerierOptions, RequestParameters, SqlSyncRules } from '@powersync/service-sync-rules'; -import { versionedHydrationState } from '@powersync/service-sync-rules/src/HydrationState.js'; +import { versionedHydrationState } from '@powersync/service-sync-rules'; import * as bson from 'bson'; export const ZERO_LSN = '0/0'; diff --git a/packages/service-core/src/replication/AbstractReplicationJob.ts b/packages/service-core/src/replication/AbstractReplicationJob.ts index 02f03bad5..63b053da9 100644 --- a/packages/service-core/src/replication/AbstractReplicationJob.ts +++ b/packages/service-core/src/replication/AbstractReplicationJob.ts @@ -54,7 +54,7 @@ export abstract class AbstractReplicationJob { * Safely stop the replication process */ public async stop(): Promise { - this.logger.info(`Stopping replication job for sync rule iteration: ${this.storage.group_id}`); + this.logger.info(`Stopping replication job for sync rule iteration: ${this.id}`); this.abortController.abort(); await this.isReplicatingPromise; } @@ -63,10 +63,6 @@ export abstract class AbstractReplicationJob { return this.options.id; } - public get storage() { - return this.options.storage; - } - protected get lock() { return this.options.lock; } diff --git a/packages/service-core/src/replication/AbstractReplicator.ts b/packages/service-core/src/replication/AbstractReplicator.ts index e76debe73..bc4c9b941 100644 --- a/packages/service-core/src/replication/AbstractReplicator.ts +++ b/packages/service-core/src/replication/AbstractReplicator.ts @@ -159,6 +159,8 @@ export abstract class AbstractReplicator { diff --git a/packages/sync-rules/src/index.ts b/packages/sync-rules/src/index.ts index 4609714fd..264fdbf59 100644 --- a/packages/sync-rules/src/index.ts +++ b/packages/sync-rules/src/index.ts @@ -28,3 +28,4 @@ export * from './types/custom_sqlite_value.js'; export * from './types/time.js'; export * from './utils.js'; export * from './HydratedSyncRules.js'; +export * from './HydrationState.js'; From 60e5897dc889724639077dee25b6752997d210c5 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Fri, 9 Jan 2026 15:37:30 +0200 Subject: [PATCH 011/101] Fix tests. 
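
The test fix below narrows what ChangeStream actually requires from each stream entry: typing the option as Pick<ReplicationStreamConfig, 'storage'> lets tests pass only a storage handle, without constructing sync rules content or replication locks. A small sketch of the pattern, using simplified placeholder types rather than the real service interfaces:

    // Simplified placeholder types; the real ones come from the service packages.
    interface ReplicationStreamConfig {
      storage: { group_id: number };
      syncRules: { id: number };
      lock: { release(): Promise<void> };
    }

    // The change stream only depends on `storage`, so accept exactly that.
    type ChangeStreamStream = Pick<ReplicationStreamConfig, 'storage'>;

    function describeStreams(streams: ChangeStreamStream[]): string {
      return streams.map((s) => `powersync_${s.storage.group_id}`).join(', ');
    }

    // Tests can construct a stream entry without a lock or sync rules content:
    const testOnly: ChangeStreamStream = { storage: { group_id: 1 } };
    console.log(describeStreams([testOnly]));
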
--- modules/module-mongodb/src/replication/ChangeStream.ts | 4 ++-- modules/module-mongodb/test/src/change_stream_utils.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 45c12b722..b5b5b1f59 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -36,7 +36,7 @@ import { ReplicationStreamConfig } from './ChangeStreamReplicationJob.js'; export interface ChangeStreamOptions { connections: MongoManager; - streams: ReplicationStreamConfig[]; + streams: Pick[]; metrics: MetricsEngine; abort_signal: AbortSignal; /** @@ -336,7 +336,7 @@ export class ChangeStream { checkpointStreamId: this.checkpointStreamId, connections: this.connections, storage: config.storage, - logger: this.logger.child({ prefix: `[powersync_${config.lock.sync_rules_id}] ` }), + logger: this.logger.child({ prefix: `[powersync_${config.storage.group_id}] ` }), snapshotChunkLength: options.snapshotChunkLength, maxAwaitTimeMS: this.maxAwaitTimeMS, metrics: this.metrics diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index 81a17cd18..2357f3ac4 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -114,7 +114,7 @@ export class ChangeStreamTestContext { return this._walStream; } const options: ChangeStreamOptions = { - storage: this.storage, + streams: [{ storage: this.storage }], metrics: METRICS_HELPER.metricsEngine, connections: this.connectionManager, abort_signal: this.abortController.signal, From 22f590434609b34d6c29f868969316faf976a121 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 12 Jan 2026 10:05:49 +0200 Subject: [PATCH 012/101] Consistent sort order for sync rules. --- .../module-mongodb-storage/src/storage/MongoBucketStorage.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index 8faaafe35..654550615 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -275,6 +275,8 @@ export class MongoBucketStorage .find({ state: { $in: [storage.SyncRuleState.PROCESSING, storage.SyncRuleState.ACTIVE] } }) + // Prioritize "ACTIVE" first + .sort({ state: 1, _id: 1 }) .toArray(); return docs.map((doc) => { From 55adf3a778ced3ac1674ffc7be63679ebe5279ce Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 12 Jan 2026 11:12:06 +0200 Subject: [PATCH 013/101] Persist mappings. 
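
The storage change below allocates a numeric ID for every bucket definition and parameter lookup when sync rules are written, and persists them on the sync rules document (rule_mapping) so later hydration resolves the same IDs. A minimal sketch of the allocation step, assuming the (id << 16) / (id << 17) numbering and the `${lookupName}#${queryId}` key format used in the diff; bucket and lookup names are passed in explicitly here instead of being discovered through a hydrate() pass:

    interface RuleMapping {
      definitions: Record<string, number>;
      parameter_lookups: Record<string, number>;
    }

    // Allocate stable IDs for bucket definitions and parameter lookups, scoped to
    // one sync rules version. Persisting these alongside the sync rules means the
    // same names always resolve to the same IDs, even across process restarts.
    function allocateRuleMapping(
      syncRulesId: number,
      bucketSourceNames: string[],
      parameterLookupKeys: string[] // formatted as `${lookupName}#${queryId}`
    ): RuleMapping {
      const definitions: Record<string, number> = {};
      const parameter_lookups: Record<string, number> = {};
      let bucketDefinitionId = (syncRulesId << 16) + 1;
      let parameterDefinitionId = (syncRulesId << 17) + 1;
      for (const name of bucketSourceNames) {
        definitions[name] = bucketDefinitionId++;
      }
      for (const key of parameterLookupKeys) {
        parameter_lookups[key] = parameterDefinitionId++;
      }
      return { definitions, parameter_lookups };
    }
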
--- .../src/storage/MongoBucketStorage.ts | 37 +++++++++++++++++- .../implementation/MongoPersistedSyncRules.ts | 38 ++++++++++++++++++- .../MongoPersistedSyncRulesContent.ts | 9 ++++- .../implementation/MongoSyncBucketStorage.ts | 6 +-- .../src/storage/implementation/models.ts | 7 +++- packages/sync-rules/src/SqlSyncRules.ts | 3 +- 6 files changed, 90 insertions(+), 10 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index 654550615..6f505a07e 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -205,6 +205,37 @@ export class MongoBucketStorage const id = Number(id_doc!.op_id); const slot_name = generateSlotName(this.slot_name_prefix, id); + const syncRules = SqlSyncRules.fromYaml(options.content, { + // No schema-based validation at this point + schema: undefined, + defaultSchema: 'not_applicable', // Not needed for validation + throwOnError: false + }); + let bucketDefinitionMapping: Record = {}; + let parameterDefinitionMapping: Record = {}; + let bucketDefinitionId = (id << 16) + 1; + let parameterDefinitionId = (id << 17) + 1; + + syncRules.hydrate({ + hydrationState: { + getBucketSourceScope(source) { + bucketDefinitionMapping[source.uniqueName] = bucketDefinitionId; + bucketDefinitionId += 1; + return { + // N/A + bucketPrefix: '' + }; + }, + getParameterIndexLookupScope(source) { + const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; + parameterDefinitionMapping[key] = parameterDefinitionId; + parameterDefinitionId += 1; + // N/A + return source.defaultLookupScope; + } + } + }); + const doc: SyncRuleDocument = { _id: id, content: options.content, @@ -219,7 +250,11 @@ export class MongoBucketStorage last_checkpoint_ts: null, last_fatal_error: null, last_fatal_error_ts: null, - last_keepalive_ts: null + last_keepalive_ts: null, + rule_mapping: { + definitions: bucketDefinitionMapping, + parameter_lookups: parameterDefinitionMapping + } }; await this.db.sync_rules.insertOne(doc); await this.db.notifyCheckpoint(); diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts index 30e460bf1..2d6ed9b64 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts @@ -2,6 +2,12 @@ import { SqlSyncRules, HydratedSyncRules } from '@powersync/service-sync-rules'; import { storage } from '@powersync/service-core'; import { versionedHydrationState } from '@powersync/service-sync-rules'; +import { ServiceAssertionError } from '@powersync/lib-services-framework'; + +export interface SyncDefinitionMapping { + definitions: Record; + parameterLookups: Record; +} export class MongoPersistedSyncRules implements storage.PersistedSyncRules { public readonly slot_name: string; @@ -10,12 +16,40 @@ export class MongoPersistedSyncRules implements storage.PersistedSyncRules { public readonly id: number, public readonly sync_rules: SqlSyncRules, public readonly checkpoint_lsn: string | null, - slot_name: string | null + slot_name: string | null, + private readonly mapping: SyncDefinitionMapping | null ) { this.slot_name = slot_name ?? 
`powersync_${id}`; } hydratedSyncRules(): HydratedSyncRules { - return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(this.id) }); + if (this.mapping == null) { + return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(this.id) }); + } else { + return this.sync_rules.hydrate({ + hydrationState: { + getBucketSourceScope: (source) => { + const defId = this.mapping!.definitions[source.uniqueName]; + if (defId == null) { + throw new ServiceAssertionError(`No mapping found for bucket source ${source.uniqueName}`); + } + return { + bucketPrefix: defId.toString(16) + }; + }, + getParameterIndexLookupScope: (source) => { + const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; + const defId = this.mapping!.parameterLookups[key]; + if (defId == null) { + throw new ServiceAssertionError(`No mapping found for parameter lookup ${key}`); + } + return { + lookupName: defId.toString(16), + queryId: '' + }; + } + } + }); + } } } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts index 409d659ea..ca0fd3379 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts @@ -1,7 +1,7 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { storage } from '@powersync/service-core'; import { SqlSyncRules } from '@powersync/service-sync-rules'; -import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; +import { MongoPersistedSyncRules, SyncDefinitionMapping } from './MongoPersistedSyncRules.js'; import { MongoSyncRulesLock } from './MongoSyncRulesLock.js'; import { PowerSyncMongo } from './db.js'; import { SyncRuleDocument } from './models.js'; @@ -17,6 +17,7 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule public readonly last_keepalive_ts: Date | null; public readonly last_checkpoint_ts: Date | null; public readonly active: boolean; + private readonly rules_mapping: SyncDefinitionMapping | null = null; public current_lock: MongoSyncRulesLock | null = null; @@ -33,6 +34,9 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule this.last_fatal_error_ts = doc.last_fatal_error_ts; this.last_checkpoint_ts = doc.last_checkpoint_ts; this.last_keepalive_ts = doc.last_keepalive_ts; + this.rules_mapping = doc.rule_mapping + ? 
{ definitions: doc.rule_mapping.definitions, parameterLookups: doc.rule_mapping.parameter_lookups } + : null; this.active = doc.state == 'ACTIVE'; } @@ -41,7 +45,8 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule this.id, SqlSyncRules.fromYaml(this.sync_rules_content, options), this.last_checkpoint_lsn, - this.slot_name + this.slot_name, + this.rules_mapping ); } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 62d6fa404..f7bb638ca 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -208,8 +208,8 @@ export class MongoSyncBucketStorage let result: storage.ResolveTableResult | null = null; await this.db.client.withSession(async (session) => { const col = this.db.source_tables; - let filter: Partial = { - group_id: group_id, + let filter: mongo.Filter = { + sync_rules_ids: group_id, connection_id: connection_id, schema_name: schema, table_name: name, @@ -222,7 +222,7 @@ export class MongoSyncBucketStorage if (doc == null) { doc = { _id: new bson.ObjectId(), - group_id: group_id, + sync_rules_ids: [group_id], connection_id: connection_id, relation_id: objectId, schema_name: schema, diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index ccd45a556..4eeacfc02 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -72,7 +72,7 @@ export type OpType = 'PUT' | 'REMOVE' | 'MOVE' | 'CLEAR'; export interface SourceTableDocument { _id: bson.ObjectId; - group_id: number; + sync_rules_ids: number[]; connection_id: number; relation_id: number | string | undefined; schema_name: string; @@ -210,6 +210,11 @@ export interface SyncRuleDocument { id: string; expires_at: Date; } | null; + + rule_mapping: { + definitions: Record; + parameter_lookups: Record; + }; } export interface CheckpointEventDocument { diff --git a/packages/sync-rules/src/SqlSyncRules.ts b/packages/sync-rules/src/SqlSyncRules.ts index 5a6adfd24..373972eeb 100644 --- a/packages/sync-rules/src/SqlSyncRules.ts +++ b/packages/sync-rules/src/SqlSyncRules.ts @@ -400,7 +400,8 @@ export class SqlSyncRules { */ hydrate(params?: CreateSourceParams): HydratedSyncRules { let hydrationState = params?.hydrationState; - if (hydrationState == null || !this.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { + // FIXME: Check logic for this: !this.compatibility.isEnabled(CompatibilityOption.versionedBucketIds) + if (hydrationState == null) { hydrationState = DEFAULT_HYDRATION_STATE; } const resolvedParams = { hydrationState }; From ed9e90c390c000d2ebf2acaa90435722a8daf689 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 12 Jan 2026 12:03:59 +0200 Subject: [PATCH 014/101] Refactor deletes. 
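
clear() is reworked below so every bulk delete runs through a shared retry helper: each deleteMany is capped with maxTimeMS, and a MaxTimeMSExpired error is treated as "made partial progress, try again" rather than a failure, with an abort signal as the overall stop condition. A standalone sketch of that helper, with an illustrative timeout constant standing in for MONGO_CLEAR_OPERATION_TIMEOUT_MS:

    import { setTimeout as delay } from 'node:timers/promises';
    import { MongoServerError } from 'mongodb';

    const CLEAR_OPERATION_TIMEOUT_MS = 30_000; // illustrative value

    // Run a bounded delete repeatedly until it completes or the signal aborts.
    // A delete capped by maxTimeMS still removes documents before timing out,
    // so each retry makes forward progress.
    async function retriedDelete(
      message: string,
      signal: AbortSignal,
      deleteFunc: () => Promise<{ deletedCount: number }>
    ): Promise<void> {
      let attempt = 0;
      while (!signal.aborted) {
        try {
          await deleteFunc();
          return;
        } catch (e) {
          if (e instanceof MongoServerError && e.codeName === 'MaxTimeMSExpired') {
            attempt += 1;
            console.info(`${message}: attempt ${attempt} timed out, continuing...`);
            await delay(CLEAR_OPERATION_TIMEOUT_MS / 5);
          } else {
            throw e;
          }
        }
      }
      throw new Error(`Aborted while ${message}`);
    }

A caller wraps each bounded delete, for example: retriedDelete('deleting bucket data', signal, () => bucketData.deleteMany(filter, { maxTimeMS: CLEAR_OPERATION_TIMEOUT_MS })).
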
--- .../implementation/MongoSyncBucketStorage.ts | 149 ++++++++++++------ 1 file changed, 97 insertions(+), 52 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index f7bb638ca..921947801 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -566,33 +566,9 @@ export class MongoSyncBucketStorage } async clear(options?: storage.ClearStorageOptions): Promise { - while (true) { - if (options?.signal?.aborted) { - throw new ReplicationAbortedError('Aborted clearing data', options.signal.reason); - } - try { - await this.clearIteration(); - - logger.info(`${this.slot_name} Done clearing data`); - return; - } catch (e: unknown) { - if (lib_mongo.isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') { - logger.info( - `${this.slot_name} Cleared batch of data in ${lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS}ms, continuing...` - ); - await timers.setTimeout(lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5); - } else { - throw e; - } - } - } - } + const signal = options?.signal ?? new AbortController().signal; - private async clearIteration(): Promise { - // Individual operations here may time out with the maxTimeMS option. - // It is expected to still make progress, and continue on the next try. - - await this.db.sync_rules.updateOne( + const doc = await this.db.sync_rules.findOneAndUpdate( { _id: this.group_id }, @@ -608,43 +584,112 @@ export class MongoSyncBucketStorage snapshot_lsn: 1 } }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS, returnDocument: 'after' } ); - await this.db.bucket_data.deleteMany( - { - _id: idPrefixFilter({ g: this.group_id }, ['b', 'o']) - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + + if (doc?.rule_mapping != null) { + for (let [name, id] of Object.entries(doc.rule_mapping.definitions)) { + await this.retriedDelete(`deleting bucket data for ${name}`, signal, () => + this.db.bucket_data.deleteMany( + { + _id: idPrefixFilter({ g: id }, ['b', 'o']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + } + + for (let [name, id] of Object.entries(doc.rule_mapping.parameter_lookups)) { + await this.retriedDelete(`deleting parameter lookup data for ${name}`, signal, () => + this.db.bucket_parameters.deleteMany( + { + 'key.g': id + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + } + } + + await this.retriedDelete('deleting bucket data', signal, () => + this.db.bucket_data.deleteMany( + { + _id: idPrefixFilter({ g: this.group_id }, ['b', 'o']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); - await this.db.bucket_parameters.deleteMany( - { - 'key.g': this.group_id - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + await this.retriedDelete('deleting bucket parameter lookup values', signal, () => + this.db.bucket_parameters.deleteMany( + { + 'key.g': this.group_id + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); - await this.db.current_data.deleteMany( - { - _id: idPrefixFilter({ g: this.group_id }, ['t', 'k']) - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + // FIXME: handle refactored current_data structure + await this.retriedDelete('deleting current 
data records', signal, () => + this.db.current_data.deleteMany( + { + _id: idPrefixFilter({ g: this.group_id }, ['t', 'k']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); - await this.db.bucket_state.deleteMany( - { - _id: idPrefixFilter({ g: this.group_id }, ['b']) - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + await this.retriedDelete('deleting bucket state records', signal, () => + this.db.bucket_state.deleteMany( + { + _id: idPrefixFilter({ g: this.group_id }, ['b']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); - await this.db.source_tables.deleteMany( - { - group_id: this.group_id - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + // First remove the reference + this.db.source_tables.updateMany({ sync_rules_ids: this.group_id }, { $pull: { sync_rules_ids: this.group_id } }); + + // Then delete any source tables no longer referenced + await this.retriedDelete('deleting source table records', signal, () => + this.db.source_tables.deleteMany( + { + sync_rules_ids: [] + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) ); } + private async retriedDelete( + message: string, + signal: AbortSignal, + deleteFunc: () => Promise + ): Promise { + // Individual operations here may time out with the maxTimeMS option. + // It is expected to still make progress, and continue on the next try. + + let i = 0; + while (!signal.aborted) { + try { + const result = await deleteFunc(); + if (result.deletedCount > 0) { + logger.info(`${this.slot_name} ${message} - done`); + } + return; + } catch (e: unknown) { + if (lib_mongo.isMongoServerError(e) && e.codeName == 'MaxTimeMSExpired') { + i += 1; + logger.info(`${this.slot_name} ${message} iteration ${i}, continuing...`); + await timers.setTimeout(lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS / 5); + } else { + throw e; + } + } + } + throw new ReplicationAbortedError('Aborted clearing data', signal.reason); + } + async reportError(e: any): Promise { const message = String(e.message ?? 'Replication failure'); await this.db.sync_rules.updateOne( From 46ca53e067e95c5933d0e64d06384755d03b4802 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 12 Jan 2026 14:34:44 +0200 Subject: [PATCH 015/101] Scoped bucket_data. 
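
The diff below keys bucket data by the persisted bucket-definition ID rather than only the sync-rules group id: a HydrationState implementation resolves each bucket source and parameter lookup to its persisted ID from the rule mapping and exposes it as a hex-encoded scope. A reduced sketch of that mapper, using simplified local interfaces in place of the types from @powersync/service-sync-rules:

    // Simplified stand-ins for the sync-rules types.
    interface BucketSourceLike {
      uniqueName: string;
    }
    interface ParameterLookupLike {
      defaultLookupScope: { lookupName: string; queryId: string };
    }

    class MappedHydrationState {
      constructor(
        private definitions: Record<string, number>,
        private parameterLookups: Record<string, number>
      ) {}

      getBucketSourceScope(source: BucketSourceLike) {
        const id = this.definitions[source.uniqueName];
        if (id == null) {
          throw new Error(`No mapping found for bucket source ${source.uniqueName}`);
        }
        // The persisted ID, hex-encoded, becomes the stable bucket prefix.
        return { bucketPrefix: id.toString(16), source };
      }

      getParameterIndexLookupScope(source: ParameterLookupLike) {
        const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`;
        const id = this.parameterLookups[key];
        if (id == null) {
          throw new Error(`No mapping found for parameter lookup ${key}`);
        }
        return { lookupName: id.toString(16), queryId: '', source };
      }
    }
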
--- .../src/storage/MongoBucketStorage.ts | 12 +++- .../implementation/MongoBucketBatch.ts | 15 +++-- .../implementation/MongoPersistedSyncRules.ts | 61 +++++++++--------- .../MongoPersistedSyncRulesContent.ts | 11 ++-- .../implementation/MongoSyncBucketStorage.ts | 32 +++++++--- .../storage/implementation/PersistedBatch.ts | 38 ++++++++---- .../src/storage/implementation/models.ts | 1 + .../src/storage/PostgresSyncRulesStorage.ts | 12 ++-- .../register-data-storage-parameter-tests.ts | 14 +++-- .../register-parameter-compacting-tests.ts | 4 +- .../src/storage/SyncRulesBucketStorage.ts | 15 ++++- .../src/sync/BucketChecksumState.ts | 25 +++++--- .../service-core/src/util/protocol-types.ts | 2 +- packages/sync-rules/src/BucketDescription.ts | 5 ++ packages/sync-rules/src/BucketSource.ts | 16 ++--- packages/sync-rules/src/HydratedSyncRules.ts | 5 +- packages/sync-rules/src/HydrationState.ts | 8 ++- packages/sync-rules/src/SqlParameterQuery.ts | 7 ++- .../sync-rules/src/StaticSqlParameterQuery.ts | 4 +- .../TableValuedFunctionSqlParameterQuery.ts | 4 +- packages/sync-rules/src/streams/filter.ts | 5 +- packages/sync-rules/src/streams/variant.ts | 6 +- packages/sync-rules/src/types.ts | 8 ++- packages/sync-rules/src/utils.ts | 18 +++++- packages/sync-rules/test/src/streams.test.ts | 28 ++++++--- .../sync-rules/test/src/sync_rules.test.ts | 32 +++++++--- .../src/table_valued_function_queries.test.ts | 62 ++++++------------- 27 files changed, 277 insertions(+), 173 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index 6f505a07e..d5cbd5bbf 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -50,7 +50,14 @@ export class MongoBucketStorage if ((typeof id as any) == 'bigint') { id = Number(id); } - const storage = new MongoSyncBucketStorage(this, id, syncRules, slot_name, undefined, this.internalOptions); + const storage = new MongoSyncBucketStorage( + this, + id, + syncRules as MongoPersistedSyncRulesContent, + slot_name, + undefined, + this.internalOptions + ); if (!options?.skipLifecycleHooks) { this.iterateListeners((cb) => cb.syncStorageCreated?.(storage)); } @@ -223,7 +230,8 @@ export class MongoBucketStorage bucketDefinitionId += 1; return { // N/A - bucketPrefix: '' + bucketPrefix: '', + source }; }, getParameterIndexLookupScope(source) { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 078f671ed..0074c4754 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -24,9 +24,11 @@ import { } from '@powersync/service-core'; import * as timers from 'node:timers/promises'; import { idPrefixFilter, mongoTableId } from '../../utils/util.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { PowerSyncMongo } from './db.js'; import { CurrentBucket, CurrentDataDocument, SourceKey, SyncRuleDocument } from './models.js'; import { MongoIdSequence } from './MongoIdSequence.js'; +import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; import { batchCreateCustomWriteCheckpoints } from './MongoWriteCheckpointAPI.js'; import { cacheKey, OperationBatch, RecordOperation } from './OperationBatch.js'; import { 
PersistedBatch } from './PersistedBatch.js'; @@ -47,7 +49,7 @@ export const EMPTY_DATA = new bson.Binary(bson.serialize({})); export interface MongoBucketBatchOptions { db: PowerSyncMongo; - syncRules: HydratedSyncRules; + syncRules: MongoPersistedSyncRules; groupId: number; slotName: string; lastCheckpointLsn: string | null; @@ -85,6 +87,7 @@ export class MongoBucketBatch private write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; private clearedError = false; + private readonly mapping: BucketDefinitionMapping; /** * Last LSN received associated with a checkpoint. @@ -126,7 +129,8 @@ export class MongoBucketBatch this.resumeFromLsn = options.resumeFromLsn; this.session = this.client.startSession(); this.slot_name = options.slotName; - this.sync_rules = options.syncRules; + this.sync_rules = options.syncRules.hydratedSyncRules(); + this.mapping = options.syncRules.mapping; this.storeCurrentData = options.storeCurrentData; this.skipExistingRows = options.skipExistingRows; this.markRecordUnavailable = options.markRecordUnavailable; @@ -271,7 +275,8 @@ export class MongoBucketBatch } let persistedBatch: PersistedBatch | null = new PersistedBatch(this.group_id, transactionSize, { - logger: this.logger + logger: this.logger, + mapping: this.mapping }); for (let op of b) { @@ -497,7 +502,9 @@ export class MongoBucketBatch before_buckets: existing_buckets }); new_buckets = evaluated.map((e) => { + const sourceDefinitionId = this.mapping.bucketSourceId(e.source); return { + def: sourceDefinitionId, bucket: e.bucket, table: e.table, id: e.id @@ -1026,7 +1033,7 @@ export class MongoBucketBatch session: session }); const batch = await cursor.toArray(); - const persistedBatch = new PersistedBatch(this.group_id, 0, { logger: this.logger }); + const persistedBatch = new PersistedBatch(this.group_id, 0, { logger: this.logger, mapping: this.mapping }); for (let value of batch) { persistedBatch.saveBucketData({ diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts index 2d6ed9b64..39e3c9b34 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts @@ -1,13 +1,15 @@ -import { SqlSyncRules, HydratedSyncRules } from '@powersync/service-sync-rules'; +import { + BucketDataScope, + BucketDataSource, + HydratedSyncRules, + HydrationState, + ParameterIndexLookupCreator, + SqlSyncRules +} from '@powersync/service-sync-rules'; import { storage } from '@powersync/service-core'; import { versionedHydrationState } from '@powersync/service-sync-rules'; -import { ServiceAssertionError } from '@powersync/lib-services-framework'; - -export interface SyncDefinitionMapping { - definitions: Record; - parameterLookups: Record; -} +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; export class MongoPersistedSyncRules implements storage.PersistedSyncRules { public readonly slot_name: string; @@ -17,7 +19,7 @@ export class MongoPersistedSyncRules implements storage.PersistedSyncRules { public readonly sync_rules: SqlSyncRules, public readonly checkpoint_lsn: string | null, slot_name: string | null, - private readonly mapping: SyncDefinitionMapping | null + public readonly mapping: BucketDefinitionMapping ) { this.slot_name = slot_name ?? 
`powersync_${id}`; } @@ -27,29 +29,28 @@ export class MongoPersistedSyncRules implements storage.PersistedSyncRules { return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(this.id) }); } else { return this.sync_rules.hydrate({ - hydrationState: { - getBucketSourceScope: (source) => { - const defId = this.mapping!.definitions[source.uniqueName]; - if (defId == null) { - throw new ServiceAssertionError(`No mapping found for bucket source ${source.uniqueName}`); - } - return { - bucketPrefix: defId.toString(16) - }; - }, - getParameterIndexLookupScope: (source) => { - const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; - const defId = this.mapping!.parameterLookups[key]; - if (defId == null) { - throw new ServiceAssertionError(`No mapping found for parameter lookup ${key}`); - } - return { - lookupName: defId.toString(16), - queryId: '' - }; - } - } + hydrationState: new MongoHydrationState(this.mapping) }); } } } + +class MongoHydrationState implements HydrationState { + constructor(private mapping: BucketDefinitionMapping) {} + + getBucketSourceScope(source: BucketDataSource): BucketDataScope { + const defId = this.mapping.bucketSourceId(source); + return { + bucketPrefix: defId.toString(16), + source: source + }; + } + getParameterIndexLookupScope(source: ParameterIndexLookupCreator) { + const defId = this.mapping.parameterLookupId(source); + return { + lookupName: defId.toString(16), + queryId: '', + source + }; + } +} diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts index ca0fd3379..8a2bbc092 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRulesContent.ts @@ -1,7 +1,8 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { storage } from '@powersync/service-core'; import { SqlSyncRules } from '@powersync/service-sync-rules'; -import { MongoPersistedSyncRules, SyncDefinitionMapping } from './MongoPersistedSyncRules.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; +import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; import { MongoSyncRulesLock } from './MongoSyncRulesLock.js'; import { PowerSyncMongo } from './db.js'; import { SyncRuleDocument } from './models.js'; @@ -17,7 +18,7 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule public readonly last_keepalive_ts: Date | null; public readonly last_checkpoint_ts: Date | null; public readonly active: boolean; - private readonly rules_mapping: SyncDefinitionMapping | null = null; + public readonly mapping: BucketDefinitionMapping; public current_lock: MongoSyncRulesLock | null = null; @@ -34,9 +35,7 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule this.last_fatal_error_ts = doc.last_fatal_error_ts; this.last_checkpoint_ts = doc.last_checkpoint_ts; this.last_keepalive_ts = doc.last_keepalive_ts; - this.rules_mapping = doc.rule_mapping - ? 
{ definitions: doc.rule_mapping.definitions, parameterLookups: doc.rule_mapping.parameter_lookups } - : null; + this.mapping = BucketDefinitionMapping.fromSyncRules(doc); this.active = doc.state == 'ACTIVE'; } @@ -46,7 +45,7 @@ export class MongoPersistedSyncRulesContent implements storage.PersistedSyncRule SqlSyncRules.fromYaml(this.sync_rules_content, options), this.last_checkpoint_lsn, this.slot_name, - this.rules_mapping + this.mapping ); } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 921947801..715e12df1 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -8,6 +8,7 @@ import { } from '@powersync/lib-services-framework'; import { BroadcastIterable, + BucketDataRequest, CHECKPOINT_INVALIDATE_ALL, CheckpointChanges, deserializeParameterLookup, @@ -24,7 +25,12 @@ import { WatchWriteCheckpointOptions } from '@powersync/service-core'; import { JSONBig } from '@powersync/service-jsonbig'; -import { HydratedSyncRules, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; +import { + BucketDataSource, + HydratedSyncRules, + ScopedParameterLookup, + SqliteJsonRow +} from '@powersync/service-sync-rules'; import * as bson from 'bson'; import { LRUCache } from 'lru-cache'; import * as timers from 'timers/promises'; @@ -37,6 +43,8 @@ import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js'; import { MongoCompactor } from './MongoCompactor.js'; import { MongoParameterCompactor } from './MongoParameterCompactor.js'; import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js'; +import { MongoPersistedSyncRulesContent } from './MongoPersistedSyncRulesContent.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; export interface MongoSyncBucketStorageOptions { checksumOptions?: MongoChecksumOptions; @@ -62,17 +70,19 @@ export class MongoSyncBucketStorage private parsedSyncRulesCache: { parsed: HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined; private writeCheckpointAPI: MongoWriteCheckpointAPI; + private mapping: BucketDefinitionMapping; constructor( public readonly factory: MongoBucketStorage, public readonly group_id: number, - private readonly sync_rules: storage.PersistedSyncRulesContent, + private readonly sync_rules: MongoPersistedSyncRulesContent, public readonly slot_name: string, writeCheckpointMode?: storage.WriteCheckpointMode, options?: MongoSyncBucketStorageOptions ) { super(); this.db = factory.db; + this.mapping = this.sync_rules.mapping; this.checksums = new MongoChecksums(this.db, this.group_id, options?.checksumOptions); this.writeCheckpointAPI = new MongoWriteCheckpointAPI({ db: this.db, @@ -163,10 +173,12 @@ export class MongoSyncBucketStorage ); const checkpoint_lsn = doc?.last_checkpoint_lsn ?? 
null; + const parsedSyncRules = this.sync_rules.parsed(options); + const batch = new MongoBucketBatch({ logger: options.logger, db: this.db, - syncRules: this.sync_rules.parsed(options).hydratedSyncRules(), + syncRules: parsedSyncRules, groupId: this.group_id, slotName: this.slot_name, lastCheckpointLsn: checkpoint_lsn, @@ -364,28 +376,30 @@ export class MongoSyncBucketStorage async *getBucketDataBatch( checkpoint: utils.InternalOpId, - dataBuckets: Map, + dataBuckets: BucketDataRequest[], options?: storage.BucketDataBatchOptions ): AsyncIterable { - if (dataBuckets.size == 0) { + if (dataBuckets.length == 0) { return; } let filters: mongo.Filter[] = []; + const bucketMap = new Map(dataBuckets.map((d) => [d.bucket, d.start])); if (checkpoint == null) { throw new ServiceAssertionError('checkpoint is null'); } const end = checkpoint; - for (let [name, start] of dataBuckets.entries()) { + for (let { bucket: name, start, source } of dataBuckets) { + const sourceDefinitionId = this.mapping.bucketSourceId(source); filters.push({ _id: { $gt: { - g: this.group_id, + g: sourceDefinitionId, b: name, o: start }, $lte: { - g: this.group_id, + g: sourceDefinitionId, b: name, o: end as any } @@ -469,7 +483,7 @@ export class MongoSyncBucketStorage } if (start == null) { - const startOpId = dataBuckets.get(bucket); + const startOpId = bucketMap.get(bucket); if (startOpId == null) { throw new ServiceAssertionError(`data for unexpected bucket: ${bucket}`); } diff --git a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts index 1b41fa1f6..c8b428ae1 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts @@ -1,10 +1,11 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { JSONBig } from '@powersync/service-jsonbig'; -import { EvaluatedParameters, EvaluatedRow } from '@powersync/service-sync-rules'; +import { BucketDataSource, EvaluatedParameters, EvaluatedRow } from '@powersync/service-sync-rules'; import * as bson from 'bson'; -import { Logger, logger as defaultLogger } from '@powersync/lib-services-framework'; +import { Logger, ReplicationAssertionError } from '@powersync/lib-services-framework'; import { InternalOpId, storage, utils } from '@powersync/service-core'; +import { mongoTableId, replicaIdToSubkey } from '../../utils/util.js'; import { currentBucketKey, EMPTY_DATA, MAX_ROW_SIZE } from './MongoBucketBatch.js'; import { MongoIdSequence } from './MongoIdSequence.js'; import { PowerSyncMongo } from './db.js'; @@ -16,7 +17,7 @@ import { CurrentDataDocument, SourceKey } from './models.js'; -import { mongoTableId, replicaIdToSubkey } from '../../utils/util.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; /** * Maximum size of operations we write in a single transaction. @@ -51,6 +52,7 @@ export class PersistedBatch { bucketParameters: mongo.AnyBulkWriteOperation[] = []; currentData: mongo.AnyBulkWriteOperation[] = []; bucketStates: Map = new Map(); + mapping: BucketDefinitionMapping; /** * For debug logging only. @@ -65,13 +67,14 @@ export class PersistedBatch { constructor( private group_id: number, writtenSize: number, - options?: { logger?: Logger } + options: { logger: Logger; mapping: BucketDefinitionMapping } ) { this.currentSize = writtenSize; - this.logger = options?.logger ?? 
defaultLogger; + this.logger = options.logger; + this.mapping = options.mapping; } - private incrementBucket(bucket: string, op_id: InternalOpId, bytes: number) { + private incrementBucket(defId: number, bucket: string, op_id: InternalOpId, bytes: number) { let existingState = this.bucketStates.get(bucket); if (existingState) { existingState.lastOp = op_id; @@ -81,7 +84,8 @@ export class PersistedBatch { this.bucketStates.set(bucket, { lastOp: op_id, incrementCount: 1, - incrementBytes: bytes + incrementBytes: bytes, + def: defId }); } } @@ -102,7 +106,14 @@ export class PersistedBatch { const dchecksum = BigInt(utils.hashDelete(replicaIdToSubkey(options.table.id, options.sourceKey))); for (const k of options.evaluated) { - const key = currentBucketKey(k); + const source = k.source; + const sourceDefinitionId = this.mapping.bucketSourceId(source); + const key = currentBucketKey({ + bucket: k.bucket, + table: k.table, + id: k.id, + def: sourceDefinitionId + }); // INSERT const recordData = JSONBig.stringify(k.data); @@ -127,7 +138,7 @@ export class PersistedBatch { insertOne: { document: { _id: { - g: this.group_id, + g: sourceDefinitionId, b: k.bucket, o: op_id }, @@ -141,7 +152,7 @@ export class PersistedBatch { } } }); - this.incrementBucket(k.bucket, op_id, byteEstimate); + this.incrementBucket(sourceDefinitionId, k.bucket, op_id, byteEstimate); } for (let bd of remaining_buckets.values()) { @@ -154,7 +165,7 @@ export class PersistedBatch { insertOne: { document: { _id: { - g: this.group_id, + g: bd.def, b: bd.bucket, o: op_id }, @@ -169,7 +180,7 @@ export class PersistedBatch { } }); this.currentSize += 200; - this.incrementBucket(bd.bucket, op_id, 200); + this.incrementBucket(bd.def, bd.bucket, op_id, 200); } } @@ -393,7 +404,7 @@ export class PersistedBatch { updateOne: { filter: { _id: { - g: this.group_id, + g: state.def, b: bucket } }, @@ -417,4 +428,5 @@ interface BucketStateUpdate { lastOp: InternalOpId; incrementCount: number; incrementBytes: number; + def: number; } diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index 4eeacfc02..1b8852365 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -44,6 +44,7 @@ export interface CurrentDataDocument { } export interface CurrentBucket { + def: number; bucket: string; table: string; id: string; diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 12c676f79..f36f124b4 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -2,6 +2,7 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { BroadcastIterable, BucketChecksum, + BucketDataRequest, CHECKPOINT_INVALIDATE_ALL, CheckpointChanges, GetCheckpointChangesOptions, @@ -419,10 +420,10 @@ export class PostgresSyncRulesStorage async *getBucketDataBatch( checkpoint: InternalOpId, - dataBuckets: Map, + dataBuckets: BucketDataRequest[], options?: storage.BucketDataBatchOptions ): AsyncIterable { - if (dataBuckets.size == 0) { + if (dataBuckets.length == 0) { return; } @@ -434,10 +435,11 @@ export class PostgresSyncRulesStorage // not match up with chunks. const end = checkpoint ?? 
BIGINT_MAX; - const filters = Array.from(dataBuckets.entries()).map(([name, start]) => ({ - bucket_name: name, + const filters = dataBuckets.map(({ bucket, start }) => ({ + bucket_name: bucket, start: start })); + const bucketMap = new Map(dataBuckets.map((d) => [d.bucket, d.start])); const batchRowLimit = options?.limit ?? storage.DEFAULT_DOCUMENT_BATCH_LIMIT; const chunkSizeLimitBytes = options?.chunkLimitBytes ?? storage.DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES; @@ -537,7 +539,7 @@ export class PostgresSyncRulesStorage } if (start == null) { - const startOpId = dataBuckets.get(bucket_name); + const startOpId = bucketMap.get(bucket_name); if (startOpId == null) { throw new framework.ServiceAssertionError(`data for unexpected bucket: ${bucket_name}`); } diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index 2b409780d..30b7c7c07 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -17,7 +17,7 @@ import * as test_utils from '../test-utils/test-utils-index.js'; export function registerDataStorageParameterTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); - const MYBUCKET_1: ParameterLookupScope = { lookupName: 'mybucket', queryId: '1' }; + const MYBUCKET_1: ParameterLookupScope = { lookupName: 'mybucket', queryId: '1', source: null as any }; test('save and load parameters', async () => { await using factory = await generateStorageFactory(); @@ -344,7 +344,9 @@ bucket_definitions: const querier = sync_rules.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const lookups = querier.parameterQueryLookups; - expect(lookups).toEqual([ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1' }, ['u1'])]); + expect(lookups).toEqual([ + ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1', source: null as any }, ['u1']) + ]); const parameter_sets = await checkpoint.getParameterSets(lookups); expect(parameter_sets).toEqual([{ workspace_id: 'workspace1' }]); @@ -418,7 +420,9 @@ bucket_definitions: const querier = sync_rules.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const lookups = querier.parameterQueryLookups; - expect(lookups).toEqual([ScopedParameterLookup.direct({ lookupName: 'by_public_workspace', queryId: '1' }, [])]); + expect(lookups).toEqual([ + ScopedParameterLookup.direct({ lookupName: 'by_public_workspace', queryId: '1', source: null as any }, []) + ]); const parameter_sets = await checkpoint.getParameterSets(lookups); parameter_sets.sort((a, b) => JSON.stringify(a).localeCompare(JSON.stringify(b))); @@ -521,8 +525,8 @@ bucket_definitions: const lookups = querier.parameterQueryLookups; expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1' }, []), - ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '2' }, ['u1']) + ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1', source: null as any }, []), + ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '2', source: null as any }, ['u1']) ]); const parameter_sets = await checkpoint.getParameterSets(lookups); diff --git a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts 
b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts index 609b6f6fd..7d21ed8d6 100644 --- a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts @@ -43,7 +43,7 @@ bucket_definitions: await batch.commit('1/1'); }); - const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1' }, ['t1']); + const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1', source: null as any }, ['t1']); const checkpoint1 = await bucketStorage.getCheckpoint(); const parameters1 = await checkpoint1.getParameterSets([lookup]); @@ -155,7 +155,7 @@ bucket_definitions: await batch.commit('3/1'); }); - const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1' }, ['u1']); + const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1', source: null as any }, ['u1']); const checkpoint1 = await bucketStorage.getCheckpoint(); const parameters1 = await checkpoint1.getParameterSets([lookup]); diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index eaf1696ff..813855bb0 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -1,5 +1,10 @@ import { Logger, ObserverClient } from '@powersync/lib-services-framework'; -import { HydratedSyncRules, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; +import { + BucketDataSource, + HydratedSyncRules, + ScopedParameterLookup, + SqliteJsonRow +} from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; import { BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; import { BucketStorageFactory } from './BucketStorageFactory.js'; @@ -115,7 +120,7 @@ export interface SyncRulesBucketStorage */ getBucketDataBatch( checkpoint: util.InternalOpId, - dataBuckets: Map, + dataBuckets: BucketDataRequest[], options?: BucketDataBatchOptions ): AsyncIterable; @@ -139,6 +144,12 @@ export interface SyncRulesBucketStorageListener { batchStarted: (batch: BucketStorageBatch) => void; } +export interface BucketDataRequest { + bucket: string; + start: util.InternalOpId; + source: BucketDataSource; +} + export interface SyncRuleStatus { checkpoint_lsn: string | null; active: boolean; diff --git a/packages/service-core/src/sync/BucketChecksumState.ts b/packages/service-core/src/sync/BucketChecksumState.ts index d9afa0efb..4b6a7b155 100644 --- a/packages/service-core/src/sync/BucketChecksumState.ts +++ b/packages/service-core/src/sync/BucketChecksumState.ts @@ -6,7 +6,8 @@ import { RequestedStream, RequestJwtPayload, RequestParameters, - ResolvedBucket + ResolvedBucket, + SOURCE } from '@powersync/service-sync-rules'; import * as storage from '../storage/storage-index.js'; @@ -207,8 +208,10 @@ export class BucketChecksumState { ...this.parameterState.translateResolvedBucket(bucketDescriptionMap.get(e.bucket)!, streamNameToIndex) })); bucketsToFetch = [...generateBucketsToFetch].map((b) => { + const description = bucketDescriptionMap.get(b); return { - priority: bucketDescriptionMap.get(b)!.priority, + priority: description!.priority, + [SOURCE]: description![SOURCE], bucket: b }; }); @@ -242,7 +245,7 @@ export class BucketChecksumState { message += `buckets: ${allBuckets.length} ${limitedBuckets(allBuckets, 20)}`; this.logger.info(message, { checkpoint: 
base.checkpoint, user_id: user_id, buckets: allBuckets.length }); }; - bucketsToFetch = allBuckets.map((b) => ({ bucket: b.bucket, priority: b.priority })); + bucketsToFetch = allBuckets.map((b) => ({ bucket: b.bucket, priority: b.priority, [SOURCE]: b[SOURCE] })); const subscriptions: util.StreamDescription[] = []; const streamNameToIndex = new Map(); @@ -319,17 +322,21 @@ export class BucketChecksumState { deferredLog(); }, - getFilteredBucketPositions: (buckets?: BucketDescription[]): Map => { + getFilteredBucketPositions: (buckets?: BucketDescription[]): storage.BucketDataRequest[] => { if (!hasAdvanced) { throw new ServiceAssertionError('Call line.advance() before getFilteredBucketPositions()'); } buckets ??= bucketsToFetch; - const filtered = new Map(); + const filtered: storage.BucketDataRequest[] = []; for (let bucket of buckets) { const state = this.bucketDataPositions.get(bucket.bucket); if (state) { - filtered.set(bucket.bucket, state.start_op_id); + filtered.push({ + bucket: bucket.bucket, + start: state.start_op_id, + source: bucket[SOURCE] + }); } } return filtered; @@ -617,7 +624,7 @@ export interface CheckpointLine { * * @param bucketsToFetch List of buckets to fetch - either this.bucketsToFetch, or a subset of it. Defaults to this.bucketsToFetch. */ - getFilteredBucketPositions(bucketsToFetch?: BucketDescription[]): Map; + getFilteredBucketPositions(bucketsToFetch?: BucketDescription[]): storage.BucketDataRequest[]; /** * Update the position of bucket data the client has, after it was sent to the client. @@ -668,7 +675,9 @@ function mergeBuckets(buckets: ResolvedBucket[]): ResolvedBucket[] { if (Object.hasOwn(byBucketId, bucket.bucket)) { byBucketId[bucket.bucket].inclusion_reasons.push(...bucket.inclusion_reasons); } else { - byBucketId[bucket.bucket] = structuredClone(bucket); + let clone = structuredClone(bucket); + clone[SOURCE] = bucket[SOURCE]; // structuredClone does not clone symbol-keyed properties + byBucketId[bucket.bucket] = clone; } } diff --git a/packages/service-core/src/util/protocol-types.ts b/packages/service-core/src/util/protocol-types.ts index 82baa89e5..0156c0f23 100644 --- a/packages/service-core/src/util/protocol-types.ts +++ b/packages/service-core/src/util/protocol-types.ts @@ -1,5 +1,5 @@ import { JsonContainer } from '@powersync/service-jsonbig'; -import { BucketPriority, SqliteJsonRow } from '@powersync/service-sync-rules'; +import { BucketDataScope, BucketPriority, SqliteJsonRow } from '@powersync/service-sync-rules'; import * as t from 'ts-codec'; export const BucketRequest = t.object({ diff --git a/packages/sync-rules/src/BucketDescription.ts b/packages/sync-rules/src/BucketDescription.ts index 8dd732f34..032bcd282 100644 --- a/packages/sync-rules/src/BucketDescription.ts +++ b/packages/sync-rules/src/BucketDescription.ts @@ -1,3 +1,6 @@ +import { BucketDataSource } from './index.js'; +import { SOURCE } from './utils.js'; + /** * The priority in which to synchronize buckets. * @@ -29,6 +32,8 @@ export interface BucketDescription { * The priority used to synchronize this bucket, derived from its definition. 
*/ priority: BucketPriority; + + [SOURCE]: BucketDataSource; } /** diff --git a/packages/sync-rules/src/BucketSource.ts b/packages/sync-rules/src/BucketSource.ts index 482fb5f5b..c84e0bca1 100644 --- a/packages/sync-rules/src/BucketSource.ts +++ b/packages/sync-rules/src/BucketSource.ts @@ -1,8 +1,8 @@ import { BucketParameterQuerier, - UnscopedParameterLookup, PendingQueriers, - ScopedParameterLookup + ScopedParameterLookup, + UnscopedParameterLookup } from './BucketParameterQuerier.js'; import { ColumnDefinition } from './ExpressionType.js'; import { DEFAULT_HYDRATION_STATE, HydrationState, ParameterLookupScope } from './HydrationState.js'; @@ -10,18 +10,18 @@ import { SourceTableInterface } from './SourceTableInterface.js'; import { GetQuerierOptions } from './SqlSyncRules.js'; import { TablePattern } from './TablePattern.js'; import { + EvaluatedParameters, EvaluatedParametersResult, EvaluatedRow, EvaluateRowOptions, EvaluationResult, isEvaluationError, - UnscopedEvaluationResult, SourceSchema, SqliteRow, UnscopedEvaluatedParametersResult, - EvaluatedParameters + UnscopedEvaluationResult } from './types.js'; -import { buildBucketName } from './utils.js'; +import { buildBucketInfo, SOURCE } from './utils.js'; export interface CreateSourceParams { hydrationState: HydrationState; @@ -171,11 +171,13 @@ export function hydrateEvaluateRow(hydrationState: HydrationState, source: Bucke if (isEvaluationError(result)) { return result; } + const info = buildBucketInfo(scope, result.serializedBucketParameters); return { - bucket: buildBucketName(scope, result.serializedBucketParameters), + bucket: info.bucket, id: result.id, table: result.table, - data: result.data + data: result.data, + source: info[SOURCE] } satisfies EvaluatedRow; }); }; diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index 36ccb8a1f..06ac260cb 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -1,10 +1,6 @@ -import { Scope } from 'ajv/dist/compile/codegen/scope.js'; import { BucketDataSource, CreateSourceParams, HydratedBucketSource } from './BucketSource.js'; -import { BucketDataScope, ParameterLookupScope } from './HydrationState.js'; import { - ParameterIndexLookupCreator, BucketParameterQuerier, - buildBucketName, CompatibilityContext, EvaluatedParameters, EvaluatedRow, @@ -17,6 +13,7 @@ import { mergeBucketParameterQueriers, mergeDataSources, mergeParameterIndexLookupCreators, + ParameterIndexLookupCreator, QuerierError, ScopedEvaluateParameterRow, ScopedEvaluateRow, diff --git a/packages/sync-rules/src/HydrationState.ts b/packages/sync-rules/src/HydrationState.ts index f836a62b4..a42831760 100644 --- a/packages/sync-rules/src/HydrationState.ts +++ b/packages/sync-rules/src/HydrationState.ts @@ -3,12 +3,14 @@ import { BucketDataSource, ParameterIndexLookupCreator } from './BucketSource.js export interface BucketDataScope { /** The prefix is the bucket name before the parameters. */ bucketPrefix: string; + source: BucketDataSource; } export interface ParameterLookupScope { /** The lookup name + queryid is used to reference the parameter lookup record. 
*/ lookupName: string; queryId: string; + source: ParameterIndexLookupCreator; } /** @@ -37,7 +39,8 @@ export interface HydrationState { export const DEFAULT_HYDRATION_STATE: HydrationState = { getBucketSourceScope(source: BucketDataSource) { return { - bucketPrefix: source.uniqueName + bucketPrefix: source.uniqueName, + source }; }, getParameterIndexLookupScope(source) { @@ -61,7 +64,8 @@ export function versionedHydrationState(version: number): HydrationState { return { getBucketSourceScope(source: BucketDataSource): BucketDataScope { return { - bucketPrefix: `${version}#${source.uniqueName}` + bucketPrefix: `${version}#${source.uniqueName}`, + source }; }, diff --git a/packages/sync-rules/src/SqlParameterQuery.ts b/packages/sync-rules/src/SqlParameterQuery.ts index 218724573..08b04e591 100644 --- a/packages/sync-rules/src/SqlParameterQuery.ts +++ b/packages/sync-rules/src/SqlParameterQuery.ts @@ -42,7 +42,7 @@ import { SqliteRow } from './types.js'; import { - buildBucketName, + buildBucketInfo, filterJsonRow, isJsonValue, isSelectStatement, @@ -337,7 +337,8 @@ export class SqlParameterQuery implements ParameterIndexLookupCreator { public get defaultLookupScope(): ParameterLookupScope { return { lookupName: this.descriptorName, - queryId: this.queryId + queryId: this.queryId, + source: this }; } @@ -442,7 +443,7 @@ export class SqlParameterQuery implements ParameterIndexLookupCreator { const serializedParameters = serializeBucketParameters(this.bucketParameters, result); return { - bucket: buildBucketName(bucketScope, serializedParameters), + ...buildBucketInfo(bucketScope, serializedParameters), priority: this.priority }; }) diff --git a/packages/sync-rules/src/StaticSqlParameterQuery.ts b/packages/sync-rules/src/StaticSqlParameterQuery.ts index 7a55dbb92..4a0255c61 100644 --- a/packages/sync-rules/src/StaticSqlParameterQuery.ts +++ b/packages/sync-rules/src/StaticSqlParameterQuery.ts @@ -10,7 +10,7 @@ import { AvailableTable, SqlTools } from './sql_filters.js'; import { checkUnsupportedFeatures, isClauseError, sqliteBool } from './sql_support.js'; import { TablePattern } from './TablePattern.js'; import { ParameterValueClause, QueryParseOptions, RequestParameters, SqliteJsonValue } from './types.js'; -import { buildBucketName, isJsonValue, serializeBucketParameters } from './utils.js'; +import { buildBucketInfo, isJsonValue, serializeBucketParameters } from './utils.js'; import { DetectRequestParameters } from './validators.js'; export interface StaticSqlParameterQueryOptions { @@ -228,7 +228,7 @@ export class StaticSqlParameterQuery { return [ { - bucket: buildBucketName(bucketSourceScope, serializedParamters), + ...buildBucketInfo(bucketSourceScope, serializedParamters), priority: this.priority } ]; diff --git a/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts b/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts index d4e9bfff7..1fa5a3008 100644 --- a/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts +++ b/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts @@ -23,7 +23,7 @@ import { SqliteJsonValue, SqliteRow } from './types.js'; -import { buildBucketName, isJsonValue, serializeBucketParameters } from './utils.js'; +import { buildBucketInfo, isJsonValue, serializeBucketParameters } from './utils.js'; import { DetectRequestParameters } from './validators.js'; export interface TableValuedFunctionSqlParameterQueryOptions { @@ -309,7 +309,7 @@ export class TableValuedFunctionSqlParameterQuery { const serializedBucketParameters = 
serializeBucketParameters(this.bucketParameters, result); return { - bucket: buildBucketName(bucketScope, serializedBucketParameters), + ...buildBucketInfo(bucketScope, serializedBucketParameters), priority: this.priority }; } diff --git a/packages/sync-rules/src/streams/filter.ts b/packages/sync-rules/src/streams/filter.ts index 4ef22eab6..e211f377e 100644 --- a/packages/sync-rules/src/streams/filter.ts +++ b/packages/sync-rules/src/streams/filter.ts @@ -540,10 +540,11 @@ export class SubqueryParameterLookupSource implements ParameterIndexLookupCreato private streamName: string ) {} - public get defaultLookupScope() { + public get defaultLookupScope(): ParameterLookupScope { return { lookupName: this.streamName, - queryId: this.defaultQueryId + queryId: this.defaultQueryId, + source: this }; } diff --git a/packages/sync-rules/src/streams/variant.ts b/packages/sync-rules/src/streams/variant.ts index faa448f6f..44f910d63 100644 --- a/packages/sync-rules/src/streams/variant.ts +++ b/packages/sync-rules/src/streams/variant.ts @@ -4,9 +4,9 @@ import { BucketDataSource, BucketParameterQuerierSource, ParameterIndexLookupCre import { BucketDataScope } from '../HydrationState.js'; import { CreateSourceParams, GetQuerierOptions, RequestedStream, ScopedParameterLookup } from '../index.js'; import { RequestParameters, SqliteJsonValue, TableRow } from '../types.js'; -import { buildBucketName, isJsonValue, JSONBucketNameSerialize } from '../utils.js'; +import { buildBucketInfo, isJsonValue, JSONBucketNameSerialize } from '../utils.js'; import { BucketParameter, SubqueryEvaluator } from './parameter.js'; -import { SyncStream, SyncStreamDataSource } from './stream.js'; +import { SyncStream } from './stream.js'; import { cartesianProduct } from './utils.js'; /** @@ -295,7 +295,7 @@ export class StreamVariant { return { definition: stream.name, inclusion_reasons: [reason], - bucket: buildBucketName(bucketScope, this.serializeBucketParameters(instantiation)), + ...buildBucketInfo(bucketScope, this.serializeBucketParameters(instantiation)), priority: stream.priority }; } diff --git a/packages/sync-rules/src/types.ts b/packages/sync-rules/src/types.ts index 44bc7ee30..242ac52e8 100644 --- a/packages/sync-rules/src/types.ts +++ b/packages/sync-rules/src/types.ts @@ -8,7 +8,8 @@ import { SourceTableInterface } from './SourceTableInterface.js'; import { SyncRulesOptions } from './SqlSyncRules.js'; import { TablePattern } from './TablePattern.js'; import { CustomSqliteValue } from './types/custom_sqlite_value.js'; -import { toSyncRulesParameters } from './utils.js'; +import { SOURCE, toSyncRulesParameters } from './utils.js'; +import { BucketDataSource } from './index.js'; export interface QueryParseOptions extends SyncRulesOptions { accept_potentially_dangerous_queries?: boolean; @@ -58,6 +59,11 @@ export interface EvaluatedRow { /** Must be JSON-serializable. */ data: SqliteJsonRow; + + /** + * Source for the evaluated row. 
+ */ + source: BucketDataSource; } /** diff --git a/packages/sync-rules/src/utils.ts b/packages/sync-rules/src/utils.ts index cd5ed1566..6f680714b 100644 --- a/packages/sync-rules/src/utils.ts +++ b/packages/sync-rules/src/utils.ts @@ -15,12 +15,28 @@ import { SqliteValue } from './types.js'; import { CustomArray, CustomObject, CustomSqliteValue } from './types/custom_sqlite_value.js'; +import { BucketDataSource } from './BucketSource.js'; export function isSelectStatement(q: Statement): q is SelectFromStatement { return q.type == 'select'; } -export function buildBucketName(scope: BucketDataScope, serializedParameters: string): string { +export const SOURCE = Symbol.for('BucketSourceStorage'); + +export function buildBucketInfo( + scope: BucketDataScope, + serializedParameters: string +): { bucket: string; [SOURCE]: BucketDataSource } { + if (scope.source == null) { + throw new Error('foooo'); + } + return { + bucket: scope.bucketPrefix + serializedParameters, + [SOURCE]: scope.source + }; +} + +function buildBucketName(scope: BucketDataScope, serializedParameters: string): string { return scope.bucketPrefix + serializedParameters; } diff --git a/packages/sync-rules/test/src/streams.test.ts b/packages/sync-rules/test/src/streams.test.ts index dc8b59b85..e372090b2 100644 --- a/packages/sync-rules/test/src/streams.test.ts +++ b/packages/sync-rules/test/src/streams.test.ts @@ -29,11 +29,13 @@ import { normalizeQuerierOptions, PARSE_OPTIONS, TestSourceTable } from './util. describe('streams', () => { const STREAM_0: ParameterLookupScope = { lookupName: 'stream', - queryId: '0' + queryId: '0', + source: null as any }; const STREAM_1: ParameterLookupScope = { lookupName: 'stream', - queryId: '1' + queryId: '1', + source: null as any }; test('refuses edition: 1', () => { @@ -761,7 +763,7 @@ describe('streams', () => { parameters: {}, getParameterSets(lookups) { expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct({ lookupName: 'account_member', queryId: '0' }, ['id']) + ScopedParameterLookup.direct(stream.parameterIndexLookupCreators[0].defaultLookupScope, ['id']) ]); return [{ result: 'account_id' }]; } @@ -877,12 +879,13 @@ WHERE const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}.test` }; + return { bucketPrefix: `${source.uniqueName}.test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -901,7 +904,10 @@ WHERE }) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test' }, ['u1']), + lookup: ScopedParameterLookup.direct( + { lookupName: 'stream.test', queryId: '0.test', source: desc.parameterIndexLookupCreators[0] }, + ['u1'] + ), bucketParameters: [ { result: 'i1' @@ -910,7 +916,10 @@ WHERE }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '1.test' }, ['myname']), + lookup: ScopedParameterLookup.direct( + { lookupName: 'stream.test', queryId: '1.test', source: desc.parameterIndexLookupCreators[1] }, + ['myname'] + ), bucketParameters: [ { result: 'i1' @@ -926,7 +935,10 @@ WHERE }) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test' }, ['u1']), + lookup: ScopedParameterLookup.direct( + { lookupName: 'stream.test', queryId: '0.test', source: desc.parameterIndexLookupCreators[0] }, + 
['u1'] + ), bucketParameters: [ { result: 'i1' diff --git a/packages/sync-rules/test/src/sync_rules.test.ts b/packages/sync-rules/test/src/sync_rules.test.ts index 80a73bce8..e85949da8 100644 --- a/packages/sync-rules/test/src/sync_rules.test.ts +++ b/packages/sync-rules/test/src/sync_rules.test.ts @@ -109,7 +109,7 @@ bucket_definitions: expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([ { bucketParameters: [{}], - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1' }, ['user1']) + lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1', source: null as any }, ['user1']) } ]); expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 0 })).toEqual([]); @@ -179,12 +179,13 @@ bucket_definitions: ); const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -202,13 +203,19 @@ bucket_definitions: } ]); expect(querier.querier.parameterQueryLookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test' }, ['user1']) + ScopedParameterLookup.direct( + { lookupName: 'mybucket.test', queryId: '2.test', source: rules.bucketParameterLookupSources[1] }, + ['user1'] + ) ]); expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([ { bucketParameters: [{ user_id: 'user1' }], - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test' }, ['user1']) + lookup: ScopedParameterLookup.direct( + { lookupName: 'mybucket.test', queryId: '2.test', source: rules.bucketParameterLookupSources[1] }, + ['user1'] + ) } ]); @@ -1027,10 +1034,19 @@ bucket_definitions: expect(hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ user_id: 'user1' })).querier).toMatchObject({ hasDynamicBuckets: true, parameterQueryLookups: [ - ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '2' }, ['user1']), - ScopedParameterLookup.direct({ lookupName: 'by_list', queryId: '1' }, ['user1']), + ScopedParameterLookup.direct( + { lookupName: 'mybucket', queryId: '2', source: rules.bucketParameterLookupSources[1] }, + ['user1'] + ), + ScopedParameterLookup.direct( + { lookupName: 'by_list', queryId: '1', source: rules.bucketParameterLookupSources[2] }, + ['user1'] + ), // These are not filtered out yet, due to how the lookups are structured internally - ScopedParameterLookup.direct({ lookupName: 'admin_only', queryId: '1' }, [1]) + ScopedParameterLookup.direct( + { lookupName: 'admin_only', queryId: '1', source: rules.bucketParameterLookupSources[3] }, + [1] + ) ], staticBuckets: [ { diff --git a/packages/sync-rules/test/src/table_valued_function_queries.test.ts b/packages/sync-rules/test/src/table_valued_function_queries.test.ts index 306fb6935..5ea6572a1 100644 --- a/packages/sync-rules/test/src/table_valued_function_queries.test.ts +++ b/packages/sync-rules/test/src/table_valued_function_queries.test.ts @@ -1,5 +1,6 @@ import { describe, expect, test } from 'vitest'; import { + BucketDataScope, CompatibilityContext, CompatibilityEdition, CompatibilityOption, @@ -10,6 +11,13 @@ import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; import { 
EMPTY_DATA_SOURCE, PARSE_OPTIONS } from './util.js'; describe('table-valued function queries', () => { + function scope(q: StaticSqlParameterQuery): BucketDataScope { + return { + bucketPrefix: 'mybucket', + source: q.querierDataSource + }; + } + test('json_each(array param)', function () { const sql = "SELECT json_each.value as v FROM json_each(request.parameters() -> 'array')"; const query = SqlParameterQuery.fromSql( @@ -26,9 +34,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3, null] }), { - bucketPrefix: 'mybucket' - }) + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3, null] }), scope(query)) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, @@ -57,9 +63,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3, null] }), { - bucketPrefix: 'mybucket' - }) + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3, null] }), scope(query)) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, @@ -80,11 +84,7 @@ describe('table-valued function queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['v']); - expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), { - bucketPrefix: 'mybucket' - }) - ).toEqual([ + expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), scope(query))).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, { bucket: 'mybucket[3]', priority: 3 } @@ -103,11 +103,7 @@ describe('table-valued function queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['v']); - expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), { - bucketPrefix: 'mybucket' - }) - ).toEqual([]); + expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), scope(query))).toEqual([]); }); test('json_each(array param not present)', function () { @@ -125,11 +121,7 @@ describe('table-valued function queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['v']); - expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), { - bucketPrefix: 'mybucket' - }) - ).toEqual([]); + expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), scope(query))).toEqual([]); }); test('json_each(array param not present, ifnull)', function () { @@ -147,11 +139,7 @@ describe('table-valued function queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['v']); - expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), { - bucketPrefix: 'mybucket' - }) - ).toEqual([]); + expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), scope(query))).toEqual([]); }); test('json_each on json_keys', function () { @@ -166,11 +154,7 @@ describe('table-valued function queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['value']); - expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), { - bucketPrefix: 'mybucket' - }) - ).toEqual([ + 
expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), scope(query))).toEqual([ { bucket: 'mybucket["a"]', priority: 3 }, { bucket: 'mybucket["b"]', priority: 3 }, { bucket: 'mybucket["c"]', priority: 3 } @@ -193,9 +177,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['value']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), { - bucketPrefix: 'mybucket' - }) + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, @@ -219,9 +201,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['value']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), { - bucketPrefix: 'mybucket' - }) + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, @@ -245,9 +225,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), { - bucketPrefix: 'mybucket' - }) + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) ).toEqual([ { bucket: 'mybucket[2]', priority: 3 }, { bucket: 'mybucket[3]', priority: 3 } @@ -282,9 +260,7 @@ describe('table-valued function queries', () => { }, {} ), - { - bucketPrefix: 'mybucket' - } + scope(query) ) ).toEqual([{ bucket: 'mybucket[1]', priority: 3 }]); }); From 24c5484ffd9bb5232adf62bb40fc9e6778dd7c36 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 12 Jan 2026 16:22:13 +0200 Subject: [PATCH 016/101] Fix sync rules tests. 
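
Evaluated rows now carry a source reference and bucket descriptions carry the symbol-keyed [SOURCE] property, which breaks the deep-equality assertions in the sync-rules tests. The tests strip those references before comparing, using small helpers added to packages/sync-rules/test/src/util.ts, and the placeholder error message in buildBucketInfo is replaced with a descriptive one. Below is a rough sketch of what those helpers might look like; removeSource and removeSourceSymbol are the names the updated tests import, while the import path and typings shown here are illustrative assumptions rather than the committed implementation:

    // Sketch only: SOURCE is exported from src/utils.ts; the relative
    // import path below is an assumption for test/src/util.ts.
    import { SOURCE } from '../../src/utils.js';

    // Strip the source field from an evaluated row so assertions only
    // compare the JSON-serializable parts.
    export function removeSource<T extends { source?: unknown }>(value: T): Omit<T, 'source'> {
      const { source, ...rest } = value;
      return rest;
    }

    // Strip the symbol-keyed [SOURCE] property from a bucket description
    // so toStrictEqual ignores the attached BucketDataSource.
    export function removeSourceSymbol<T extends object>(value: T): T {
      const copy: any = { ...value };
      delete copy[SOURCE];
      return copy as T;
    }

The tests then apply these as .map(removeSource) on evaluateRow results and .map(removeSourceSymbol) on static bucket descriptions, keeping the existing expected values unchanged.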
--- packages/sync-rules/src/utils.ts | 2 +- .../sync-rules/test/src/compatibility.test.ts | 164 +++++++++------ .../test/src/parameter_queries.test.ts | 59 ++++-- .../test/src/static_parameter_queries.test.ts | 169 +++++++++------ packages/sync-rules/test/src/streams.test.ts | 4 +- .../sync-rules/test/src/sync_rules.test.ts | 199 +++++++++++------- .../src/table_valued_function_queries.test.ts | 58 +++-- packages/sync-rules/test/src/util.ts | 21 ++ 8 files changed, 427 insertions(+), 249 deletions(-) diff --git a/packages/sync-rules/src/utils.ts b/packages/sync-rules/src/utils.ts index 6f680714b..e9e295bac 100644 --- a/packages/sync-rules/src/utils.ts +++ b/packages/sync-rules/src/utils.ts @@ -28,7 +28,7 @@ export function buildBucketInfo( serializedParameters: string ): { bucket: string; [SOURCE]: BucketDataSource } { if (scope.source == null) { - throw new Error('foooo'); + throw new Error('source is required'); } return { bucket: scope.bucketPrefix + serializedParameters, diff --git a/packages/sync-rules/test/src/compatibility.test.ts b/packages/sync-rules/test/src/compatibility.test.ts index 11068de01..53d2149b6 100644 --- a/packages/sync-rules/test/src/compatibility.test.ts +++ b/packages/sync-rules/test/src/compatibility.test.ts @@ -2,7 +2,7 @@ import { describe, expect, test } from 'vitest'; import { DateTimeValue, SqlSyncRules, TimeValuePrecision, toSyncRulesValue } from '../../src/index.js'; import { versionedHydrationState } from '../../src/HydrationState.js'; -import { ASSETS, normalizeQuerierOptions, PARSE_OPTIONS } from './util.js'; +import { ASSETS, normalizeQuerierOptions, PARSE_OPTIONS, removeSource, removeSourceSymbol } from './util.js'; describe('compatibility options', () => { describe('timestamps', () => { @@ -23,13 +23,15 @@ bucket_definitions: ).hydrate(); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: 'mybucket[]', data: { description: '2025-08-19 09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -50,13 +52,15 @@ config: ).hydrate(); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: 'mybucket[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -77,18 +81,24 @@ config: ).hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: '1#stream|0[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + ).toStrictEqual([ { bucket: '1#stream|0[]', definition: 'stream', @@ -115,19 +125,25 @@ config: ).hydrate({ hydrationState: versionedHydrationState(1) }); expect( 
- rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ - { bucket: 'stream|0[]', data: { description: '2025-08-19 09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } + { bucket: '1#stream|0[]', data: { description: '2025-08-19 09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + ).toStrictEqual([ { - bucket: 'stream|0[]', + bucket: '1#stream|0[]', definition: 'stream', inclusion_reasons: ['default'], priority: 3 @@ -152,13 +168,15 @@ config: ).hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: 'desc' - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: 'desc' + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: '1#mybucket[]', data: { description: 'desc', id: 'id' }, id: 'id', table: 'assets' }]); }); @@ -176,16 +194,18 @@ config: ).hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: new DateTimeValue('2025-08-19T09:21:00Z', undefined, { - subSecondPrecision: TimeValuePrecision.seconds, - defaultSubSecondPrecision: TimeValuePrecision.seconds + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: new DateTimeValue('2025-08-19T09:21:00Z', undefined, { + subSecondPrecision: TimeValuePrecision.seconds, + defaultSubSecondPrecision: TimeValuePrecision.seconds + }) }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: '1#stream|0[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -206,13 +226,15 @@ bucket_definitions: ).hydrate(); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: description - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: description + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: 'a[]', data: { desc: 'baz', id: 'id' }, id: 'id', table: 'assets' }]); }); @@ -230,13 +252,15 @@ config: ).hydrate(); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: description - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: description + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: 'a[]', data: { desc: null, id: 'id' }, id: 'id', table: 'assets' }]); }); }); @@ -286,16 +310,18 @@ config: hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: data + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: data + }) }) - }) + .map(removeSource) ).toStrictEqual([ { - bucket: withFixedQuirk ? '1#mybucket[]' : 'mybucket[]', + bucket: '1#mybucket[]', data: { description: withFixedQuirk ? 
'["static value","2025-08-19T09:21:00Z"]' @@ -307,9 +333,13 @@ config: } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + ).toStrictEqual([ { - bucket: withFixedQuirk ? '1#mybucket[]' : 'mybucket[]', + bucket: '1#mybucket[]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 diff --git a/packages/sync-rules/test/src/parameter_queries.test.ts b/packages/sync-rules/test/src/parameter_queries.test.ts index 81c5f690a..6dbdc3bc6 100644 --- a/packages/sync-rules/test/src/parameter_queries.test.ts +++ b/packages/sync-rules/test/src/parameter_queries.test.ts @@ -12,7 +12,13 @@ import { mergeParameterIndexLookupCreators } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; -import { BASIC_SCHEMA, EMPTY_DATA_SOURCE, normalizeTokenParameters, PARSE_OPTIONS } from './util.js'; +import { + BASIC_SCHEMA, + EMPTY_DATA_SOURCE, + normalizeTokenParameters, + PARSE_OPTIONS, + removeSourceSymbol +} from './util.js'; import { HydrationState } from '../../src/HydrationState.js'; describe('parameter queries', () => { @@ -124,15 +130,21 @@ describe('parameter queries', () => { // We _do_ need to care about the bucket string representation. expect( - query.resolveBucketDescriptions([{ int1: 314, float1: 3.14, float2: 314 }], normalizeTokenParameters({}), { - bucketPrefix: 'mybucket' - }) + query + .resolveBucketDescriptions([{ int1: 314, float1: 3.14, float2: 314 }], normalizeTokenParameters({}), { + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE + }) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); expect( - query.resolveBucketDescriptions([{ int1: 314n, float1: 3.14, float2: 314 }], normalizeTokenParameters({}), { - bucketPrefix: 'mybucket' - }) + query + .resolveBucketDescriptions([{ int1: 314n, float1: 3.14, float2: 314 }], normalizeTokenParameters({}), { + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE + }) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); }); @@ -496,11 +508,13 @@ describe('parameter queries', () => { ]); expect( - query.resolveBucketDescriptions( - [{ user_id: 'user1' }], - normalizeTokenParameters({ user_id: 'user1', is_admin: true }), - { bucketPrefix: 'mybucket' } - ) + query + .resolveBucketDescriptions( + [{ user_id: 'user1' }], + normalizeTokenParameters({ user_id: 'user1', is_admin: true }), + { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE } + ) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket["user1",1]', priority: 3 }]); }); @@ -870,12 +884,13 @@ describe('parameter queries', () => { describe('custom hydrationState', function () { const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source: EMPTY_DATA_SOURCE }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -903,13 +918,17 @@ describe('parameter queries', () => { }); expect(result).toEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, ['test-user']), + lookup: 
ScopedParameterLookup.direct( + { lookupName: 'mybucket.test', queryId: 'myquery.test', source: query }, + ['test-user'] + ), bucketParameters: [{ group_id: 'group1' }] }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, [ - 'other-user' - ]), + lookup: ScopedParameterLookup.direct( + { lookupName: 'mybucket.test', queryId: 'myquery.test', source: query }, + ['other-user'] + ), bucketParameters: [{ group_id: 'group1' }] } ]); @@ -941,7 +960,9 @@ describe('parameter queries', () => { const querier = queriers[0]; expect(querier.parameterQueryLookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, ['test-user']) + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: query }, [ + 'test-user' + ]) ]); }); }); diff --git a/packages/sync-rules/test/src/static_parameter_queries.test.ts b/packages/sync-rules/test/src/static_parameter_queries.test.ts index 62566d4eb..3453f82e2 100644 --- a/packages/sync-rules/test/src/static_parameter_queries.test.ts +++ b/packages/sync-rules/test/src/static_parameter_queries.test.ts @@ -9,11 +9,18 @@ import { SqlParameterQuery } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; -import { EMPTY_DATA_SOURCE, normalizeTokenParameters, PARSE_OPTIONS } from './util.js'; +import { + EMPTY_DATA_SOURCE, + normalizeTokenParameters, + PARSE_OPTIONS, + removeSource, + removeSourceSymbol +} from './util.js'; describe('static parameter queries', () => { const MYBUCKET_SCOPE: BucketDataScope = { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }; test('basic query', function () { @@ -27,9 +34,11 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket["user1"]', priority: 3 } - ]); + expect( + query + .getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket["user1"]', priority: 3 }]); }); test('uses bucketPrefix', function () { @@ -44,9 +53,12 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual(['user_id']); expect( - query.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), { - bucketPrefix: '1#mybucket' - }) + query + .getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), { + bucketPrefix: '1#mybucket', + source: EMPTY_DATA_SOURCE + }) + .map(removeSourceSymbol) ).toEqual([{ bucket: '1#mybucket["user1"]', priority: 3 }]); }); @@ -61,9 +73,11 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual([]); - expect(query.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket[]', priority: 3 } - ]); + expect( + query + .getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); }); test('query with filter', function () { @@ -77,10 +91,14 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; 
expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1', is_admin: true }), MYBUCKET_SCOPE) + query + .getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1', is_admin: true }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket["user1"]', priority: 3 }]); expect( - query.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1', is_admin: false }), MYBUCKET_SCOPE) + query + .getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1', is_admin: false }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) ).toEqual([]); }); @@ -94,9 +112,11 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket["USER1"]', priority: 3 } - ]); + expect( + query + .getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket["USER1"]', priority: 3 }]); expect(query.bucketParameters!).toEqual(['upper_id']); }); @@ -110,10 +130,16 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(normalizeTokenParameters({ role: 'admin' }), MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket[]', priority: 3 } - ]); - expect(query.getStaticBucketDescriptions(normalizeTokenParameters({ role: 'user' }), MYBUCKET_SCOPE)).toEqual([]); + expect( + query + .getStaticBucketDescriptions(normalizeTokenParameters({ role: 'admin' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); + expect( + query + .getStaticBucketDescriptions(normalizeTokenParameters({ role: 'user' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) + ).toEqual([]); }); test('comparison in filter clause', function () { @@ -127,10 +153,14 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions(normalizeTokenParameters({ id1: 't1', id2: 't1' }), MYBUCKET_SCOPE) + query + .getStaticBucketDescriptions(normalizeTokenParameters({ id1: 't1', id2: 't1' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions(normalizeTokenParameters({ id1: 't1', id2: 't2' }), MYBUCKET_SCOPE) + query + .getStaticBucketDescriptions(normalizeTokenParameters({ id1: 't1', id2: 't2' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) ).toEqual([]); }); @@ -148,9 +178,11 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(normalizeTokenParameters({}, { org_id: 'test' }), MYBUCKET_SCOPE)).toEqual( - [{ bucket: 'mybucket["test"]', priority: 3 }] - ); + expect( + query + .getStaticBucketDescriptions(normalizeTokenParameters({}, { org_id: 'test' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket["test"]', priority: 3 }]); }); test('request.jwt()', function () { @@ -165,9 +197,11 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), 
MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket["user1"]', priority: 3 } - ]); + expect( + query + .getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket["user1"]', priority: 3 }]); }); test('request.user_id()', function () { @@ -182,9 +216,11 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket["user1"]', priority: 3 } - ]); + expect( + query + .getStaticBucketDescriptions(normalizeTokenParameters({ user_id: 'user1' }), MYBUCKET_SCOPE) + .map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket["user1"]', priority: 3 }]); }); test('static value', function () { @@ -197,9 +233,9 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket[]', priority: 3 } - ]); + expect( + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), MYBUCKET_SCOPE).map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); }); test('static expression (1)', function () { @@ -212,9 +248,9 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket[]', priority: 3 } - ]); + expect( + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), MYBUCKET_SCOPE).map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); }); test('static expression (2)', function () { @@ -240,9 +276,9 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), MYBUCKET_SCOPE)).toEqual([ - { bucket: 'mybucket[]', priority: 3 } - ]); + expect( + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), MYBUCKET_SCOPE).map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); }); test('IN for permissions in request.jwt() (1)', function () { @@ -257,16 +293,20 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( - new RequestParameters({ sub: '', permissions: ['write', 'read:users'] }, {}), - MYBUCKET_SCOPE - ) + query + .getStaticBucketDescriptions( + new RequestParameters({ sub: '', permissions: ['write', 'read:users'] }, {}), + MYBUCKET_SCOPE + ) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[1]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( - new RequestParameters({ sub: '', permissions: ['write', 'write:users'] }, {}), - MYBUCKET_SCOPE - ) + query + .getStaticBucketDescriptions( + new RequestParameters({ sub: '', permissions: ['write', 'write:users'] }, {}), + MYBUCKET_SCOPE + ) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[0]', priority: 3 }]); }); @@ -282,16 +322,20 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( - new 
RequestParameters({ sub: '', permissions: ['write', 'read:users'] }, {}), - MYBUCKET_SCOPE - ) + query + .getStaticBucketDescriptions( + new RequestParameters({ sub: '', permissions: ['write', 'read:users'] }, {}), + MYBUCKET_SCOPE + ) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( - new RequestParameters({ sub: '', permissions: ['write', 'write:users'] }, {}), - MYBUCKET_SCOPE - ) + query + .getStaticBucketDescriptions( + new RequestParameters({ sub: '', permissions: ['write', 'write:users'] }, {}), + MYBUCKET_SCOPE + ) + .map(removeSourceSymbol) ).toEqual([]); }); @@ -306,10 +350,14 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '', role: 'superuser' }, {}), MYBUCKET_SCOPE) + query + .getStaticBucketDescriptions(new RequestParameters({ sub: '', role: 'superuser' }, {}), MYBUCKET_SCOPE) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '', role: 'superadmin' }, {}), MYBUCKET_SCOPE) + query + .getStaticBucketDescriptions(new RequestParameters({ sub: '', role: 'superadmin' }, {}), MYBUCKET_SCOPE) + .map(removeSourceSymbol) ).toEqual([]); }); @@ -387,12 +435,13 @@ describe('static parameter queries', () => { const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; diff --git a/packages/sync-rules/test/src/streams.test.ts b/packages/sync-rules/test/src/streams.test.ts index e372090b2..4935f177d 100644 --- a/packages/sync-rules/test/src/streams.test.ts +++ b/packages/sync-rules/test/src/streams.test.ts @@ -24,7 +24,7 @@ import { syncStreamFromSql, ScopedParameterLookup } from '../../src/index.js'; -import { normalizeQuerierOptions, PARSE_OPTIONS, TestSourceTable } from './util.js'; +import { normalizeQuerierOptions, PARSE_OPTIONS, removeSourceSymbol, TestSourceTable } from './util.js'; describe('streams', () => { const STREAM_0: ParameterLookupScope = { @@ -92,7 +92,7 @@ describe('streams', () => { normalizeQuerierOptions({ test: 'foo' }, {}, { stream: [{ opaque_id: 0, parameters: null }] }) ); - expect(mergeBucketParameterQueriers(queriers).staticBuckets).toEqual([ + expect(mergeBucketParameterQueriers(queriers).staticBuckets.map(removeSourceSymbol)).toEqual([ { bucket: '1#stream|0["foo"]', definition: 'stream', diff --git a/packages/sync-rules/test/src/sync_rules.test.ts b/packages/sync-rules/test/src/sync_rules.test.ts index e85949da8..5bdc069ff 100644 --- a/packages/sync-rules/test/src/sync_rules.test.ts +++ b/packages/sync-rules/test/src/sync_rules.test.ts @@ -11,7 +11,9 @@ import { TestSourceTable, USERS, normalizeQuerierOptions, - normalizeTokenParameters + normalizeTokenParameters, + removeSource, + removeSourceSymbol } from './util.js'; describe('sync rules', () => { @@ -41,10 +43,12 @@ bucket_definitions: expect(dataQuery.bucketParameters).toEqual([]); expect(dataQuery.columnOutputNames()).toEqual(['id', 'description']); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', 
description: 'test' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test' } + }) + .map(removeSource) ).toEqual([ { table: 'assets', @@ -130,17 +134,20 @@ bucket_definitions: const bucketData = rules.bucketDataSources[0]; expect(bucketData.bucketParameters).toEqual(['user_id', 'device_id']); expect( - hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ user_id: 'user1' }, { device_id: 'device1' })) - .querier.staticBuckets + hydrated + .getBucketParameterQuerier(normalizeQuerierOptions({ user_id: 'user1' }, { device_id: 'device1' })) + .querier.staticBuckets.map(removeSourceSymbol) ).toEqual([ { bucket: 'mybucket["user1","device1"]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 } ]); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', user_id: 'user1', device_id: 'device1' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', user_id: 'user1', device_id: 'device1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket["user1","device1"]', @@ -153,10 +160,12 @@ bucket_definitions: } ]); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', user_id: 'user1', archived: 1, device_id: 'device1' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', user_id: 'user1', archived: 1, device_id: 'device1' } + }) + .map(removeSource) ).toEqual([]); }); @@ -194,7 +203,7 @@ bucket_definitions: normalizeQuerierOptions({ user_id: 'user1' }, { device_id: 'device1' }) ); expect(querier.errors).toEqual([]); - expect(querier.querier.staticBuckets).toEqual([ + expect(querier.querier.staticBuckets.map(removeSourceSymbol)).toEqual([ { bucket: 'mybucket-test["user1"]', definition: 'mybucket', @@ -220,10 +229,12 @@ bucket_definitions: ]); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', user_id: 'user1', device_id: 'device1' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', user_id: 'user1', device_id: 'device1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket-test["user1"]', @@ -252,14 +263,18 @@ bucket_definitions: const bucketData = rules.bucketDataSources[0]; expect(bucketData.bucketParameters).toEqual(['user_id']); expect( - hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ user_id: 'user1' })).querier.staticBuckets + hydrated + .getBucketParameterQuerier(normalizeQuerierOptions({ user_id: 'user1' })) + .querier.staticBuckets.map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket["user1"]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 }]); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', user_id: 'user1' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', user_id: 'user1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket["user1"]', @@ -272,10 +287,12 @@ bucket_definitions: } ]); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', owner_id: 'user1' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', owner_id: 'user1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket["user1"]', @@ -398,10 +415,12 @@ bucket_definitions: }); expect( - 
hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', user_id: 'user1' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', user_id: 'user1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket["USER1"]', @@ -436,10 +455,12 @@ bucket_definitions: }); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', user_id: 'user1' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', user_id: 'user1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket["USER1"]', @@ -465,10 +486,12 @@ bucket_definitions: ); const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', data: JSON.stringify({ count: 5, bool: true }) } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', data: JSON.stringify({ count: 5, bool: true }) } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket[]', @@ -500,14 +523,16 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'asset1', - description: 'test', - region_ids: JSON.stringify(['region1', 'region2']) - } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'asset1', + description: 'test', + region_ids: JSON.stringify(['region1', 'region2']) + } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket["region1"]', @@ -545,10 +570,12 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', role: 'admin' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', role: 'admin' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket[1]', @@ -564,10 +591,12 @@ bucket_definitions: ]); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset2', description: 'test', role: 'normal' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset2', description: 'test', role: 'normal' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket[1]', @@ -605,7 +634,9 @@ bucket_definitions: ]); expect( - hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ is_admin: true })).querier.staticBuckets + hydrated + .getBucketParameterQuerier(normalizeQuerierOptions({ is_admin: true })) + .querier.staticBuckets.map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[1]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 }]); }); @@ -621,7 +652,7 @@ bucket_definitions: ); const hydrated = rules.hydrate(hydrationParams); - expect(hydrated.evaluateRow({ sourceTable: ASSETS, record: { id: 'asset1' } })).toEqual([ + expect(hydrated.evaluateRow({ sourceTable: ASSETS, record: { id: 'asset1' } }).map(removeSource)).toEqual([ { bucket: 'mybucket[]', id: 'asset1', @@ -653,10 +684,12 @@ bucket_definitions: ).toMatchObject({ staticBuckets: [{ bucket: 'mybucket[314,3.14,314]', priority: 3 }] }); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', int1: 314n, float1: 3.14, float2: 314 } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', int1: 314n, float1: 3.14, float2: 314 } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket[314,3.14,314]', @@ -701,10 +734,12 @@ bucket_definitions: const hydrated = 
rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ - sourceTable: new TestSourceTable('assets_123'), - record: { client_id: 'asset1', description: 'test', archived: 0n, other_id: 'other1' } - }) + hydrated + .evaluateRow({ + sourceTable: new TestSourceTable('assets_123'), + record: { client_id: 'asset1', description: 'test', archived: 0n, other_id: 'other1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket[]', @@ -742,10 +777,12 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ - sourceTable: new TestSourceTable('assets_123'), - record: { client_id: 'asset1', description: 'test', archived: 0n, other_id: 'other1' } - }) + hydrated + .evaluateRow({ + sourceTable: new TestSourceTable('assets_123'), + record: { client_id: 'asset1', description: 'test', archived: 0n, other_id: 'other1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket[]', @@ -776,10 +813,12 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1', description: 'test', archived: 0n } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1', description: 'test', archived: 0n } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket[]', @@ -812,10 +851,12 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ - sourceTable: ASSETS, - record: { id: 'asset1' } - }) + hydrated + .evaluateRow({ + sourceTable: ASSETS, + record: { id: 'asset1' } + }) + .map(removeSource) ).toEqual([ { bucket: 'mybucket[]', diff --git a/packages/sync-rules/test/src/table_valued_function_queries.test.ts b/packages/sync-rules/test/src/table_valued_function_queries.test.ts index 5ea6572a1..3c252367e 100644 --- a/packages/sync-rules/test/src/table_valued_function_queries.test.ts +++ b/packages/sync-rules/test/src/table_valued_function_queries.test.ts @@ -8,7 +8,7 @@ import { SqlParameterQuery } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; -import { EMPTY_DATA_SOURCE, PARSE_OPTIONS } from './util.js'; +import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, removeSourceSymbol } from './util.js'; describe('table-valued function queries', () => { function scope(q: StaticSqlParameterQuery): BucketDataScope { @@ -34,7 +34,9 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3, null] }), scope(query)) + query + .getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3, null] }), scope(query)) + .map(removeSourceSymbol) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, @@ -63,7 +65,9 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3, null] }), scope(query)) + query + .getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3, null] }), scope(query)) + .map(removeSourceSymbol) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, @@ -84,7 +88,9 @@ describe('table-valued function queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['v']); - expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, 
{}), scope(query))).toEqual([ + expect( + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), scope(query)).map(removeSourceSymbol) + ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, { bucket: 'mybucket[3]', priority: 3 } @@ -154,7 +160,9 @@ describe('table-valued function queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['value']); - expect(query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), scope(query))).toEqual([ + expect( + query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, {}), scope(query)).map(removeSourceSymbol) + ).toEqual([ { bucket: 'mybucket["a"]', priority: 3 }, { bucket: 'mybucket["b"]', priority: 3 }, { bucket: 'mybucket["c"]', priority: 3 } @@ -177,7 +185,9 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['value']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) + query + .getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) + .map(removeSourceSymbol) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, @@ -201,7 +211,9 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['value']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) + query + .getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) + .map(removeSourceSymbol) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, { bucket: 'mybucket[2]', priority: 3 }, @@ -225,7 +237,9 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) + query + .getStaticBucketDescriptions(new RequestParameters({ sub: '' }, { array: [1, 2, 3] }), scope(query)) + .map(removeSourceSymbol) ).toEqual([ { bucket: 'mybucket[2]', priority: 3 }, { bucket: 'mybucket[3]', priority: 3 } @@ -249,19 +263,21 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['project_id']); expect( - query.getStaticBucketDescriptions( - new RequestParameters( - { - sub: '', - projects: [ - { id: 1, role: 'admin' }, - { id: 2, role: 'user' } - ] - }, - {} - ), - scope(query) - ) + query + .getStaticBucketDescriptions( + new RequestParameters( + { + sub: '', + projects: [ + { id: 1, role: 'admin' }, + { id: 2, role: 'user' } + ] + }, + {} + ), + scope(query) + ) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[1]', priority: 3 }]); }); diff --git a/packages/sync-rules/test/src/util.ts b/packages/sync-rules/test/src/util.ts index 61478218a..6d2a9efa8 100644 --- a/packages/sync-rules/test/src/util.ts +++ b/packages/sync-rules/test/src/util.ts @@ -8,6 +8,7 @@ import { RequestedStream, RequestJwtPayload, RequestParameters, + SOURCE, SourceSchema, SourceTableInterface, StaticSchema, @@ -110,3 +111,23 @@ export const EMPTY_DATA_SOURCE: BucketDataSource = { throw new Error('Function not implemented.'); } }; + +/** + * Removes the source property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. 
+ */ +export function removeSource(obj: T): Omit { + const { source, ...rest } = obj; + return rest; +} + +/** + * Removes the [SOURCE] symbol property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. + */ +export function removeSourceSymbol(obj: T): Omit { + const { [SOURCE]: source, ...rest } = obj; + return rest; +} From 3e45cc707382f58f374e9244eefec11566c40316 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 12 Jan 2026 16:51:22 +0200 Subject: [PATCH 017/101] Tests build again. --- .../implementation/BucketDefinitionMapping.ts | 31 +++++++++ .../test/src/storage_sync.test.ts | 8 +-- .../test/src/change_stream_utils.ts | 10 +-- .../test/src/CDCStreamTestContext.ts | 14 ++-- .../test/src/BinlogStreamUtils.ts | 8 ++- .../test/src/storage.test.ts | 10 +-- .../test/src/wal_stream_utils.ts | 13 ++-- .../src/test-utils/general-utils.ts | 22 +++++- .../src/tests/register-compacting-tests.ts | 22 +++--- .../tests/register-data-storage-data-tests.ts | 69 +++++++++++-------- .../test/src/sync/BucketChecksumState.test.ts | 8 ++- 11 files changed, 147 insertions(+), 68 deletions(-) create mode 100644 modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts diff --git a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts new file mode 100644 index 000000000..d559b039b --- /dev/null +++ b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts @@ -0,0 +1,31 @@ +import { ServiceAssertionError } from '@powersync/lib-services-framework'; +import { BucketDataSource, ParameterIndexLookupCreator } from '@powersync/service-sync-rules'; +import { SyncRuleDocument } from './models.js'; + +export class BucketDefinitionMapping { + static fromSyncRules(doc: Pick): BucketDefinitionMapping { + return new BucketDefinitionMapping(doc.rule_mapping.definitions, doc.rule_mapping.parameter_lookups); + } + + constructor( + private definitions: Record, + private parameterLookupMapping: Record = {} + ) {} + + bucketSourceId(source: BucketDataSource): number { + const defId = this.definitions[source.uniqueName]; + if (defId == null) { + throw new ServiceAssertionError(`No mapping found for bucket source ${source.uniqueName}`); + } + return defId; + } + + parameterLookupId(source: ParameterIndexLookupCreator): number { + const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; + const defId = this.parameterLookupMapping[key]; + if (defId == null) { + throw new ServiceAssertionError(`No mapping found for parameter lookup source ${key}`); + } + return defId; + } +} diff --git a/modules/module-mongodb-storage/test/src/storage_sync.test.ts b/modules/module-mongodb-storage/test/src/storage_sync.test.ts index eaa636600..55ab5cdd2 100644 --- a/modules/module-mongodb-storage/test/src/storage_sync.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_sync.test.ts @@ -1,5 +1,5 @@ import { storage } from '@powersync/service-core'; -import { register, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, register, test_utils } from '@powersync/service-core-tests'; import { describe, expect, test } from 'vitest'; import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; @@ -75,7 +75,7 @@ describe('sync - mongodb', () => { const options: storage.BucketDataBatchOptions = {}; const batch1 = await test_utils.fromAsync( - 
bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]), options)
+      bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(sync_rules, 'global[]', 0n)], options)
     );
     expect(test_utils.getBatchData(batch1)).toEqual([
       { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 },
@@ -90,7 +90,7 @@ describe('sync - mongodb', () => {
     const batch2 = await test_utils.fromAsync(
       bucketStorage.getBucketDataBatch(
         checkpoint,
-        new Map([['global[]', BigInt(batch1[0].chunkData.next_after)]]),
+        [bucketRequest(sync_rules, 'global[]', batch1[0].chunkData.next_after)],
         options
       )
     );
@@ -106,7 +106,7 @@ describe('sync - mongodb', () => {
     const batch3 = await test_utils.fromAsync(
       bucketStorage.getBucketDataBatch(
         checkpoint,
-        new Map([['global[]', BigInt(batch2[0].chunkData.next_after)]]),
+        [bucketRequest(sync_rules, 'global[]', batch2[0].chunkData.next_after)],
         options
       )
     );
diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts
index 2357f3ac4..ad1b24a7c 100644
--- a/modules/module-mongodb/test/src/change_stream_utils.ts
+++ b/modules/module-mongodb/test/src/change_stream_utils.ts
@@ -12,7 +12,7 @@ import {
   TestStorageOptions,
   unsettledPromise
 } from '@powersync/service-core';
-import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests';
+import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests';
 
 import { ChangeStream, ChangeStreamOptions } from '@module/replication/ChangeStream.js';
 import { MongoManager } from '@module/replication/MongoManager.js';
@@ -184,7 +184,8 @@ export class ChangeStreamTestContext {
 
   async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) {
     let checkpoint = await this.getCheckpoint(options);
-    const map = new Map(Object.entries(buckets));
+    const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' });
+    const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules.definition, bucket, start));
     return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map));
   }
 
@@ -193,8 +194,9 @@ export class ChangeStreamTestContext {
     if (typeof start == 'string') {
       start = BigInt(start);
     }
+    const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' });
     const checkpoint = await this.getCheckpoint(options);
-    const map = new Map([[bucket, start]]);
+    let map = [bucketRequest(syncRules.definition, bucket, start)];
     let data: OplogEntry[] = [];
     while (true) {
       const batch = this.storage!.getBucketDataBatch(checkpoint, map);
@@ -204,7 +206,7 @@ export class ChangeStreamTestContext {
       if (batches.length == 0 || !batches[0]!.chunkData.has_more) {
         break;
       }
-      map.set(bucket, BigInt(batches[0]!.chunkData.next_after));
+      map = [bucketRequest(syncRules.definition, bucket, batches[0]!.chunkData.next_after)];
     }
     return data;
   }
diff --git a/modules/module-mssql/test/src/CDCStreamTestContext.ts b/modules/module-mssql/test/src/CDCStreamTestContext.ts
index 6b674befc..5f21204b6 100644
--- a/modules/module-mssql/test/src/CDCStreamTestContext.ts
+++ b/modules/module-mssql/test/src/CDCStreamTestContext.ts
@@ -7,7 +7,7 @@ import {
   storage,
   SyncRulesBucketStorage
 } from '@powersync/service-core';
-import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests';
+import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests';
 import { clearTestDb, getClientCheckpoint, TEST_CONNECTION_OPTIONS } from './util.js';
 import { CDCStream, CDCStreamOptions } from '@module/replication/CDCStream.js';
 import { MSSQLConnectionManager } from '@module/replication/MSSQLConnectionManager.js';
@@ -167,7 +167,8 @@ export class CDCStreamTestContext implements AsyncDisposable {
 
   async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) {
     let checkpoint = await this.getCheckpoint(options);
-    const map = new Map(Object.entries(buckets));
+    const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' });
+    const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules.definition, bucket, start));
     return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map));
   }
 
@@ -179,8 +180,10 @@ export class CDCStreamTestContext implements AsyncDisposable {
     if (typeof start == 'string') {
       start = BigInt(start);
     }
+    const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' });
     const checkpoint = await this.getCheckpoint(options);
-    const map = new Map([[bucket, start]]);
+    let map = [bucketRequest(syncRules.definition, bucket, start)];
+
     let data: OplogEntry[] = [];
     while (true) {
       const batch = this.storage!.getBucketDataBatch(checkpoint, map);
@@ -190,7 +193,7 @@ export class CDCStreamTestContext implements AsyncDisposable {
       if (batches.length == 0 || !batches[0]!.chunkData.has_more) {
         break;
       }
-      map.set(bucket, BigInt(batches[0]!.chunkData.next_after));
+      map = [bucketRequest(syncRules.definition, bucket, batches[0]!.chunkData.next_after)];
     }
     return data;
   }
@@ -204,7 +207,7 @@ export class CDCStreamTestContext implements AsyncDisposable {
       start = BigInt(start);
     }
     const { checkpoint } = await this.storage!.getCheckpoint();
-    const map = new Map([[bucket, start]]);
+    const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' });
+    const map = [bucketRequest(syncRules.definition, bucket, start)];
     const batch = this.storage!.getBucketDataBatch(checkpoint, map);
     const batches = await test_utils.fromAsync(batch);
     return batches[0]?.chunkData.data ??
[]; diff --git a/modules/module-mysql/test/src/BinlogStreamUtils.ts b/modules/module-mysql/test/src/BinlogStreamUtils.ts index 665be6c21..1a6ca8e33 100644 --- a/modules/module-mysql/test/src/BinlogStreamUtils.ts +++ b/modules/module-mysql/test/src/BinlogStreamUtils.ts @@ -13,7 +13,7 @@ import { storage, SyncRulesBucketStorage } from '@powersync/service-core'; -import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import mysqlPromise from 'mysql2/promise'; import { clearTestDb, TEST_CONNECTION_OPTIONS } from './util.js'; import timers from 'timers/promises'; @@ -150,7 +150,8 @@ export class BinlogStreamTestContext { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { const checkpoint = await this.getCheckpoint(options); - const map = new Map(Object.entries(buckets)); + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules.definition, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -163,8 +164,9 @@ export class BinlogStreamTestContext { if (typeof start == 'string') { start = BigInt(start); } + const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await this.getCheckpoint(options); - const map = new Map([[bucket, start]]); + const map = [bucketRequest(syncRules.definition, bucket, start)]; const batch = this.storage!.getBucketDataBatch(checkpoint, map); const batches = await test_utils.fromAsync(batch); return batches[0]?.chunkData.data ?? []; diff --git a/modules/module-postgres-storage/test/src/storage.test.ts b/modules/module-postgres-storage/test/src/storage.test.ts index 2e701aa56..672026e55 100644 --- a/modules/module-postgres-storage/test/src/storage.test.ts +++ b/modules/module-postgres-storage/test/src/storage.test.ts @@ -1,5 +1,5 @@ import { storage } from '@powersync/service-core'; -import { register, test_utils } from '@powersync/service-core-tests'; +import { bucketRequest, register, test_utils } from '@powersync/service-core-tests'; import { describe, expect, test } from 'vitest'; import { POSTGRES_STORAGE_FACTORY } from './util.js'; @@ -87,7 +87,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const options: storage.BucketDataBatchOptions = {}; const batch1 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]), options) + bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(sync_rules, 'global[]', 0n)], options) ); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 } @@ -101,7 +101,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const batch2 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch1[0].chunkData.next_after)]]), + [bucketRequest(sync_rules, 'global[]', batch1[0].chunkData.next_after)], options ) ); @@ -117,7 +117,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const batch3 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch2[0].chunkData.next_after)]]), + [bucketRequest(sync_rules, 'global[]', batch2[0].chunkData.next_after)], options ) ); @@ -133,7 +133,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const 
batch4 = await test_utils.fromAsync(
       bucketStorage.getBucketDataBatch(
         checkpoint,
-        new Map([['global[]', BigInt(batch3[0].chunkData.next_after)]]),
+        [bucketRequest(sync_rules, 'global[]', batch3[0].chunkData.next_after)],
         options
       )
     );
diff --git a/modules/module-postgres/test/src/wal_stream_utils.ts b/modules/module-postgres/test/src/wal_stream_utils.ts
index 96c49a441..f051100de 100644
--- a/modules/module-postgres/test/src/wal_stream_utils.ts
+++ b/modules/module-postgres/test/src/wal_stream_utils.ts
@@ -11,7 +11,7 @@ import {
   SyncRulesBucketStorage,
   unsettledPromise
 } from '@powersync/service-core';
-import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests';
+import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests';
 import * as pgwire from '@powersync/service-jpgwire';
 import { clearTestDb, getClientCheckpoint, TEST_CONNECTION_OPTIONS } from './util.js';
 import { CustomTypeRegistry } from '@module/types/registry.js';
@@ -171,7 +171,8 @@ export class WalStreamTestContext implements AsyncDisposable {
 
   async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) {
     let checkpoint = await this.getCheckpoint(options);
-    const map = new Map(Object.entries(buckets));
+    const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' });
+    const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules.definition, bucket, start));
     return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map));
   }
 
@@ -183,8 +184,9 @@ export class WalStreamTestContext implements AsyncDisposable {
     if (typeof start == 'string') {
       start = BigInt(start);
     }
+    const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' });
     const checkpoint = await this.getCheckpoint(options);
-    const map = new Map([[bucket, start]]);
+    let map = [bucketRequest(syncRules.definition, bucket, start)];
     let data: OplogEntry[] = [];
     while (true) {
       const batch = this.storage!.getBucketDataBatch(checkpoint, map);
@@ -194,7 +196,7 @@ export class WalStreamTestContext implements AsyncDisposable {
       if (batches.length == 0 || !batches[0]!.chunkData.has_more) {
         break;
       }
-      map.set(bucket, BigInt(batches[0]!.chunkData.next_after));
+      map = [bucketRequest(syncRules.definition, bucket, batches[0]!.chunkData.next_after)];
     }
     return data;
   }
@@ -207,8 +209,9 @@ export class WalStreamTestContext implements AsyncDisposable {
     if (typeof start == 'string') {
       start = BigInt(start);
     }
+    const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' });
     const { checkpoint } = await this.storage!.getCheckpoint();
-    const map = new Map([[bucket, start]]);
+    const map = [bucketRequest(syncRules.definition, bucket, start)];
     const batch = this.storage!.getBucketDataBatch(checkpoint, map);
     const batches = await test_utils.fromAsync(batch);
     return batches[0]?.chunkData.data ?? [];
diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts
index fec74712d..dc4e6582e 100644
--- a/packages/service-core-tests/src/test-utils/general-utils.ts
+++ b/packages/service-core-tests/src/test-utils/general-utils.ts
@@ -1,4 +1,4 @@
-import { storage, utils } from '@powersync/service-core';
+import { BucketDataRequest, InternalOpId, storage, utils } from '@powersync/service-core';
 import { GetQuerierOptions, RequestParameters, SqlSyncRules } from '@powersync/service-sync-rules';
 import { versionedHydrationState } from '@powersync/service-sync-rules';
 import * as bson from 'bson';
@@ -119,3 +119,23 @@ export function querierOptions(globalParameters: RequestParameters): GetQuerierO
     streams: {}
   };
 }
+
+export function bucketRequest(
+  syncRules: storage.PersistedSyncRulesContent | SqlSyncRules,
+  bucket?: string,
+  start?: InternalOpId | string | number
+): BucketDataRequest {
+  const parsed =
+    syncRules instanceof SqlSyncRules ? syncRules : syncRules.parsed({ defaultSchema: 'not-applicable' }).sync_rules;
+  bucket ??= 'global[]';
+  const definitionName = bucket.substring(0, bucket.indexOf('['));
+  const source = parsed.bucketDataSources.find((b) => b.uniqueName === definitionName);
+  if (source == null) {
+    throw new Error(`Failed to find bucket data source for ${bucket}`);
+  }
+  return {
+    bucket,
+    start: BigInt(start ?? 0n),
+    source: source
+  };
+}
diff --git a/packages/service-core-tests/src/tests/register-compacting-tests.ts b/packages/service-core-tests/src/tests/register-compacting-tests.ts
index 1c43bf25a..e497e5c47 100644
--- a/packages/service-core-tests/src/tests/register-compacting-tests.ts
+++ b/packages/service-core-tests/src/tests/register-compacting-tests.ts
@@ -1,6 +1,7 @@
 import { addChecksums, storage } from '@powersync/service-core';
 import { expect, test } from 'vitest';
 import * as test_utils from '../test-utils/test-utils-index.js';
+import { bucketRequest } from '../test-utils/test-utils-index.js';
 
 export function registerCompactTests(config: storage.TestStorageConfig) {
   const generateStorageFactory = config.factory;
@@ -52,7 +53,7 @@ bucket_definitions:
       const checkpoint = result!.flushed_op;
 
       const batchBefore = await test_utils.oneFromAsync(
-        bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))
+        bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])
      );
       const dataBefore = batchBefore.chunkData.data;
       const checksumBefore = await bucketStorage.getChecksums(checkpoint, ['global[]']);
@@ -84,7 +85,7 @@ bucket_definitions:
       });
 
       const batchAfter = await test_utils.oneFromAsync(
-        bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))
+        bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])
      );
       const dataAfter = batchAfter.chunkData.data;
       const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['global[]']);
@@ -168,7 +169,7 @@ bucket_definitions:
       const checkpoint = result!.flushed_op;
 
       const batchBefore = await test_utils.oneFromAsync(
-        bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))
+        bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])
      );
       const dataBefore = batchBefore.chunkData.data;
       const checksumBefore = await bucketStorage.getChecksums(checkpoint, ['global[]']);
@@ -201,7 +202,7 @@ bucket_definitions:
       });
 
       const batchAfter = await test_utils.oneFromAsync(
-        bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))
+
bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)]) ); const dataAfter = batchAfter.chunkData.data; bucketStorage.clearChecksumCache(); @@ -297,7 +298,7 @@ bucket_definitions: }); const batchAfter = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint2, new Map([['global[]', 0n]])) + bucketStorage.getBucketDataBatch(checkpoint2, [bucketRequest(syncRules)]) ); const dataAfter = batchAfter.chunkData.data; await bucketStorage.clearChecksumCache(); @@ -408,13 +409,10 @@ bucket_definitions: }); const batchAfter = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch( - checkpoint, - new Map([ - ['grouped["b1"]', 0n], - ['grouped["b2"]', 0n] - ]) - ) + bucketStorage.getBucketDataBatch(checkpoint, [ + bucketRequest(syncRules, 'grouped["b1"]', 0n), + bucketRequest(syncRules, 'grouped["b2"]', 0n) + ]) ); const dataAfter = batchAfter.flatMap((b) => b.chunkData.data); diff --git a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts index 5ba42456d..acd4d540d 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts @@ -1,7 +1,15 @@ -import { BucketDataBatchOptions, getUuidReplicaIdentityBson, OplogEntry, storage } from '@powersync/service-core'; +import { + BucketDataBatchOptions, + BucketDataRequest, + BucketRequest, + getUuidReplicaIdentityBson, + InternalOpId, + OplogEntry, + storage +} from '@powersync/service-core'; import { describe, expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; - +import { bucketRequest } from '../test-utils/test-utils-index.js'; /** * Normalize data from OplogEntries for comparison in tests. 
* Tests typically expect the stringified result @@ -26,6 +34,7 @@ const normalizeOplogData = (data: OplogEntry['data']) => { export function registerDataStorageDataTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); + test('removing row', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -37,6 +46,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + bucketStorage.getParsedSyncRules; await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { const sourceTable = TEST_TABLE; @@ -61,7 +71,7 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -130,7 +140,7 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -200,7 +210,7 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -258,7 +268,7 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -331,7 +341,7 @@ bucket_definitions: await batch.commit('1/1'); }); const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -398,7 +408,7 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -520,7 +530,7 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) 
=> { return { @@ -681,7 +691,7 @@ bucket_definitions: const checkpoint2 = result2!.flushed_op; const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint2, new Map([['global[]', checkpoint1]])) + bucketStorage.getBucketDataBatch(checkpoint2, [bucketRequest(syncRules, 'global[]', checkpoint1)]) ); const data = batch[0].chunkData.data.map((d) => { @@ -782,7 +792,7 @@ bucket_definitions: const checkpoint3 = result3!.flushed_op; const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint3, new Map([['global[]', checkpoint1]])) + bucketStorage.getBucketDataBatch(checkpoint3, [bucketRequest(syncRules, 'global[]', checkpoint1)]) ); const data = batch[0].chunkData.data.map((d) => { return { @@ -891,7 +901,7 @@ bucket_definitions: const checkpoint3 = result3!.flushed_op; const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint3, new Map([['global[]', checkpoint1]])) + bucketStorage.getBucketDataBatch(checkpoint3, [bucketRequest(syncRules, 'global[]', checkpoint1)]) ); const data = batch[0].chunkData.data.map((d) => { return { @@ -994,7 +1004,7 @@ bucket_definitions: }; const batch1 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]), options) + bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)], options) ); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 }, @@ -1009,7 +1019,7 @@ bucket_definitions: const batch2 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch1[0].chunkData.next_after)]]), + [bucketRequest(syncRules, 'global[]', BigInt(batch1[0].chunkData.next_after))], options ) ); @@ -1026,7 +1036,7 @@ bucket_definitions: const batch3 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([['global[]', BigInt(batch2[0].chunkData.next_after)]]), + [bucketRequest(syncRules, 'global[]', BigInt(batch2[0].chunkData.next_after))], options ) ); @@ -1069,7 +1079,7 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); const batch1 = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]), { limit: 4 }) + bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)], { limit: 4 }) ); expect(test_utils.getBatchData(batch1)).toEqual([ @@ -1086,9 +1096,13 @@ bucket_definitions: }); const batch2 = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', BigInt(batch1.chunkData.next_after)]]), { - limit: 4 - }) + bucketStorage.getBucketDataBatch( + checkpoint, + [bucketRequest(syncRules, 'global[]', batch1.chunkData.next_after)], + { + limit: 4 + } + ) ); expect(test_utils.getBatchData(batch2)).toEqual([ { op_id: '5', op: 'PUT', object_id: 'test5', checksum: 3686902721 }, @@ -1102,9 +1116,13 @@ bucket_definitions: }); const batch3 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', BigInt(batch2.chunkData.next_after)]]), { - limit: 4 - }) + bucketStorage.getBucketDataBatch( + checkpoint, + [bucketRequest(syncRules, 'global[]', batch2.chunkData.next_after)], + { + limit: 4 + } + ) ); expect(test_utils.getBatchData(batch3)).toEqual([]); @@ -1151,10 +1169,7 @@ bucket_definitions: return await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - new Map([ - ['global1[]', 0n], - ['global2[]', 0n] - ]), + 
[bucketRequest(syncRules, 'global1[]', 0n), bucketRequest(syncRules, 'global2[]', 0n)], options ) ); @@ -1506,7 +1521,7 @@ bucket_definitions: const cp = await bucketStorage.getCheckpoint(); expect(cp.lsn).toEqual('3/1'); const data = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(cp.checkpoint, new Map([['global[]', 0n]])) + bucketStorage.getBucketDataBatch(cp.checkpoint, [bucketRequest(syncRules)]) ); expect(data).toEqual([]); diff --git a/packages/service-core/test/src/sync/BucketChecksumState.test.ts b/packages/service-core/test/src/sync/BucketChecksumState.test.ts index a5925f53b..51a5d4f45 100644 --- a/packages/service-core/test/src/sync/BucketChecksumState.test.ts +++ b/packages/service-core/test/src/sync/BucketChecksumState.test.ts @@ -505,7 +505,9 @@ bucket_definitions: const line = (await state.buildNextCheckpointLine({ base: storage.makeCheckpoint(1n, (lookups) => { - expect(lookups).toEqual([ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1' }, ['u1'])]); + expect(lookups).toEqual([ + ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1', source: null as any }, ['u1']) + ]); return [{ id: 1 }, { id: 2 }]; }), writeCheckpoint: null, @@ -566,7 +568,9 @@ bucket_definitions: // Now we get a new line const line2 = (await state.buildNextCheckpointLine({ base: storage.makeCheckpoint(2n, (lookups) => { - expect(lookups).toEqual([ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1' }, ['u1'])]); + expect(lookups).toEqual([ + ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1', source: null as any }, ['u1']) + ]); return [{ id: 1 }, { id: 2 }, { id: 3 }]; }), writeCheckpoint: null, From 9f5034634f4277c67f24acda24f4c6dc24bc55e7 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 12 Jan 2026 17:28:10 +0200 Subject: [PATCH 018/101] Minor restructure and test fixes. 
--- .../implementation/MongoPersistedSyncRules.ts | 8 +- .../PostgresPersistedSyncRulesContent.ts | 14 ++- .../test/src/wal_stream_utils.ts | 3 +- .../src/routes/endpoints/admin.ts | 4 +- .../test/src/routes/stream.test.ts | 6 +- .../test/src/sync/BucketChecksumState.test.ts | 111 ++++++++++++------ packages/sync-rules/src/SqlSyncRules.ts | 10 +- .../sync-rules/test/src/compatibility.test.ts | 10 +- 8 files changed, 105 insertions(+), 61 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts index 39e3c9b34..d29ce9a16 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts @@ -1,6 +1,8 @@ import { BucketDataScope, BucketDataSource, + CompatibilityOption, + DEFAULT_HYDRATION_STATE, HydratedSyncRules, HydrationState, ParameterIndexLookupCreator, @@ -26,7 +28,11 @@ export class MongoPersistedSyncRules implements storage.PersistedSyncRules { hydratedSyncRules(): HydratedSyncRules { if (this.mapping == null) { - return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(this.id) }); + if (this.sync_rules.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { + return this.sync_rules.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); + } else { + return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(this.id) }); + } } else { return this.sync_rules.hydrate({ hydrationState: new MongoHydrationState(this.mapping) diff --git a/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts b/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts index 53db43dfa..7a05cba07 100644 --- a/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts +++ b/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts @@ -1,7 +1,7 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { storage } from '@powersync/service-core'; -import { SqlSyncRules } from '@powersync/service-sync-rules'; +import { CompatibilityOption, DEFAULT_HYDRATION_STATE, SqlSyncRules } from '@powersync/service-sync-rules'; import { models } from '../../types/types.js'; import { versionedHydrationState } from '@powersync/service-sync-rules'; @@ -38,9 +38,15 @@ export class PostgresPersistedSyncRulesContent implements storage.PersistedSyncR slot_name: this.slot_name, sync_rules: SqlSyncRules.fromYaml(this.sync_rules_content, options), hydratedSyncRules() { - return this.sync_rules.hydrate({ - hydrationState: versionedHydrationState(this.id) - }); + if (this.sync_rules.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { + return this.sync_rules.hydrate({ + hydrationState: versionedHydrationState(this.id) + }); + } else { + return this.sync_rules.hydrate({ + hydrationState: DEFAULT_HYDRATION_STATE + }); + } } }; } diff --git a/modules/module-postgres/test/src/wal_stream_utils.ts b/modules/module-postgres/test/src/wal_stream_utils.ts index f051100de..486208e8c 100644 --- a/modules/module-postgres/test/src/wal_stream_utils.ts +++ b/modules/module-postgres/test/src/wal_stream_utils.ts @@ -1,5 +1,6 @@ import { PgManager } from '@module/replication/PgManager.js'; import { 
PUBLICATION_NAME, WalStream, WalStreamOptions } from '@module/replication/WalStream.js'; +import { ReplicationAbortedError } from '@powersync/lib-services-framework'; import { BucketStorageFactory, createCoreReplicationMetrics, @@ -14,8 +15,6 @@ import { import { bucketRequest, METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import * as pgwire from '@powersync/service-jpgwire'; import { clearTestDb, getClientCheckpoint, TEST_CONNECTION_OPTIONS } from './util.js'; -import { CustomTypeRegistry } from '@module/types/registry.js'; -import { ReplicationAbortedError } from '@powersync/lib-services-framework'; export class WalStreamTestContext implements AsyncDisposable { private _walStream?: WalStream; diff --git a/packages/service-core/src/routes/endpoints/admin.ts b/packages/service-core/src/routes/endpoints/admin.ts index 6d162a072..7b8b74e7b 100644 --- a/packages/service-core/src/routes/endpoints/admin.ts +++ b/packages/service-core/src/routes/endpoints/admin.ts @@ -1,5 +1,5 @@ import { ErrorCode, errors, router, schema } from '@powersync/lib-services-framework'; -import { SqlSyncRules, StaticSchema } from '@powersync/service-sync-rules'; +import { DEFAULT_HYDRATION_STATE, SqlSyncRules, StaticSchema } from '@powersync/service-sync-rules'; import { internal_routes } from '@powersync/service-types'; import * as api from '../../api/api-index.js'; @@ -179,7 +179,7 @@ export const validate = routeDefinition({ schema }), hydratedSyncRules() { - return this.sync_rules.hydrate(); + return this.sync_rules.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); } }; }, diff --git a/packages/service-core/test/src/routes/stream.test.ts b/packages/service-core/test/src/routes/stream.test.ts index 5e273d61c..948bcc4ef 100644 --- a/packages/service-core/test/src/routes/stream.test.ts +++ b/packages/service-core/test/src/routes/stream.test.ts @@ -1,6 +1,6 @@ import { BasicRouterRequest, Context, SyncRulesBucketStorage } from '@/index.js'; import { RouterResponse, ServiceError, logger } from '@powersync/lib-services-framework'; -import { SqlSyncRules } from '@powersync/service-sync-rules'; +import { DEFAULT_HYDRATION_STATE, SqlSyncRules } from '@powersync/service-sync-rules'; import { Readable, Writable } from 'stream'; import { pipeline } from 'stream/promises'; import { describe, expect, it } from 'vitest'; @@ -45,7 +45,7 @@ describe('Stream Route', () => { const storage = { getParsedSyncRules() { - return new SqlSyncRules('bucket_definitions: {}').hydrate(); + return new SqlSyncRules('bucket_definitions: {}').hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); }, watchCheckpointChanges: async function* (options) { throw new Error('Simulated storage error'); @@ -83,7 +83,7 @@ describe('Stream Route', () => { it('logs the application metadata', async () => { const storage = { getParsedSyncRules() { - return new SqlSyncRules('bucket_definitions: {}').hydrate(); + return new SqlSyncRules('bucket_definitions: {}').hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); }, watchCheckpointChanges: async function* (options) { throw new Error('Simulated storage error'); diff --git a/packages/service-core/test/src/sync/BucketChecksumState.test.ts b/packages/service-core/test/src/sync/BucketChecksumState.test.ts index 51a5d4f45..8ee74be2f 100644 --- a/packages/service-core/test/src/sync/BucketChecksumState.test.ts +++ b/packages/service-core/test/src/sync/BucketChecksumState.test.ts @@ -12,7 +12,14 @@ import { WatchFilterEvent } from '@/index.js'; import { JSONBig } from 
'@powersync/service-jsonbig'; -import { RequestJwtPayload, ScopedParameterLookup, SqliteJsonRow, SqlSyncRules } from '@powersync/service-sync-rules'; +import { + DEFAULT_HYDRATION_STATE, + RequestJwtPayload, + ScopedParameterLookup, + SOURCE, + SqliteJsonRow, + SqlSyncRules +} from '@powersync/service-sync-rules'; import { versionedHydrationState } from '@powersync/service-sync-rules'; import { beforeEach, describe, expect, test } from 'vitest'; @@ -26,7 +33,7 @@ bucket_definitions: data: [] `, { defaultSchema: 'public' } - ).hydrate({ hydrationState: versionedHydrationState(1) }); + ).hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); // global[1] and global[2] const SYNC_RULES_GLOBAL_TWO = SqlSyncRules.fromYaml( @@ -39,7 +46,7 @@ bucket_definitions: data: [] `, { defaultSchema: 'public' } - ).hydrate({ hydrationState: versionedHydrationState(2) }); + ).hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); // by_project[n] const SYNC_RULES_DYNAMIC = SqlSyncRules.fromYaml( @@ -50,7 +57,7 @@ bucket_definitions: data: [] `, { defaultSchema: 'public' } - ).hydrate({ hydrationState: versionedHydrationState(3) }); + ).hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); const syncContext = new SyncContext({ maxBuckets: 100, @@ -88,14 +95,14 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[]', priority: 3 } ]); // This is the bucket data to be fetched - expect(line.getFilteredBucketPositions()).toEqual(new Map([['global[]', 0n]])); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'global[]', start: 0n }]); // This similuates the bucket data being sent line.advance(); @@ -124,7 +131,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line2.getFilteredBucketPositions()).toEqual(new Map([['global[]', 1n]])); + expect(line2.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'global[]', start: 1n }]); }); test('global bucket with initial state', async () => { @@ -158,14 +165,14 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[]', priority: 3 } ]); // This is the main difference between this and the previous test - expect(line.getFilteredBucketPositions()).toEqual(new Map([['global[]', 1n]])); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'global[]', start: 1n }]); }); test('multiple static buckets', async () => { @@ -198,7 +205,7 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[1]', priority: 3 @@ -266,13 +273,13 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[]', priority: 3 } ]); - expect(line.getFilteredBucketPositions()).toEqual(new Map([['global[]', 0n]])); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'global[]', start: 0n }]); }); test('invalidating individual bucket', async () => { @@ -329,7 +336,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line2.bucketsToFetch).toEqual([{ bucket: 'global[1]', 
priority: 3 }]); + expect(line2.bucketsToFetch.map(removeSourceSymbol)).toEqual([{ bucket: 'global[1]', priority: 3 }]); }); test('invalidating all buckets', async () => { @@ -379,7 +386,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line2.bucketsToFetch).toEqual([ + expect(line2.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[1]', priority: 3 }, { bucket: 'global[2]', priority: 3 } ]); @@ -416,7 +423,7 @@ bucket_definitions: streams: [{ name: 'global', is_default: true, errors: [] }] } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[1]', priority: 3 @@ -428,12 +435,16 @@ bucket_definitions: ]); // This is the bucket data to be fetched - expect(line.getFilteredBucketPositions()).toEqual( - new Map([ - ['global[1]', 0n], - ['global[2]', 0n] - ]) - ); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([ + { + bucket: 'global[1]', + start: 0n + }, + { + bucket: 'global[2]', + start: 0n + } + ]); // No data changes here. // We simulate partial data sent, before a checkpoint is interrupted. @@ -469,7 +480,7 @@ bucket_definitions: } }); // This should contain both buckets, even though only one changed. - expect(line2.bucketsToFetch).toEqual([ + expect(line2.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'global[1]', priority: 3 @@ -480,12 +491,16 @@ bucket_definitions: } ]); - expect(line2.getFilteredBucketPositions()).toEqual( - new Map([ - ['global[1]', 3n], - ['global[2]', 1n] - ]) - ); + expect(line2.getFilteredBucketPositions().map(removeSource)).toEqual([ + { + bucket: 'global[1]', + start: 3n + }, + { + bucket: 'global[2]', + start: 1n + } + ]); }); test('dynamic buckets with updates', async () => { @@ -542,7 +557,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line.bucketsToFetch).toEqual([ + expect(line.bucketsToFetch.map(removeSourceSymbol)).toEqual([ { bucket: 'by_project[1]', priority: 3 @@ -554,12 +569,16 @@ bucket_definitions: ]); line.advance(); // This is the bucket data to be fetched - expect(line.getFilteredBucketPositions()).toEqual( - new Map([ - ['by_project[1]', 0n], - ['by_project[2]', 0n] - ]) - ); + expect(line.getFilteredBucketPositions().map(removeSource)).toEqual([ + { + bucket: 'by_project[1]', + start: 0n + }, + { + bucket: 'by_project[2]', + start: 0n + } + ]); line.advance(); line.updateBucketPosition({ bucket: 'by_project[1]', nextAfter: 1n, hasMore: false }); @@ -598,7 +617,7 @@ bucket_definitions: write_checkpoint: undefined } }); - expect(line2.getFilteredBucketPositions()).toEqual(new Map([['by_project[3]', 0n]])); + expect(line2.getFilteredBucketPositions().map(removeSource)).toEqual([{ bucket: 'by_project[3]', start: 0n }]); }); describe('streams', () => { @@ -872,3 +891,23 @@ class MockBucketChecksumStateStorage implements BucketChecksumStateStorage { }; } } + +/** + * Removes the source property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. + */ +export function removeSource(obj: T): Omit { + const { source, ...rest } = obj; + return rest; +} + +/** + * Removes the [SOURCE] symbol property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. 
+ */ +export function removeSourceSymbol(obj: T): Omit { + const { [SOURCE]: source, ...rest } = obj; + return rest; +} diff --git a/packages/sync-rules/src/SqlSyncRules.ts b/packages/sync-rules/src/SqlSyncRules.ts index 373972eeb..ce2031b1f 100644 --- a/packages/sync-rules/src/SqlSyncRules.ts +++ b/packages/sync-rules/src/SqlSyncRules.ts @@ -398,16 +398,10 @@ export class SqlSyncRules { * * @param params.hydrationState Transforms bucket ids based on persisted state. May omit for tests. */ - hydrate(params?: CreateSourceParams): HydratedSyncRules { - let hydrationState = params?.hydrationState; - // FIXME: Check logic for this: !this.compatibility.isEnabled(CompatibilityOption.versionedBucketIds) - if (hydrationState == null) { - hydrationState = DEFAULT_HYDRATION_STATE; - } - const resolvedParams = { hydrationState }; + hydrate(params: CreateSourceParams): HydratedSyncRules { return new HydratedSyncRules({ definition: this, - createParams: resolvedParams, + createParams: params, bucketDataSources: this.bucketDataSources, bucketParameterIndexLookupCreators: this.bucketParameterLookupSources, eventDescriptors: this.eventDescriptors, diff --git a/packages/sync-rules/test/src/compatibility.test.ts b/packages/sync-rules/test/src/compatibility.test.ts index 53d2149b6..0dfe4e697 100644 --- a/packages/sync-rules/test/src/compatibility.test.ts +++ b/packages/sync-rules/test/src/compatibility.test.ts @@ -1,7 +1,7 @@ import { describe, expect, test } from 'vitest'; import { DateTimeValue, SqlSyncRules, TimeValuePrecision, toSyncRulesValue } from '../../src/index.js'; -import { versionedHydrationState } from '../../src/HydrationState.js'; +import { DEFAULT_HYDRATION_STATE, versionedHydrationState } from '../../src/HydrationState.js'; import { ASSETS, normalizeQuerierOptions, PARSE_OPTIONS, removeSource, removeSourceSymbol } from './util.js'; describe('compatibility options', () => { @@ -20,7 +20,7 @@ bucket_definitions: - SELECT id, description FROM assets `, PARSE_OPTIONS - ).hydrate(); + ).hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( rules @@ -49,7 +49,7 @@ config: timestamps_iso8601: true `, PARSE_OPTIONS - ).hydrate(); + ).hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( rules @@ -223,7 +223,7 @@ bucket_definitions: - SELECT id, description ->> 'foo.bar' AS "desc" FROM assets `, PARSE_OPTIONS - ).hydrate(); + ).hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( rules @@ -249,7 +249,7 @@ config: fixed_json_extract: true `, PARSE_OPTIONS - ).hydrate(); + ).hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( rules From 25344b8112150b2f62aea1dde1eb5619fd00b6c0 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 10:41:43 +0200 Subject: [PATCH 019/101] Fixes for checksums and some tests. 
--- .../storage/implementation/MongoChecksums.ts | 17 +- .../storage/implementation/MongoCompactor.ts | 14 +- .../implementation/MongoPersistedSyncRules.ts | 21 ++- .../implementation/MongoSyncBucketStorage.ts | 5 +- .../src/storage/PostgresSyncRulesStorage.ts | 3 +- .../PostgresPersistedSyncRulesContent.ts | 31 ++-- .../src/test-utils/general-utils.ts | 17 +- .../src/tests/register-compacting-tests.ts | 75 ++++----- .../tests/register-data-storage-data-tests.ts | 147 +++++++++--------- .../src/routes/endpoints/admin.ts | 3 +- .../service-core/src/storage/ChecksumCache.ts | 21 ++- .../src/storage/PersistedSyncRulesContent.ts | 6 +- .../src/storage/SyncRulesBucketStorage.ts | 6 +- .../src/sync/BucketChecksumState.ts | 19 ++- tsconfig.json | 3 + 15 files changed, 213 insertions(+), 175 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts index abcb15845..5ebcb4020 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoChecksums.ts @@ -3,6 +3,7 @@ import { addPartialChecksums, bson, BucketChecksum, + BucketChecksumRequest, ChecksumCache, ChecksumMap, FetchPartialBucketChecksum, @@ -13,6 +14,7 @@ import { PartialOrFullChecksum } from '@powersync/service-core'; import { PowerSyncMongo } from './db.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; /** * Checksum calculation options, primarily for tests. @@ -47,6 +49,7 @@ export class MongoChecksums { constructor( private db: PowerSyncMongo, private group_id: number, + private mapping: BucketDefinitionMapping, private options?: MongoChecksumOptions ) {} @@ -68,7 +71,7 @@ export class MongoChecksums { * Calculate checksums, utilizing the cache for partial checkums, and querying the remainder from * the database (bucket_state + bucket_data). */ - async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: InternalOpId, buckets: BucketChecksumRequest[]): Promise { return this.cache.getChecksumMap(checkpoint, buckets); } @@ -92,10 +95,12 @@ export class MongoChecksums { const preFilters: any[] = []; for (let request of batch) { + const sourceId = this.mapping.bucketSourceId(request.source); + if (request.start == null) { preFilters.push({ _id: { - g: this.group_id, + g: sourceId, b: request.bucket }, 'compacted_state.op_id': { $exists: true, $lte: request.end } @@ -206,15 +211,16 @@ export class MongoChecksums { while (requests.size > 0) { const filters: any[] = []; for (let request of requests.values()) { + const sourceId = this.mapping.bucketSourceId(request.source); filters.push({ _id: { $gt: { - g: this.group_id, + g: sourceId, b: request.bucket, o: request.start ?? 
new bson.MinKey() }, $lte: { - g: this.group_id, + g: sourceId, b: request.bucket, o: request.end } @@ -291,7 +297,8 @@ export class MongoChecksums { requests.set(bucket, { bucket, start: doc.last_op, - end: req!.end + end: req!.end, + source: req!.source }); } else { // All done for this bucket diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts index 50ee3ac10..a4225096d 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts @@ -2,6 +2,7 @@ import { mongo, MONGO_OPERATION_TIMEOUT_MS } from '@powersync/lib-service-mongod import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework'; import { addChecksums, + BucketChecksumRequest, InternalOpId, isPartialChecksum, PopulateChecksumCacheResults, @@ -13,6 +14,7 @@ import { PowerSyncMongo } from './db.js'; import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js'; import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js'; import { cacheKey } from './OperationBatch.js'; +import { BucketDataSource } from '@powersync/service-sync-rules'; interface CurrentBucketState { /** Bucket name */ @@ -509,7 +511,7 @@ export class MongoCompactor { break; } } - await this.updateChecksumsBatch(checkBuckets.map((b) => b.bucket)); + await this.updateChecksumsBatch(checkBuckets); logger.info(`Updated checksums for batch of ${checkBuckets.length} buckets in ${Date.now() - start}ms`); count += buckets.length; } @@ -525,7 +527,7 @@ export class MongoCompactor { private async dirtyBucketBatch(options: { minBucketChanges: number; exclude?: string[]; - }): Promise<{ bucket: string; estimatedCount: number }[]> { + }): Promise<{ bucket: string; estimatedCount: number; source: BucketDataSource }[]> { if (options.minBucketChanges <= 0) { throw new ReplicationAssertionError('minBucketChanges must be >= 1'); } @@ -554,15 +556,17 @@ export class MongoCompactor { return dirtyBuckets.map((bucket) => ({ bucket: bucket._id.b, - estimatedCount: bucket.estimate_since_compact!.count + (bucket.compacted_state?.count ?? 0) + estimatedCount: bucket.estimate_since_compact!.count + (bucket.compacted_state?.count ?? 
0), + source: null as any // FIXME: Implement this })); } - private async updateChecksumsBatch(buckets: string[]) { + private async updateChecksumsBatch(buckets: BucketChecksumRequest[]) { const checksums = await this.storage.checksums.computePartialChecksumsDirect( buckets.map((bucket) => { return { - bucket, + bucket: bucket.bucket, + source: bucket.source, end: this.maxOpId }; }) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts index d29ce9a16..d530143fe 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoPersistedSyncRules.ts @@ -15,6 +15,7 @@ import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; export class MongoPersistedSyncRules implements storage.PersistedSyncRules { public readonly slot_name: string; + public readonly hydrationState: HydrationState; constructor( public readonly id: number, @@ -24,21 +25,19 @@ export class MongoPersistedSyncRules implements storage.PersistedSyncRules { public readonly mapping: BucketDefinitionMapping ) { this.slot_name = slot_name ?? `powersync_${id}`; - } - - hydratedSyncRules(): HydratedSyncRules { + if (!this.sync_rules.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { + this.hydrationState = DEFAULT_HYDRATION_STATE; + } if (this.mapping == null) { - if (this.sync_rules.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { - return this.sync_rules.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); - } else { - return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(this.id) }); - } + this.hydrationState = versionedHydrationState(this.id); } else { - return this.sync_rules.hydrate({ - hydrationState: new MongoHydrationState(this.mapping) - }); + this.hydrationState = new MongoHydrationState(this.mapping); } } + + hydratedSyncRules(): HydratedSyncRules { + return this.sync_rules.hydrate({ hydrationState: this.hydrationState }); + } } class MongoHydrationState implements HydrationState { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 715e12df1..b39b0991e 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -8,6 +8,7 @@ import { } from '@powersync/lib-services-framework'; import { BroadcastIterable, + BucketChecksumRequest, BucketDataRequest, CHECKPOINT_INVALIDATE_ALL, CheckpointChanges, @@ -83,7 +84,7 @@ export class MongoSyncBucketStorage super(); this.db = factory.db; this.mapping = this.sync_rules.mapping; - this.checksums = new MongoChecksums(this.db, this.group_id, options?.checksumOptions); + this.checksums = new MongoChecksums(this.db, this.group_id, this.mapping, options?.checksumOptions); this.writeCheckpointAPI = new MongoWriteCheckpointAPI({ db: this.db, mode: writeCheckpointMode ?? 
storage.WriteCheckpointMode.MANAGED, @@ -525,7 +526,7 @@ export class MongoSyncBucketStorage } } - async getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: utils.InternalOpId, buckets: BucketChecksumRequest[]): Promise { return this.checksums.getChecksums(checkpoint, buckets); } diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index f36f124b4..bb42986b4 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -2,6 +2,7 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { BroadcastIterable, BucketChecksum, + BucketChecksumRequest, BucketDataRequest, CHECKPOINT_INVALIDATE_ALL, CheckpointChanges, @@ -594,7 +595,7 @@ export class PostgresSyncRulesStorage } } - async getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: utils.InternalOpId, buckets: BucketChecksumRequest[]): Promise { return this.checksumCache.getChecksumMap(checkpoint, buckets); } diff --git a/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts b/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts index 7a05cba07..4b234877b 100644 --- a/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts +++ b/modules/module-postgres-storage/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts @@ -1,7 +1,12 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { storage } from '@powersync/service-core'; -import { CompatibilityOption, DEFAULT_HYDRATION_STATE, SqlSyncRules } from '@powersync/service-sync-rules'; +import { + CompatibilityOption, + DEFAULT_HYDRATION_STATE, + HydrationState, + SqlSyncRules +} from '@powersync/service-sync-rules'; import { models } from '../../types/types.js'; import { versionedHydrationState } from '@powersync/service-sync-rules'; @@ -33,21 +38,23 @@ export class PostgresPersistedSyncRulesContent implements storage.PersistedSyncR } parsed(options: storage.ParseSyncRulesOptions): storage.PersistedSyncRules { + let hydrationState: HydrationState; + const syncRules = SqlSyncRules.fromYaml(this.sync_rules_content, options); + if (syncRules.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { + hydrationState = versionedHydrationState(this.id); + } else { + hydrationState = DEFAULT_HYDRATION_STATE; + } return { id: this.id, slot_name: this.slot_name, - sync_rules: SqlSyncRules.fromYaml(this.sync_rules_content, options), + sync_rules: syncRules, hydratedSyncRules() { - if (this.sync_rules.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { - return this.sync_rules.hydrate({ - hydrationState: versionedHydrationState(this.id) - }); - } else { - return this.sync_rules.hydrate({ - hydrationState: DEFAULT_HYDRATION_STATE - }); - } - } + return syncRules.hydrate({ + hydrationState + }); + }, + hydrationState }; } diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index dc4e6582e..a131559c3 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -29,7 +29,8 
@@ export function testRules(content: string): storage.PersistedSyncRulesContent { slot_name: 'test', hydratedSyncRules() { return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(1) }); - } + }, + hydrationState: versionedHydrationState(1) }; }, lock() { @@ -121,20 +122,24 @@ export function querierOptions(globalParameters: RequestParameters): GetQuerierO } export function bucketRequest( - syncRules: storage.PersistedSyncRulesContent | SqlSyncRules, + syncRules: storage.PersistedSyncRulesContent, bucket?: string, start?: InternalOpId | string | number ): BucketDataRequest { - const parsed = - syncRules instanceof SqlSyncRules ? syncRules : syncRules.parsed({ defaultSchema: 'not-applicable' }).sync_rules; + const parsed = syncRules.parsed({ defaultSchema: 'not-applicable' }); + const hydrationState = parsed.hydrationState; bucket ??= 'global[]'; const definitionName = bucket.substring(0, bucket.indexOf('[')); - const source = parsed.bucketDataSources.find((b) => b.uniqueName === definitionName); + const parameters = bucket.substring(bucket.indexOf('[')); + const source = parsed.sync_rules.bucketDataSources.find((b) => b.uniqueName === definitionName); + if (source == null) { throw new Error('Failed to find global bucket'); } + const bucketName = hydrationState.getBucketSourceScope(source).bucketPrefix + parameters; + console.log('query for bucket', bucketName); return { - bucket, + bucket: bucketName, start: BigInt(start ?? 0n), source: source }; diff --git a/packages/service-core-tests/src/tests/register-compacting-tests.ts b/packages/service-core-tests/src/tests/register-compacting-tests.ts index e497e5c47..4475d4b7e 100644 --- a/packages/service-core-tests/src/tests/register-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-compacting-tests.ts @@ -17,6 +17,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + const request = bucketRequest(syncRules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { await batch.markAllSnapshotDone('1/1'); @@ -51,12 +52,11 @@ bucket_definitions: }); const checkpoint = result!.flushed_op; + const request2 = bucketRequest(syncRules); - const batchBefore = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)]) - ); + const batchBefore = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request2])); const dataBefore = batchBefore.chunkData.data; - const checksumBefore = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumBefore = await bucketStorage.getChecksums(checkpoint, [request2]); expect(dataBefore).toMatchObject([ { @@ -84,13 +84,11 @@ bucket_definitions: minBucketChanges: 1 }); - const batchAfter = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)]) - ); + const batchAfter = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const dataAfter = batchAfter.chunkData.data; - const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumAfter = await bucketStorage.getChecksums(checkpoint, [request]); bucketStorage.clearChecksumCache(); - const checksumAfter2 = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumAfter2 = await bucketStorage.getChecksums(checkpoint, [request]); expect(batchAfter.targetOp).toEqual(3n); expect(dataAfter).toMatchObject([ @@ -108,8 +106,8 @@ bucket_definitions: } ]); - 
expect(checksumAfter.get('global[]')).toEqual(checksumBefore.get('global[]')); - expect(checksumAfter2.get('global[]')).toEqual(checksumBefore.get('global[]')); + expect(checksumAfter.get(request.bucket)).toEqual(checksumBefore.get(request.bucket)); + expect(checksumAfter2.get(request.bucket)).toEqual(checksumBefore.get(request.bucket)); test_utils.validateCompactedBucket(dataBefore, dataAfter); }); @@ -167,12 +165,11 @@ bucket_definitions: }); const checkpoint = result!.flushed_op; + const request = bucketRequest(syncRules); - const batchBefore = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)]) - ); + const batchBefore = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const dataBefore = batchBefore.chunkData.data; - const checksumBefore = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumBefore = await bucketStorage.getChecksums(checkpoint, [request]); // op_id sequence depends on the storage implementation expect(dataBefore).toMatchObject([ @@ -201,12 +198,10 @@ bucket_definitions: minBucketChanges: 1 }); - const batchAfter = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)]) - ); + const batchAfter = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const dataAfter = batchAfter.chunkData.data; bucketStorage.clearChecksumCache(); - const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['global[]']); + const checksumAfter = await bucketStorage.getChecksums(checkpoint, [request]); expect(batchAfter.targetOp).toBeLessThanOrEqual(checkpoint); expect(dataAfter).toMatchObject([ @@ -223,8 +218,8 @@ bucket_definitions: op: 'PUT' } ]); - expect(checksumAfter.get('global[]')).toEqual({ - ...checksumBefore.get('global[]'), + expect(checksumAfter.get(request.bucket)).toEqual({ + ...checksumBefore.get(request.bucket), count: 2 }); @@ -275,7 +270,8 @@ bucket_definitions: }); const checkpoint1 = result!.flushed_op; - await bucketStorage.getChecksums(checkpoint1, ['global[]']); + const request = bucketRequest(syncRules); + await bucketStorage.getChecksums(checkpoint1, [request]); const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { await batch.save({ @@ -297,20 +293,18 @@ bucket_definitions: minBucketChanges: 1 }); - const batchAfter = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint2, [bucketRequest(syncRules)]) - ); + const batchAfter = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint2, [request])); const dataAfter = batchAfter.chunkData.data; await bucketStorage.clearChecksumCache(); - const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); + const checksumAfter = await bucketStorage.getChecksums(checkpoint2, [request]); expect(dataAfter).toMatchObject([ { op: 'CLEAR' } ]); - expect(checksumAfter.get('global[]')).toEqual({ - bucket: 'global[]', + expect(checksumAfter.get(request.bucket)).toEqual({ + bucket: request.bucket, count: 1, checksum: dataAfter[0].checksum }); @@ -408,12 +402,9 @@ bucket_definitions: minBucketChanges: 1 }); - const batchAfter = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [ - bucketRequest(syncRules, 'grouped["b1"]', 0n), - bucketRequest(syncRules, 'grouped["b2"]', 0n) - ]) - ); + const groupedBuckets = ['b1', 'b2']; + const groupedRequests = groupedBuckets.map((bucket) => bucketRequest(syncRules, 
`grouped["${bucket}"]`, 0n)); + const batchAfter = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, groupedRequests)); const dataAfter = batchAfter.flatMap((b) => b.chunkData.data); // The op_ids will vary between MongoDB and Postgres storage @@ -506,9 +497,10 @@ bucket_definitions: }); const checkpoint2 = result2!.flushed_op; await bucketStorage.clearChecksumCache(); - const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); - expect(checksumAfter.get('global[]')).toMatchObject({ - bucket: 'global[]', + const request = bucketRequest(syncRules); + const checksumAfter = await bucketStorage.getChecksums(checkpoint2, [request]); + expect(checksumAfter.get(request.bucket)).toMatchObject({ + bucket: request.bucket, count: 4 }); }); @@ -523,6 +515,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + const request = bucketRequest(syncRules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { await batch.markAllSnapshotDone('1/1'); @@ -548,7 +541,7 @@ bucket_definitions: }); // Get checksums here just to populate the cache - await bucketStorage.getChecksums(result!.flushed_op, ['global[]']); + await bucketStorage.getChecksums(result!.flushed_op, [request]); const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { await batch.save({ sourceTable: TEST_TABLE, @@ -569,9 +562,9 @@ bucket_definitions: }); const checkpoint2 = result2!.flushed_op; - const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); - expect(checksumAfter.get('global[]')).toMatchObject({ - bucket: 'global[]', + const checksumAfter = await bucketStorage.getChecksums(checkpoint2, [request]); + expect(checksumAfter.get(request.bucket)).toMatchObject({ + bucket: request.bucket, count: 1 }); }); diff --git a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts index acd4d540d..3b778158f 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts @@ -46,7 +46,6 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - bucketStorage.getParsedSyncRules; await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { const sourceTable = TEST_TABLE; @@ -71,7 +70,8 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); + const request = bucketRequest(syncRules); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -88,10 +88,10 @@ bucket_definitions: { op: 'REMOVE', object_id: 'test1', checksum: c2 } ]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: (c1 + c2) & 0xffffffff, count: 2 } @@ -140,7 +140,8 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); + const request = 
bucketRequest(syncRules); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -153,10 +154,10 @@ bucket_definitions: expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: c1 & 0xffffffff, count: 1 } @@ -209,8 +210,9 @@ bucket_definitions: }); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -223,10 +225,10 @@ bucket_definitions: expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: c1 & 0xffffffff, count: 1 } @@ -267,8 +269,9 @@ bucket_definitions: }); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -281,10 +284,10 @@ bucket_definitions: expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: c1 & 0xffffffff, count: 1 } @@ -407,8 +410,9 @@ bucket_definitions: }); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = batch[0].chunkData.data.map((d) => { return { op: d.op, @@ -425,10 +429,10 @@ bucket_definitions: { op: 'REMOVE', object_id: 'test1', checksum: c2 } ]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: (c1 + c2) & 0xffffffff, count: 2 } @@ -529,8 +533,9 @@ bucket_definitions: }); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request])); const data = 
batch[0].chunkData.data.map((d) => { return { @@ -550,10 +555,10 @@ bucket_definitions: { op: 'REMOVE', object_id: 'test1', checksum: c2 } ]); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; expect(checksums).toEqual([ { - bucket: 'global[]', + bucket: request.bucket, checksum: (c1 + c1 + c1 + c2) & 0xffffffff, count: 4 } @@ -689,10 +694,9 @@ bucket_definitions: }); const checkpoint2 = result2!.flushed_op; + const request = bucketRequest(syncRules, 'global[]', checkpoint1); - const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint2, [bucketRequest(syncRules, 'global[]', checkpoint1)]) - ); + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint2, [request])); const data = batch[0].chunkData.data.map((d) => { return { @@ -790,9 +794,10 @@ bucket_definitions: }); const checkpoint3 = result3!.flushed_op; + const request = bucketRequest(syncRules); const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint3, [bucketRequest(syncRules, 'global[]', checkpoint1)]) + bucketStorage.getBucketDataBatch(checkpoint3, [{ ...request, start: checkpoint1 }]) ); const data = batch[0].chunkData.data.map((d) => { return { @@ -899,9 +904,10 @@ bucket_definitions: }); const checkpoint3 = result3!.flushed_op; + const request = bucketRequest(syncRules); const batch = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint3, [bucketRequest(syncRules, 'global[]', checkpoint1)]) + bucketStorage.getBucketDataBatch(checkpoint3, [{ ...request, start: checkpoint1 }]) ); const data = batch[0].chunkData.data.map((d) => { return { @@ -998,14 +1004,13 @@ bucket_definitions: }); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); const options: storage.BucketDataBatchOptions = { chunkLimitBytes: 16 * 1024 * 1024 }; - const batch1 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)], options) - ); + const batch1 = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request], options)); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 }, { op_id: '2', op: 'PUT', object_id: 'large1', checksum: 454746904 } @@ -1019,7 +1024,7 @@ bucket_definitions: const batch2 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - [bucketRequest(syncRules, 'global[]', BigInt(batch1[0].chunkData.next_after))], + [{ ...request, start: BigInt(batch1[0].chunkData.next_after) }], options ) ); @@ -1036,7 +1041,7 @@ bucket_definitions: const batch3 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - [bucketRequest(syncRules, 'global[]', BigInt(batch2[0].chunkData.next_after))], + [{ ...request, start: BigInt(batch2[0].chunkData.next_after) }], options ) ); @@ -1077,10 +1082,9 @@ bucket_definitions: }); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const batch1 = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)], { limit: 4 }) - ); + const batch1 = await test_utils.oneFromAsync(bucketStorage.getBucketDataBatch(checkpoint, [request], { limit: 4 })); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 
2871785649 }, @@ -1096,13 +1100,9 @@ bucket_definitions: }); const batch2 = await test_utils.oneFromAsync( - bucketStorage.getBucketDataBatch( - checkpoint, - [bucketRequest(syncRules, 'global[]', batch1.chunkData.next_after)], - { - limit: 4 - } - ) + bucketStorage.getBucketDataBatch(checkpoint, [{ ...request, start: BigInt(batch1.chunkData.next_after) }], { + limit: 4 + }) ); expect(test_utils.getBatchData(batch2)).toEqual([ { op_id: '5', op: 'PUT', object_id: 'test5', checksum: 3686902721 }, @@ -1116,13 +1116,9 @@ bucket_definitions: }); const batch3 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch( - checkpoint, - [bucketRequest(syncRules, 'global[]', batch2.chunkData.next_after)], - { - limit: 4 - } - ) + bucketStorage.getBucketDataBatch(checkpoint, [{ ...request, start: BigInt(batch2.chunkData.next_after) }], { + limit: 4 + }) ); expect(test_utils.getBatchData(batch3)).toEqual([]); @@ -1166,21 +1162,20 @@ bucket_definitions: }); const { checkpoint } = await bucketStorage.getCheckpoint(); - return await test_utils.fromAsync( - bucketStorage.getBucketDataBatch( - checkpoint, - [bucketRequest(syncRules, 'global1[]', 0n), bucketRequest(syncRules, 'global2[]', 0n)], - options - ) + const global1Request = bucketRequest(syncRules, 'global1[]', 0n); + const global2Request = bucketRequest(syncRules, 'global2[]', 0n); + const batch = await test_utils.fromAsync( + bucketStorage.getBucketDataBatch(checkpoint, [global1Request, global2Request], options) ); + return { batch, global1Request, global2Request }; }; test('batch has_more (1)', async () => { - const batch = await setup({ limit: 5 }); + const { batch, global1Request, global2Request } = await setup({ limit: 5 }); expect(batch.length).toEqual(2); - expect(batch[0].chunkData.bucket).toEqual('global1[]'); - expect(batch[1].chunkData.bucket).toEqual('global2[]'); + expect(batch[0].chunkData.bucket).toEqual(global1Request.bucket); + expect(batch[1].chunkData.bucket).toEqual(global2Request.bucket); expect(test_utils.getBatchData(batch[0])).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 } @@ -1207,11 +1202,11 @@ bucket_definitions: }); test('batch has_more (2)', async () => { - const batch = await setup({ limit: 11 }); + const { batch, global1Request, global2Request } = await setup({ limit: 11 }); expect(batch.length).toEqual(2); - expect(batch[0].chunkData.bucket).toEqual('global1[]'); - expect(batch[1].chunkData.bucket).toEqual('global2[]'); + expect(batch[0].chunkData.bucket).toEqual(global1Request.bucket); + expect(batch[1].chunkData.bucket).toEqual(global2Request.bucket); expect(test_utils.getBatchData(batch[0])).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 } @@ -1244,12 +1239,12 @@ bucket_definitions: test('batch has_more (3)', async () => { // 50 bytes is more than 1 row, less than 2 rows - const batch = await setup({ limit: 3, chunkLimitBytes: 50 }); + const { batch, global1Request, global2Request } = await setup({ limit: 3, chunkLimitBytes: 50 }); expect(batch.length).toEqual(3); - expect(batch[0].chunkData.bucket).toEqual('global1[]'); - expect(batch[1].chunkData.bucket).toEqual('global2[]'); - expect(batch[2].chunkData.bucket).toEqual('global2[]'); + expect(batch[0].chunkData.bucket).toEqual(global1Request.bucket); + expect(batch[1].chunkData.bucket).toEqual(global2Request.bucket); + expect(batch[2].chunkData.bucket).toEqual(global2Request.bucket); expect(test_utils.getBatchData(batch[0])).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 
2871785649 } @@ -1382,11 +1377,12 @@ bucket_definitions: await batch.commit('1/1'); }); const { checkpoint } = await bucketStorage.getCheckpoint(); + const request = bucketRequest(syncRules); - const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; - expect(checksums).toEqual([{ bucket: 'global[]', checksum: 1917136889, count: 1 }]); - const checksums2 = [...(await bucketStorage.getChecksums(checkpoint + 1n, ['global[]'])).values()]; - expect(checksums2).toEqual([{ bucket: 'global[]', checksum: 1917136889, count: 1 }]); + const checksums = [...(await bucketStorage.getChecksums(checkpoint, [request])).values()]; + expect(checksums).toEqual([{ bucket: request.bucket, checksum: 1917136889, count: 1 }]); + const checksums2 = [...(await bucketStorage.getChecksums(checkpoint + 1n, [request])).values()]; + expect(checksums2).toEqual([{ bucket: request.bucket, checksum: 1917136889, count: 1 }]); }); testChecksumBatching(config); @@ -1570,14 +1566,17 @@ bucket_definitions: const { checkpoint } = await bucketStorage.getCheckpoint(); bucketStorage.clearChecksumCache(); - const buckets = ['user["u1"]', 'user["u2"]', 'user["u3"]', 'user["u4"]']; - const checksums = [...(await bucketStorage.getChecksums(checkpoint, buckets)).values()]; + const users = ['u1', 'u2', 'u3', 'u4']; + const expectedChecksums = [346204588, 5261081, 134760718, -302639724]; + const bucketRequests = users.map((user) => bucketRequest(syncRules, `user["${user}"]`)); + const checksums = [...(await bucketStorage.getChecksums(checkpoint, bucketRequests)).values()]; checksums.sort((a, b) => a.bucket.localeCompare(b.bucket)); - expect(checksums).toEqual([ - { bucket: 'user["u1"]', count: 4, checksum: 346204588 }, - { bucket: 'user["u2"]', count: 4, checksum: 5261081 }, - { bucket: 'user["u3"]', count: 4, checksum: 134760718 }, - { bucket: 'user["u4"]', count: 4, checksum: -302639724 } - ]); + const expected = bucketRequests.map((request, index) => ({ + bucket: request.bucket, + count: 4, + checksum: expectedChecksums[index] + })); + expected.sort((a, b) => a.bucket.localeCompare(b.bucket)); + expect(checksums).toEqual(expected); }); } diff --git a/packages/service-core/src/routes/endpoints/admin.ts b/packages/service-core/src/routes/endpoints/admin.ts index 7b8b74e7b..852e44b84 100644 --- a/packages/service-core/src/routes/endpoints/admin.ts +++ b/packages/service-core/src/routes/endpoints/admin.ts @@ -180,7 +180,8 @@ export const validate = routeDefinition({ }), hydratedSyncRules() { return this.sync_rules.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); - } + }, + hydrationState: DEFAULT_HYDRATION_STATE }; }, sync_rules_content: content, diff --git a/packages/service-core/src/storage/ChecksumCache.ts b/packages/service-core/src/storage/ChecksumCache.ts index 2e5ac1c22..e315d61e8 100644 --- a/packages/service-core/src/storage/ChecksumCache.ts +++ b/packages/service-core/src/storage/ChecksumCache.ts @@ -2,6 +2,8 @@ import { OrderedSet } from '@js-sdsl/ordered-set'; import { LRUCache } from 'lru-cache/min'; import { BucketChecksum } from '../util/protocol-types.js'; import { addBucketChecksums, ChecksumMap, InternalOpId, PartialChecksum } from '../util/utils.js'; +import { BucketChecksumRequest } from './SyncRulesBucketStorage.js'; +import { BucketDataSource } from '@powersync/service-sync-rules'; interface ChecksumFetchContext { fetch(bucket: string): Promise; @@ -10,6 +12,7 @@ interface ChecksumFetchContext { export interface FetchPartialBucketChecksum { bucket: string; + source: 
BucketDataSource; start?: InternalOpId; end: InternalOpId; } @@ -113,10 +116,10 @@ export class ChecksumCache { this.bucketCheckpoints.clear(); } - async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: InternalOpId, buckets: BucketChecksumRequest[]): Promise { const checksums = await this.getChecksumMap(checkpoint, buckets); // Return results in the same order as the request - return buckets.map((bucket) => checksums.get(bucket)!); + return buckets.map((bucket) => checksums.get(bucket.bucket)!); } /** @@ -126,7 +129,7 @@ export class ChecksumCache { * * @returns a Map with exactly one entry for each bucket requested */ - async getChecksumMap(checkpoint: InternalOpId, buckets: string[]): Promise { + async getChecksumMap(checkpoint: InternalOpId, buckets: BucketChecksumRequest[]): Promise { // Buckets that don't have a cached checksum for this checkpoint yet let toFetch = new Set(); @@ -163,20 +166,21 @@ export class ChecksumCache { // One promise to await to ensure all fetch requests completed. let settledPromise: Promise[]> | null = null; + let sourceMap = new Map(); try { // Individual cache fetch promises let cacheFetchPromises: Promise[] = []; for (let bucket of buckets) { - const cacheKey = makeCacheKey(checkpoint, bucket); + const cacheKey = makeCacheKey(checkpoint, bucket.bucket); let status: LRUCache.Status = {}; const p = this.cache.fetch(cacheKey, { context: context, status: status }).then((checksums) => { if (checksums == null) { // Should never happen throw new Error(`Failed to get checksums for ${cacheKey}`); } - finalResults.set(bucket, checksums); + finalResults.set(bucket.bucket, checksums); }); cacheFetchPromises.push(p); if (status.fetch == 'hit' || status.fetch == 'inflight') { @@ -185,7 +189,8 @@ export class ChecksumCache { // In either case, we don't need to fetch a new checksum. } else { // We need a new request for this checksum. - toFetch.add(bucket); + toFetch.add(bucket.bucket); + sourceMap.set(bucket.bucket, bucket.source); } } // We do this directly after creating the promises, otherwise @@ -220,6 +225,7 @@ export class ChecksumCache { // Partial checksum found - make a partial checksum request bucketRequest = { bucket, + source: sourceMap.get(bucket)!, start: cp, end: checkpoint }; @@ -240,7 +246,8 @@ export class ChecksumCache { // No partial checksum found - make a new full checksum request bucketRequest = { bucket, - end: checkpoint + end: checkpoint, + source: sourceMap.get(bucket)! }; add.set(bucket, { bucket, diff --git a/packages/service-core/src/storage/PersistedSyncRulesContent.ts b/packages/service-core/src/storage/PersistedSyncRulesContent.ts index dd3922764..9b5232a08 100644 --- a/packages/service-core/src/storage/PersistedSyncRulesContent.ts +++ b/packages/service-core/src/storage/PersistedSyncRulesContent.ts @@ -1,4 +1,4 @@ -import { SqlSyncRules, HydratedSyncRules } from '@powersync/service-sync-rules'; +import { SqlSyncRules, HydratedSyncRules, HydrationState } from '@powersync/service-sync-rules'; import { ReplicationLock } from './ReplicationLock.js'; export interface ParseSyncRulesOptions { @@ -30,6 +30,10 @@ export interface PersistedSyncRules { readonly id: number; readonly sync_rules: SqlSyncRules; readonly slot_name: string; + /** + * For testing only. 
+ */ + readonly hydrationState: HydrationState; hydratedSyncRules(): HydratedSyncRules; } diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 813855bb0..1f965ef37 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -132,7 +132,7 @@ export interface SyncRulesBucketStorage * This may be slow, depending on the size of the buckets. * The checksums are cached internally to compensate for this, but does not cover all cases. */ - getChecksums(checkpoint: util.InternalOpId, buckets: string[]): Promise; + getChecksums(checkpoint: util.InternalOpId, buckets: BucketChecksumRequest[]): Promise; /** * Clear checksum cache. Primarily intended for tests. @@ -149,6 +149,10 @@ export interface BucketDataRequest { start: util.InternalOpId; source: BucketDataSource; } +export interface BucketChecksumRequest { + bucket: string; + source: BucketDataSource; +} export interface SyncRuleStatus { checkpoint_lsn: string | null; diff --git a/packages/service-core/src/sync/BucketChecksumState.ts b/packages/service-core/src/sync/BucketChecksumState.ts index 4b6a7b155..d85e0c3e4 100644 --- a/packages/service-core/src/sync/BucketChecksumState.ts +++ b/packages/service-core/src/sync/BucketChecksumState.ts @@ -138,20 +138,23 @@ export class BucketChecksumState { } // Re-check updated buckets only - let checksumLookups: string[] = []; + let checksumLookups: storage.BucketChecksumRequest[] = []; let newChecksums = new Map(); - for (let bucket of bucketDescriptionMap.keys()) { - if (!updatedBuckets.has(bucket)) { - const existing = this.lastChecksums.get(bucket); + for (let desc of bucketDescriptionMap.values()) { + if (!updatedBuckets.has(desc.bucket)) { + const existing = this.lastChecksums.get(desc.bucket); if (existing == null) { // If this happens, it means updatedBuckets did not correctly include all new buckets - throw new ServiceAssertionError(`Existing checksum not found for bucket ${bucket}`); + throw new ServiceAssertionError(`Existing checksum not found for bucket ${desc}`); } // Bucket is not specifically updated, and we have a previous checksum - newChecksums.set(bucket, existing); + newChecksums.set(desc.bucket, existing); } else { - checksumLookups.push(bucket); + checksumLookups.push({ + bucket: desc.bucket, + source: desc[SOURCE] + }); } } @@ -164,7 +167,7 @@ export class BucketChecksumState { checksumMap = newChecksums; } else { // Re-check all buckets - const bucketList = [...bucketDescriptionMap.keys()]; + const bucketList = [...bucketDescriptionMap.values()].map((b) => ({ bucket: b.bucket, source: b[SOURCE] })); checksumMap = await storage.getChecksums(base.checkpoint, bucketList); } diff --git a/tsconfig.json b/tsconfig.json index 78586205e..1947504ba 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -13,6 +13,9 @@ { "path": "./packages/service-core" }, + { + "path": "./packages/service-core-tests" + }, { "path": "./packages/service-errors" }, From 99bca49c133fcd3d99136e1568917bd3eed3af4c Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 11:29:11 +0200 Subject: [PATCH 020/101] Refactor current_data. 
--- .../implementation/MongoBucketBatch.ts | 10 +++---- .../implementation/MongoSyncBucketStorage.ts | 26 ++++++++++++++----- .../storage/implementation/PersistedBatch.ts | 4 +-- .../src/storage/implementation/models.ts | 2 +- 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 0074c4754..6efab50f8 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -216,7 +216,7 @@ export class MongoBucketBatch // the order of processing, which then becomes really tricky to manage. // This now takes 2+ queries, but doesn't have any issues with order of operations. const sizeLookups: SourceKey[] = batch.batch.map((r) => { - return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; + return { g: 0, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; }); sizes = new Map(); @@ -259,7 +259,7 @@ export class MongoBucketBatch continue; } const lookups: SourceKey[] = b.map((r) => { - return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; + return { g: 0, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; }); let current_data_lookup = new Map(); // With skipExistingRows, we only need to know whether or not the row exists. @@ -340,7 +340,7 @@ export class MongoBucketBatch let existing_lookups: bson.Binary[] = []; let new_lookups: bson.Binary[] = []; - const before_key: SourceKey = { g: this.group_id, t: mongoTableId(record.sourceTable.id), k: beforeId }; + const before_key: SourceKey = { g: 0, t: mongoTableId(record.sourceTable.id), k: beforeId }; if (this.skipExistingRows) { if (record.tag == SaveOperationTag.INSERT) { @@ -553,7 +553,7 @@ export class MongoBucketBatch // 5. TOAST: Update current data and bucket list. 
if (afterId) { // Insert or update - const after_key: SourceKey = { g: this.group_id, t: mongoTableId(sourceTable.id), k: afterId }; + const after_key: SourceKey = { g: 0, t: mongoTableId(sourceTable.id), k: afterId }; batch.upsertCurrentData(after_key, { data: afterData, buckets: new_buckets, @@ -1018,7 +1018,7 @@ export class MongoBucketBatch while (lastBatchCount == BATCH_LIMIT) { await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => { const current_data_filter: mongo.Filter = { - _id: idPrefixFilter({ g: this.group_id, t: mongoTableId(sourceTable.id) }, ['k']), + _id: idPrefixFilter({ g: 0, t: mongoTableId(sourceTable.id) }, ['k']), // Skip soft-deleted data pending_delete: { $exists: false } }; diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index b39b0991e..1639a672b 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -603,6 +603,7 @@ export class MongoSyncBucketStorage ); if (doc?.rule_mapping != null) { + // TODO: Only delete if not used by other sync rules for (let [name, id] of Object.entries(doc.rule_mapping.definitions)) { await this.retriedDelete(`deleting bucket data for ${name}`, signal, () => this.db.bucket_data.deleteMany( @@ -643,11 +644,10 @@ export class MongoSyncBucketStorage ) ); - // FIXME: handle refactored current_data structure await this.retriedDelete('deleting current data records', signal, () => this.db.current_data.deleteMany( { - _id: idPrefixFilter({ g: this.group_id }, ['t', 'k']) + _id: idPrefixFilter({ g: this.group_id as any }, ['t', 'k']) }, { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } ) @@ -665,15 +665,27 @@ export class MongoSyncBucketStorage // First remove the reference this.db.source_tables.updateMany({ sync_rules_ids: this.group_id }, { $pull: { sync_rules_ids: this.group_id } }); - // Then delete any source tables no longer referenced - await this.retriedDelete('deleting source table records', signal, () => - this.db.source_tables.deleteMany( + // Then delete the data associated with unreferenced source tables + const tables = await this.db.source_tables + .find( { sync_rules_ids: [] }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + { projection: { _id: 1 } } ) - ); + .toArray(); + + for (let table of tables) { + await this.retriedDelete('deleting current data records for table', signal, () => + this.db.current_data.deleteMany( + { + _id: idPrefixFilter({ g: 0, t: table._id }, ['k']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + await this.db.source_tables.deleteOne({ _id: table._id }); // Delete the source table record itself + } } private async retriedDelete( diff --git a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts index c8b428ae1..5278146ff 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts @@ -218,7 +218,7 @@ export class PersistedBatch { document: { _id: op_id, key: { - g: this.group_id, + g: 0, t: mongoTableId(sourceTable.id), k: sourceKey }, @@ -240,7 +240,7 @@ export class PersistedBatch { document: { _id: op_id, key: { - g: 
this.group_id, + g: 0, t: mongoTableId(sourceTable.id), k: sourceKey }, diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index 1b8852365..b80e26ddc 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -14,7 +14,7 @@ export type ReplicaId = bson.UUID | bson.Document | any; export interface SourceKey { /** group_id */ - g: number; + g: 0; /** source table id */ t: bson.ObjectId; /** source key */ From 017e6dd190f7ba928a24e95647bfc4fccf4a7152 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 11:43:02 +0200 Subject: [PATCH 021/101] Fix more tests. --- .../test/src/checksum_cache.test.ts | 159 +++++++++++------- .../test/src/sync/BucketChecksumState.test.ts | 11 +- 2 files changed, 108 insertions(+), 62 deletions(-) diff --git a/packages/service-core/test/src/checksum_cache.test.ts b/packages/service-core/test/src/checksum_cache.test.ts index f0b61342c..a9cc56aaf 100644 --- a/packages/service-core/test/src/checksum_cache.test.ts +++ b/packages/service-core/test/src/checksum_cache.test.ts @@ -1,7 +1,9 @@ import { ChecksumCache, FetchChecksums, FetchPartialBucketChecksum } from '@/storage/ChecksumCache.js'; import { addChecksums, BucketChecksum, InternalOpId, PartialChecksum } from '@/util/util-index.js'; +import { BucketDataSource } from '@powersync/service-sync-rules'; import * as crypto from 'node:crypto'; import { describe, expect, it } from 'vitest'; +import { removeSource } from './sync/BucketChecksumState.test.js'; /** * Create a deterministic BucketChecksum based on the bucket name and checkpoint for testing purposes. @@ -67,6 +69,12 @@ describe('checksum cache', function () { return new ChecksumCache({ fetchChecksums: fetch }); }; + const DUMMY_SOURCE: BucketDataSource = null as any; + + function removeLookupSources(lookups: FetchPartialBucketChecksum[]) { + return lookups.map((b) => removeSource(b)); + } + it('should handle a sequential lookups (a)', async function () { let lookups: FetchPartialBucketChecksum[][] = []; const cache = factory(async (batch) => { @@ -74,13 +82,13 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - expect(await cache.getChecksums(1234n, ['test'])).toEqual([TEST_1234]); + expect(await cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_1234]); - expect(await cache.getChecksums(123n, ['test2'])).toEqual([TEST2_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test2', source: DUMMY_SOURCE }])).toEqual([TEST2_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toMatchObject([ [{ bucket: 'test', end: 123n }], // This should use the previous lookup [{ bucket: 'test', start: 123n, end: 1234n }], @@ -96,13 +104,13 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test2'])).toEqual([TEST2_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test2', source: DUMMY_SOURCE }])).toEqual([TEST2_123]); - expect(await cache.getChecksums(1234n, ['test'])).toEqual([TEST_1234]); + expect(await cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_1234]); - expect(await 
cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ // With this order, there is no option for a partial lookup [{ bucket: 'test2', end: 123n }], [{ bucket: 'test', end: 1234n }], @@ -117,16 +125,16 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - const p1 = cache.getChecksums(123n, ['test']); - const p2 = cache.getChecksums(1234n, ['test']); - const p3 = cache.getChecksums(123n, ['test2']); + const p1 = cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p2 = cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p3 = cache.getChecksums(123n, [{ bucket: 'test2', source: DUMMY_SOURCE }]); expect(await p1).toEqual([TEST_123]); expect(await p2).toEqual([TEST_1234]); expect(await p3).toEqual([TEST2_123]); // Concurrent requests, so we can't do a partial lookup for 123 -> 1234 - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test', end: 123n }], [{ bucket: 'test', end: 1234n }], [{ bucket: 'test2', end: 123n }] @@ -140,15 +148,15 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - const p1 = cache.getChecksums(123n, ['test']); - const p2 = cache.getChecksums(123n, ['test']); + const p1 = cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p2 = cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }]); expect(await p1).toEqual([TEST_123]); expect(await p2).toEqual([TEST_123]); // The lookup should be deduplicated, even though it's in progress - expect(lookups).toEqual([[{ bucket: 'test', end: 123n }]]); + expect(lookups.map(removeLookupSources)).toEqual([[{ bucket: 'test', end: 123n }]]); }); it('should handle serial + concurrent lookups', async function () { @@ -158,15 +166,15 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - const p2 = cache.getChecksums(1234n, ['test']); - const p3 = cache.getChecksums(1234n, ['test']); + const p2 = cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }]); + const p3 = cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }]); expect(await p2).toEqual([TEST_1234]); expect(await p3).toEqual([TEST_1234]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test', end: 123n }], // This lookup is deduplicated [{ bucket: 'test', start: 123n, end: 1234n }] @@ -180,9 +188,14 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test', 'test2'])).toEqual([TEST_123, TEST2_123]); + expect( + await cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]) + ).toEqual([TEST_123, TEST2_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [ // Both lookups in the same request { bucket: 'test', end: 123n }, @@ -198,10 +211,15 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); - expect(await cache.getChecksums(123n, ['test', 'test2'])).toEqual([TEST_123, 
TEST2_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); + expect( + await cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]) + ).toEqual([TEST_123, TEST2_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ // Request 1 [{ bucket: 'test', end: 123n }], // Request 2 @@ -216,13 +234,19 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - const a = cache.getChecksums(123n, ['test', 'test2']); - const b = cache.getChecksums(123n, ['test2', 'test3']); + const a = cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]); + const b = cache.getChecksums(123n, [ + { bucket: 'test2', source: DUMMY_SOURCE }, + { bucket: 'test3', source: DUMMY_SOURCE } + ]); expect(await a).toEqual([TEST_123, TEST2_123]); expect(await b).toEqual([TEST2_123, TEST3_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ // Request A [ { bucket: 'test', end: 123n }, @@ -240,9 +264,9 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - expect(await cache.getChecksums(125n, ['test'])).toEqual([ + expect(await cache.getChecksums(125n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: -1865121912, @@ -250,14 +274,14 @@ describe('checksum cache', function () { } ]); - expect(await cache.getChecksums(124n, ['test'])).toEqual([ + expect(await cache.getChecksums(124n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: 1887460431, count: 124 } ]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test', end: 123n }], [{ bucket: 'test', start: 123n, end: 125n }], [{ bucket: 'test', start: 123n, end: 124n }] @@ -275,19 +299,31 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - const a = cache.getChecksums(123n, ['test', 'test2']); - const b = cache.getChecksums(123n, ['test2', 'test3']); + const a = cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]); + const b = cache.getChecksums(123n, [ + { bucket: 'test2', source: DUMMY_SOURCE }, + { bucket: 'test3', source: DUMMY_SOURCE } + ]); await expect(a).rejects.toEqual(TEST_ERROR); await expect(b).rejects.toEqual(TEST_ERROR); - const a2 = cache.getChecksums(123n, ['test', 'test2']); - const b2 = cache.getChecksums(123n, ['test2', 'test3']); + const a2 = cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]); + const b2 = cache.getChecksums(123n, [ + { bucket: 'test2', source: DUMMY_SOURCE }, + { bucket: 'test3', source: DUMMY_SOURCE } + ]); expect(await a2).toEqual([TEST_123, TEST2_123]); expect(await b2).toEqual([TEST2_123, TEST3_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ // Request A (fails) [ { bucket: 'test', end: 123n }, @@ -311,11 +347,15 @@ describe('checksum cache', function () { return fetchTestChecksums(batch.filter((b) => b.bucket != 'test')); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([{ bucket: 'test', checksum: 0, count: 0 }]); - expect(await 
cache.getChecksums(123n, ['test', 'test2'])).toEqual([ - { bucket: 'test', checksum: 0, count: 0 }, - TEST2_123 + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ + { bucket: 'test', checksum: 0, count: 0 } ]); + expect( + await cache.getChecksums(123n, [ + { bucket: 'test', source: DUMMY_SOURCE }, + { bucket: 'test2', source: DUMMY_SOURCE } + ]) + ).toEqual([{ bucket: 'test', checksum: 0, count: 0 }, TEST2_123]); }); it('should handle missing checksums (b)', async function () { @@ -325,8 +365,10 @@ describe('checksum cache', function () { return fetchTestChecksums(batch.filter((b) => b.bucket != 'test' || b.end != 123n)); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([{ bucket: 'test', checksum: 0, count: 0 }]); - expect(await cache.getChecksums(1234n, ['test'])).toEqual([ + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ + { bucket: 'test', checksum: 0, count: 0 } + ]); + expect(await cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: 1597020602, @@ -334,7 +376,10 @@ describe('checksum cache', function () { } ]); - expect(lookups).toEqual([[{ bucket: 'test', end: 123n }], [{ bucket: 'test', start: 123n, end: 1234n }]]); + expect(lookups.map(removeLookupSources)).toEqual([ + [{ bucket: 'test', end: 123n }], + [{ bucket: 'test', start: 123n, end: 1234n }] + ]); }); it('should use maxSize', async function () { @@ -347,8 +392,8 @@ describe('checksum cache', function () { maxSize: 2 }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); - expect(await cache.getChecksums(124n, ['test'])).toEqual([ + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); + expect(await cache.getChecksums(124n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: 1887460431, @@ -356,30 +401,30 @@ describe('checksum cache', function () { } ]); - expect(await cache.getChecksums(125n, ['test'])).toEqual([ + expect(await cache.getChecksums(125n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: -1865121912, count: 125 } ]); - expect(await cache.getChecksums(126n, ['test'])).toEqual([ + expect(await cache.getChecksums(126n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: -1720007310, count: 126 } ]); - expect(await cache.getChecksums(124n, ['test'])).toEqual([ + expect(await cache.getChecksums(124n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([ { bucket: 'test', checksum: 1887460431, count: 124 } ]); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test', end: 123n }], [{ bucket: 'test', start: 123n, end: 124n }], [{ bucket: 'test', start: 124n, end: 125n }], @@ -400,10 +445,10 @@ describe('checksum cache', function () { maxSize: 2 }); - const p3 = cache.getChecksums(123n, ['test3']); - const p4 = cache.getChecksums(123n, ['test4']); - const p1 = cache.getChecksums(123n, ['test']); - const p2 = cache.getChecksums(123n, ['test2']); + const p3 = cache.getChecksums(123n, [{ bucket: 'test3', source: DUMMY_SOURCE }]); + const p4 = cache.getChecksums(123n, [{ bucket: 'test4', source: DUMMY_SOURCE }]); + const p1 = cache.getChecksums(123n, [{ bucket: 'test', source: 
DUMMY_SOURCE }]); + const p2 = cache.getChecksums(123n, [{ bucket: 'test2', source: DUMMY_SOURCE }]); expect(await p1).toEqual([TEST_123]); expect(await p2).toEqual([TEST2_123]); @@ -417,7 +462,7 @@ describe('checksum cache', function () { ]); // The lookup should be deduplicated, even though it's in progress - expect(lookups).toEqual([ + expect(lookups.map(removeLookupSources)).toEqual([ [{ bucket: 'test3', end: 123n }], [{ bucket: 'test4', end: 123n }], [{ bucket: 'test', end: 123n }], @@ -434,7 +479,7 @@ describe('checksum cache', function () { return fetchTestChecksums(batch); }); - expect(await cache.getChecksums(123n, ['test'])).toEqual([TEST_123]); - expect(await cache.getChecksums(1234n, ['test'])).toEqual([TEST_1234]); + expect(await cache.getChecksums(123n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_123]); + expect(await cache.getChecksums(1234n, [{ bucket: 'test', source: DUMMY_SOURCE }])).toEqual([TEST_1234]); }); }); diff --git a/packages/service-core/test/src/sync/BucketChecksumState.test.ts b/packages/service-core/test/src/sync/BucketChecksumState.test.ts index 8ee74be2f..aef48b61d 100644 --- a/packages/service-core/test/src/sync/BucketChecksumState.test.ts +++ b/packages/service-core/test/src/sync/BucketChecksumState.test.ts @@ -1,5 +1,6 @@ import { BucketChecksum, + BucketChecksumRequest, BucketChecksumState, BucketChecksumStateOptions, BucketChecksumStateStorage, @@ -859,14 +860,14 @@ class MockBucketChecksumStateStorage implements BucketChecksumStateStorage { this.filter?.({ invalidate: true }); } - async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise { + async getChecksums(checkpoint: InternalOpId, requests: BucketChecksumRequest[]): Promise { return new Map( - buckets.map((bucket) => { - const checksum = this.state.get(bucket); + requests.map((request) => { + const checksum = this.state.get(request.bucket); return [ - bucket, + request.bucket, { - bucket: bucket, + bucket: request.bucket, checksum: checksum?.checksum ?? 0, count: checksum?.count ?? 0 } From 03b07dd89318f510611e8ba3a3eece7584c9d7e1 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 11:44:43 +0200 Subject: [PATCH 022/101] More fixes for clearing data. 
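With bucket data and parameter lookups now keyed by shared definition ids rather than by the sync rules group, clearing one sync rules version must not delete data that another version still references. The change below therefore first collects the definition and parameter-lookup ids used by other sync rules in the ACTIVE/PROCESSING/ERRORED states, and skips those ids when deleting. A condensed sketch of that check, with simplified types and hypothetical names (collectKeepIds / deletableIds are not part of the real API):

    interface RuleMapping {
      definitions: Record<string, number>;
      parameter_lookups: Record<string, number>;
    }

    function collectKeepIds(otherRuleMappings: RuleMapping[]) {
      const keepDefinitionIds = new Set<number>();
      const keepParameterLookupIds = new Set<number>();
      for (const mapping of otherRuleMappings) {
        for (const id of Object.values(mapping.definitions)) {
          keepDefinitionIds.add(id);
        }
        for (const id of Object.values(mapping.parameter_lookups)) {
          keepParameterLookupIds.add(id);
        }
      }
      return { keepDefinitionIds, keepParameterLookupIds };
    }

    // Only ids not referenced by any other sync rules version are safe to delete.
    function deletableIds(own: Record<string, number>, keep: Set<number>): number[] {
      return Object.values(own).filter((id) => !keep.has(id));
    }

The legacy group-keyed deletes are kept to clean up data written before the key refactor.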
--- .../implementation/MongoSyncBucketStorage.ts | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 1639a672b..19a8df7fc 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -22,6 +22,7 @@ import { ProtocolOpId, ReplicationCheckpoint, storage, + SyncRuleState, utils, WatchWriteCheckpointOptions } from '@powersync/service-core'; @@ -603,8 +604,31 @@ export class MongoSyncBucketStorage ); if (doc?.rule_mapping != null) { + // TODO: Handle consistency + + const otherSyncRules = await this.db.sync_rules + .find({ + _id: { $ne: this.group_id }, + state: { $in: [SyncRuleState.ACTIVE, SyncRuleState.PROCESSING, SyncRuleState.ERRORED] }, + 'rule_mapping.definitions': { $exists: true } + }) + .toArray(); + const keepSyncDefinitionIds = new Set(); + const keepParameterLookupIds = new Set(); + for (let other of otherSyncRules) { + for (let id of Object.values(other.rule_mapping.definitions)) { + keepSyncDefinitionIds.add(id); + } + for (let id of Object.values(other.rule_mapping.parameter_lookups)) { + keepParameterLookupIds.add(id); + } + } + // TODO: Only delete if not used by other sync rules for (let [name, id] of Object.entries(doc.rule_mapping.definitions)) { + if (keepSyncDefinitionIds.has(id)) { + continue; + } await this.retriedDelete(`deleting bucket data for ${name}`, signal, () => this.db.bucket_data.deleteMany( { @@ -613,9 +637,21 @@ export class MongoSyncBucketStorage { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } ) ); + + await this.retriedDelete(`deleting bucket_state data for ${name}`, signal, () => + this.db.bucket_state.deleteMany( + { + _id: idPrefixFilter({ g: id }, ['b']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); } for (let [name, id] of Object.entries(doc.rule_mapping.parameter_lookups)) { + if (keepParameterLookupIds.has(id)) { + continue; + } await this.retriedDelete(`deleting parameter lookup data for ${name}`, signal, () => this.db.bucket_parameters.deleteMany( { @@ -627,6 +663,7 @@ export class MongoSyncBucketStorage } } + // Legacy await this.retriedDelete('deleting bucket data', signal, () => this.db.bucket_data.deleteMany( { @@ -635,6 +672,8 @@ export class MongoSyncBucketStorage { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } ) ); + + // Legacy await this.retriedDelete('deleting bucket parameter lookup values', signal, () => this.db.bucket_parameters.deleteMany( { @@ -644,6 +683,7 @@ export class MongoSyncBucketStorage ) ); + // Legacy await this.retriedDelete('deleting current data records', signal, () => this.db.current_data.deleteMany( { @@ -653,6 +693,7 @@ export class MongoSyncBucketStorage ) ); + // Legacy await this.retriedDelete('deleting bucket state records', signal, () => this.db.bucket_state.deleteMany( { From df561c0176f7eb2c44896b7f58959ee0db5296e4 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 12:15:07 +0200 Subject: [PATCH 023/101] Fix test build issues. 
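The storage interface now distinguishes the persisted (parsed but not hydrated) sync rules from the hydrated form: getParsedSyncRules returns PersistedSyncRules (documented below as test-only), while getHydratedSyncRules returns HydratedSyncRules for replication and sync streams. Both storage implementations serve the two from a single cache keyed on the parse options. A generic sketch of that caching pattern, with stand-in type names rather than the real interfaces:

    interface ParseOptions {
      defaultSchema: string;
    }

    class SyncRulesCache<Parsed, Hydrated> {
      private cache?: { parsed: Parsed; hydrated: Hydrated; options: ParseOptions };

      constructor(
        private parse: (options: ParseOptions) => Parsed,
        private hydrate: (parsed: Parsed) => Hydrated
      ) {}

      getParsed(options: ParseOptions): Parsed {
        this.ensure(options);
        return this.cache!.parsed;
      }

      getHydrated(options: ParseOptions): Hydrated {
        this.ensure(options);
        return this.cache!.hydrated;
      }

      private ensure(options: ParseOptions): void {
        // Re-parse and re-hydrate only when there is no cached value
        // or the default schema changed.
        if (!this.cache || this.cache.options.defaultSchema !== options.defaultSchema) {
          const parsed = this.parse(options);
          this.cache = { parsed, hydrated: this.hydrate(parsed), options };
        }
      }
    }

Replication streams, the sync endpoints and the shared test suites are updated to call getHydratedSyncRules where they previously relied on getParsedSyncRules returning hydrated rules.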
--- .../implementation/MongoSyncBucketStorage.ts | 17 ++++++-- .../test/src/storage_compacting.test.ts | 30 ++++++++------ .../src/replication/ChangeStream.ts | 2 +- .../src/replication/MongoSnapshotter.ts | 2 +- .../test/src/change_stream_utils.ts | 20 ++++----- .../test/src/slow_tests.test.ts | 7 +++- .../module-mssql/src/replication/CDCStream.ts | 2 +- .../test/src/CDCStreamTestContext.ts | 8 ++-- .../src/replication/BinLogStream.ts | 2 +- .../test/src/BinlogStreamUtils.ts | 4 +- .../src/storage/PostgresSyncRulesStorage.ts | 16 ++++++-- .../src/replication/PostgresSnapshotter.ts | 2 +- .../src/replication/WalStream.ts | 2 +- .../test/src/large_batch.test.ts | 41 ++++++++++++++----- .../test/src/wal_stream_utils.ts | 8 ++-- .../src/test-utils/general-utils.ts | 12 ++++-- .../register-data-storage-parameter-tests.ts | 6 +-- .../src/tests/register-sync-tests.ts | 32 +++++++-------- .../src/routes/endpoints/socket-route.ts | 2 +- .../src/routes/endpoints/sync-stream.ts | 2 +- .../src/storage/SyncRulesBucketStorage.ts | 9 +++- .../test/src/routes/stream.test.ts | 4 +- 22 files changed, 142 insertions(+), 88 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 19a8df7fc..5a507b9ba 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -17,6 +17,7 @@ import { InternalOpId, internalToExternalOpId, maxLsn, + PersistedSyncRules, PopulateChecksumCacheOptions, PopulateChecksumCacheResults, ProtocolOpId, @@ -70,7 +71,9 @@ export class MongoSyncBucketStorage private readonly db: PowerSyncMongo; readonly checksums: MongoChecksums; - private parsedSyncRulesCache: { parsed: HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined; + private parsedSyncRulesCache: + | { parsed: PersistedSyncRules; hydrated: HydratedSyncRules; options: storage.ParseSyncRulesOptions } + | undefined; private writeCheckpointAPI: MongoWriteCheckpointAPI; private mapping: BucketDefinitionMapping; @@ -112,17 +115,23 @@ export class MongoSyncBucketStorage }); } - getParsedSyncRules(options: storage.ParseSyncRulesOptions): HydratedSyncRules { + getParsedSyncRules(options: storage.ParseSyncRulesOptions): PersistedSyncRules { + this.getHydratedSyncRules(options); + return this.parsedSyncRulesCache!.parsed; + } + + getHydratedSyncRules(options: storage.ParseSyncRulesOptions): HydratedSyncRules { const { parsed, options: cachedOptions } = this.parsedSyncRulesCache ?? {}; /** * Check if the cached sync rules, if present, had the same options. * Parse sync rules if the options are different or if there is no cached value. 
*/ if (!parsed || options.defaultSchema != cachedOptions?.defaultSchema) { - this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).hydratedSyncRules(), options }; + const parsed = this.sync_rules.parsed(options); + this.parsedSyncRulesCache = { parsed, hydrated: parsed.hydratedSyncRules(), options }; } - return this.parsedSyncRulesCache!.parsed; + return this.parsedSyncRulesCache!.hydrated; } async getCheckpoint(): Promise { diff --git a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts index 792431bb7..264399477 100644 --- a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts @@ -53,11 +53,11 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); const { checkpoint } = await populate(bucketStorage); - return { bucketStorage, checkpoint, factory }; + return { bucketStorage, checkpoint, factory, syncRules }; }; test('full compact', async () => { - const { bucketStorage, checkpoint, factory } = await setup(); + const { bucketStorage, checkpoint, factory, syncRules } = await setup(); // Simulate bucket_state from old version not being available await factory.db.bucket_state.deleteMany({}); @@ -71,14 +71,17 @@ bucket_definitions: signal: null as any }); - const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['by_user["u1"]', 'by_user["u2"]']); - expect(checksumAfter.get('by_user["u1"]')).toEqual({ - bucket: 'by_user["u1"]', + const users = ['u1', 'u2']; + const userRequests = users.map((user) => test_utils.bucketRequest(syncRules, `by_user["${user}"]`)); + const [u1Request, u2Request] = userRequests; + const checksumAfter = await bucketStorage.getChecksums(checkpoint, userRequests); + expect(checksumAfter.get(u1Request.bucket)).toEqual({ + bucket: u1Request.bucket, checksum: -659469718, count: 1 }); - expect(checksumAfter.get('by_user["u2"]')).toEqual({ - bucket: 'by_user["u2"]', + expect(checksumAfter.get(u2Request.bucket)).toEqual({ + bucket: u2Request.bucket, checksum: 430217650, count: 1 }); @@ -122,14 +125,17 @@ bucket_definitions: }); expect(result2.buckets).toEqual(0); - const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['by_user2["u1"]', 'by_user2["u2"]']); - expect(checksumAfter.get('by_user2["u1"]')).toEqual({ - bucket: 'by_user2["u1"]', + const users = ['u1', 'u2']; + const userRequests = users.map((user) => test_utils.bucketRequest(syncRules, `by_user2["${user}"]`)); + const [u1Request, u2Request] = userRequests; + const checksumAfter = await bucketStorage.getChecksums(checkpoint, userRequests); + expect(checksumAfter.get(u1Request.bucket)).toEqual({ + bucket: u1Request.bucket, checksum: -659469718, count: 1 }); - expect(checksumAfter.get('by_user2["u2"]')).toEqual({ - bucket: 'by_user2["u2"]', + expect(checksumAfter.get(u2Request.bucket)).toEqual({ + bucket: u2Request.bucket, checksum: 430217650, count: 1 }); diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index b5b5b1f59..14f14c903 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -85,7 +85,7 @@ class SubStream { this.logger = options.logger; this.metrics = options.metrics; this.abortSignal = options.abortSignal; - this.syncRules = this.storage.getParsedSyncRules({ + this.syncRules = this.storage.getHydratedSyncRules({ defaultSchema: 
this.connections.db.databaseName }); this.snapshotter = new MongoSnapshotter({ diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index eeac96a03..19b8943b6 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -101,7 +101,7 @@ export class MongoSnapshotter { this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000; this.client = this.connections.client; this.defaultDb = this.connections.db; - this.sync_rules = options.storage.getParsedSyncRules({ + this.sync_rules = options.storage.getHydratedSyncRules({ defaultSchema: this.defaultDb.databaseName }); this.abortSignal = options.abort_signal; diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index ad1b24a7c..0b2ce99e4 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -1,5 +1,6 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { + BucketChecksumRequest, BucketStorageFactory, createCoreReplicationMetrics, initializeCoreReplicationMetrics, @@ -19,8 +20,8 @@ import { MongoManager } from '@module/replication/MongoManager.js'; import { createCheckpoint, STANDALONE_CHECKPOINT_ID } from '@module/replication/MongoRelation.js'; import { NormalizedMongoConnectionConfig } from '@module/types/types.js'; -import { clearTestDb, TEST_CONNECTION_OPTIONS } from './util.js'; import { ReplicationAbortedError } from '@powersync/lib-services-framework'; +import { clearTestDb, TEST_CONNECTION_OPTIONS } from './util.js'; export class ChangeStreamTestContext { private _walStream?: ChangeStream; @@ -185,7 +186,7 @@ export class ChangeStreamTestContext { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { let checkpoint = await this.getCheckpoint(options); const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); - const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules.definition, bucket, start)); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -196,7 +197,7 @@ export class ChangeStreamTestContext { } const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await this.getCheckpoint(options); - let map = [bucketRequest(syncRules.definition, bucket, start)]; + let map = [bucketRequest(syncRules, bucket, start)]; let data: OplogEntry[] = []; while (true) { const batch = this.storage!.getBucketDataBatch(checkpoint, map); @@ -206,20 +207,15 @@ export class ChangeStreamTestContext { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map = [bucketRequest(syncRules.definition, bucket, start)]; + map = [bucketRequest(syncRules, bucket, start)]; } return data; } - async getChecksums(buckets: string[], options?: { timeout?: number }) { - let checkpoint = await this.getCheckpoint(options); - return this.storage!.getChecksums(checkpoint, buckets); - } - - async getChecksum(bucket: string, options?: { timeout?: number }) { + async getChecksum(request: BucketChecksumRequest, options?: { timeout?: number }) { let checkpoint = await this.getCheckpoint(options); - const map = await this.storage!.getChecksums(checkpoint, [bucket]); - return map.get(bucket); + const map = 
await this.storage!.getChecksums(checkpoint, [request]); + return map.get(request.bucket); } } diff --git a/modules/module-mongodb/test/src/slow_tests.test.ts b/modules/module-mongodb/test/src/slow_tests.test.ts index 8acf8d88d..77f6d2498 100644 --- a/modules/module-mongodb/test/src/slow_tests.test.ts +++ b/modules/module-mongodb/test/src/slow_tests.test.ts @@ -7,6 +7,7 @@ import { settledPromise, storage, unsettledPromise } from '@powersync/service-co import { ChangeStreamTestContext, setSnapshotHistorySeconds } from './change_stream_utils.js'; import { env } from './env.js'; import { describeWithStorage } from './util.js'; +import { bucketRequest, PARSE_OPTIONS } from '@powersync/service-core-tests'; describe.runIf(env.CI || env.SLOW_TESTS)('change stream slow tests', { timeout: 60_000 }, function () { describeWithStorage({}, defineSlowTests); @@ -23,13 +24,14 @@ function defineSlowTests(config: storage.TestStorageConfig) { // snapshot session. await using _ = await setSnapshotHistorySeconds(context.client, 1); const { db } = context; - await context.updateSyncRules(` + const instance = await context.updateSyncRules(` bucket_definitions: global: data: - SELECT _id as id, description, num FROM "test_data1" - SELECT _id as id, description, num FROM "test_data2" `); + const syncRules = instance.getParsedSyncRules(PARSE_OPTIONS); const collection1 = db.collection('test_data1'); const collection2 = db.collection('test_data2'); @@ -42,7 +44,8 @@ bucket_definitions: await collection2.bulkWrite(operations); await context.initializeReplication(); - const checksum = await context.getChecksum('global[]'); + const request = bucketRequest(syncRules, 'global[]'); + const checksum = await context.getChecksum(request); expect(checksum).toMatchObject({ count: 20_000 }); diff --git a/modules/module-mssql/src/replication/CDCStream.ts b/modules/module-mssql/src/replication/CDCStream.ts index a2eecc7ac..c4c89a9d9 100644 --- a/modules/module-mssql/src/replication/CDCStream.ts +++ b/modules/module-mssql/src/replication/CDCStream.ts @@ -110,7 +110,7 @@ export class CDCStream { constructor(private options: CDCStreamOptions) { this.logger = options.logger ?? 
defaultLogger; this.storage = options.storage; - this.syncRules = options.storage.getParsedSyncRules({ defaultSchema: options.connections.schema }); + this.syncRules = options.storage.getHydratedSyncRules({ defaultSchema: options.connections.schema }); this.connections = options.connections; this.abortSignal = options.abortSignal; } diff --git a/modules/module-mssql/test/src/CDCStreamTestContext.ts b/modules/module-mssql/test/src/CDCStreamTestContext.ts index 5f21204b6..d3a3f855a 100644 --- a/modules/module-mssql/test/src/CDCStreamTestContext.ts +++ b/modules/module-mssql/test/src/CDCStreamTestContext.ts @@ -168,7 +168,7 @@ export class CDCStreamTestContext implements AsyncDisposable { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { let checkpoint = await this.getCheckpoint(options); const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); - const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules.definition, bucket, start)); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -182,7 +182,7 @@ export class CDCStreamTestContext implements AsyncDisposable { } const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await this.getCheckpoint(options); - let map = [bucketRequest(syncRules.definition, bucket, start)]; + let map = [bucketRequest(syncRules, bucket, start)]; let data: OplogEntry[] = []; while (true) { @@ -193,7 +193,7 @@ export class CDCStreamTestContext implements AsyncDisposable { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map = [bucketRequest(syncRules.definition, bucket, start)]; + map = [bucketRequest(syncRules, bucket, start)]; } return data; } @@ -208,7 +208,7 @@ export class CDCStreamTestContext implements AsyncDisposable { } const { checkpoint } = await this.storage!.getCheckpoint(); const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); - const map = [bucketRequest(syncRules.definition, bucket, start)]; + const map = [bucketRequest(syncRules, bucket, start)]; const batch = this.storage!.getBucketDataBatch(checkpoint, map); const batches = await test_utils.fromAsync(batch); return batches[0]?.chunkData.data ?? []; diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index 9777e5a87..365f7b721 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -89,7 +89,7 @@ export class BinLogStream { this.logger = options.logger ?? 
defaultLogger; this.storage = options.storage; this.connections = options.connections; - this.syncRules = options.storage.getParsedSyncRules({ defaultSchema: this.defaultSchema }); + this.syncRules = options.storage.getHydratedSyncRules({ defaultSchema: this.defaultSchema }); this.groupId = options.storage.group_id; this.abortSignal = options.abortSignal; } diff --git a/modules/module-mysql/test/src/BinlogStreamUtils.ts b/modules/module-mysql/test/src/BinlogStreamUtils.ts index 1a6ca8e33..07e06087c 100644 --- a/modules/module-mysql/test/src/BinlogStreamUtils.ts +++ b/modules/module-mysql/test/src/BinlogStreamUtils.ts @@ -151,7 +151,7 @@ export class BinlogStreamTestContext { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { const checkpoint = await this.getCheckpoint(options); const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); - const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules.definition, bucket, start)); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -166,7 +166,7 @@ export class BinlogStreamTestContext { } const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await this.getCheckpoint(options); - const map = [bucketRequest(syncRules.definition, bucket, start)]; + const map = [bucketRequest(syncRules, bucket, start)]; const batch = this.storage!.getBucketDataBatch(checkpoint, map); const batches = await test_utils.fromAsync(batch); return batches[0]?.chunkData.data ?? []; diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index bb42986b4..3c39e7420 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -12,6 +12,7 @@ import { LastValueSink, maxLsn, PartialChecksum, + PersistedSyncRules, PopulateChecksumCacheOptions, PopulateChecksumCacheResults, ReplicationCheckpoint, @@ -62,7 +63,7 @@ export class PostgresSyncRulesStorage // TODO we might be able to share this in an abstract class private parsedSyncRulesCache: - | { parsed: sync_rules.HydratedSyncRules; options: storage.ParseSyncRulesOptions } + | { parsed: PersistedSyncRules; hydrated: sync_rules.HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined; private _checksumCache: storage.ChecksumCache | undefined; @@ -99,17 +100,24 @@ export class PostgresSyncRulesStorage } // TODO we might be able to share this in an abstract class - getParsedSyncRules(options: storage.ParseSyncRulesOptions): sync_rules.HydratedSyncRules { + + getParsedSyncRules(options: storage.ParseSyncRulesOptions): PersistedSyncRules { + this.getHydratedSyncRules(options); + return this.parsedSyncRulesCache!.parsed; + } + + getHydratedSyncRules(options: storage.ParseSyncRulesOptions): sync_rules.HydratedSyncRules { const { parsed, options: cachedOptions } = this.parsedSyncRulesCache ?? {}; /** * Check if the cached sync rules, if present, had the same options. * Parse sync rules if the options are different or if there is no cached value. 
*/ if (!parsed || options.defaultSchema != cachedOptions?.defaultSchema) { - this.parsedSyncRulesCache = { parsed: this.sync_rules.parsed(options).hydratedSyncRules(), options }; + const parsed = this.sync_rules.parsed(options); + this.parsedSyncRulesCache = { parsed, hydrated: parsed.hydratedSyncRules(), options }; } - return this.parsedSyncRulesCache!.parsed; + return this.parsedSyncRulesCache!.hydrated; } async reportError(e: any): Promise { diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index d45dc1997..d95f8b595 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -91,7 +91,7 @@ export class PostgresSnapshotter { this.logger = options.logger ?? defaultLogger; this.storage = options.storage; this.metrics = options.metrics; - this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); + this.sync_rules = options.storage.getHydratedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); this.group_id = options.storage.group_id; this.slot_name = options.storage.slot_name; this.connections = options.connections; diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 7c712137d..d4350b7e7 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -132,7 +132,7 @@ export class WalStream { this.logger = options.logger ?? defaultLogger; this.storage = options.storage; this.metrics = options.metrics; - this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); + this.sync_rules = options.storage.getHydratedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); this.group_id = options.storage.group_id; this.slot_name = options.storage.slot_name; this.connections = options.connections; diff --git a/modules/module-postgres/test/src/large_batch.test.ts b/modules/module-postgres/test/src/large_batch.test.ts index 43a29ea28..e22d5d890 100644 --- a/modules/module-postgres/test/src/large_batch.test.ts +++ b/modules/module-postgres/test/src/large_batch.test.ts @@ -4,6 +4,7 @@ import { populateData } from '../../dist/utils/populate_test_data.js'; import { env } from './env.js'; import { describeWithStorage, TEST_CONNECTION_OPTIONS } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; +import { bucketRequest } from '@powersync/service-core-tests'; describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { describeWithStorage({ timeout: 240_000 }, function (config) { @@ -44,8 +45,13 @@ function defineBatchTests(config: storage.TestStorageConfig) { const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); const duration = Date.now() - start; const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024); - const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); - expect(checksum.get('global[]')!.count).toEqual(operation_count); + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); + const checksum = await context.storage!.getChecksums(checkpoint, [request]); + expect(checksum.get(request.bucket)!.count).toEqual(operation_count); const perSecond = Math.round((operation_count / 
duration) * 1000); console.log(`${operation_count} ops in ${duration}ms ${perSecond} ops/s. ${used}MB heap`); }); @@ -54,6 +60,11 @@ function defineBatchTests(config: storage.TestStorageConfig) { await using context = await WalStreamTestContext.open(factory); // Manual test to check initial replication performance and memory usage await context.updateSyncRules(BASIC_SYNC_RULES); + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); const { pool } = context; await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`); @@ -90,8 +101,8 @@ function defineBatchTests(config: storage.TestStorageConfig) { const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); const duration = Date.now() - start; - const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); - expect(checksum.get('global[]')!.count).toEqual(operation_count); + const checksum = await context.storage!.getChecksums(checkpoint, [request]); + expect(checksum.get(request.bucket)!.count).toEqual(operation_count); const perSecond = Math.round((operation_count / duration) * 1000); console.log(`${operation_count} ops in ${duration}ms ${perSecond} ops/s.`); printMemoryUsage(); @@ -104,6 +115,11 @@ function defineBatchTests(config: storage.TestStorageConfig) { await using context = await WalStreamTestContext.open(factory); // This just tests performance of a large number of operations inside a transaction. await context.updateSyncRules(BASIC_SYNC_RULES); + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); const { pool } = context; await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`); @@ -141,8 +157,8 @@ function defineBatchTests(config: storage.TestStorageConfig) { const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); const duration = Date.now() - start; const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024); - const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); - expect(checksum.get('global[]')!.count).toEqual(operationCount); + const checksum = await context.storage!.getChecksums(checkpoint, [request]); + expect(checksum.get(request.bucket)!.count).toEqual(operationCount); const perSecond = Math.round((operationCount / duration) * 1000); // This number depends on the test machine, so we keep the test significantly // lower than expected numbers. @@ -158,8 +174,8 @@ function defineBatchTests(config: storage.TestStorageConfig) { const checkpoint2 = await context.getCheckpoint({ timeout: 20_000 }); const truncateDuration = Date.now() - truncateStart; - const checksum2 = await context.storage!.getChecksums(checkpoint2, ['global[]']); - const truncateCount = checksum2.get('global[]')!.count - checksum.get('global[]')!.count; + const checksum2 = await context.storage!.getChecksums(checkpoint2, [request]); + const truncateCount = checksum2.get(request.bucket)!.count - checksum.get(request.bucket)!.count; expect(truncateCount).toEqual(numTransactions * perTransaction); const truncatePerSecond = Math.round((truncateCount / truncateDuration) * 1000); console.log(`Truncated ${truncateCount} ops in ${truncateDuration}ms ${truncatePerSecond} ops/s. 
${used}MB heap`); @@ -190,6 +206,11 @@ function defineBatchTests(config: storage.TestStorageConfig) { - SELECT * FROM test_data - SELECT * FROM test_data `); + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); const { pool } = context; await pool.query(`CREATE TABLE test_data(id serial primary key, description text)`); @@ -224,8 +245,8 @@ function defineBatchTests(config: storage.TestStorageConfig) { await context.replicateSnapshot(); const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); - const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); - expect(checksum.get('global[]')!.count).toEqual((numDocs + 2) * 4); + const checksum = await context.storage!.getChecksums(checkpoint, [request]); + expect(checksum.get(request.bucket)!.count).toEqual((numDocs + 2) * 4); }); function printMemoryUsage() { diff --git a/modules/module-postgres/test/src/wal_stream_utils.ts b/modules/module-postgres/test/src/wal_stream_utils.ts index 486208e8c..ba615b7b2 100644 --- a/modules/module-postgres/test/src/wal_stream_utils.ts +++ b/modules/module-postgres/test/src/wal_stream_utils.ts @@ -171,7 +171,7 @@ export class WalStreamTestContext implements AsyncDisposable { async getBucketsDataBatch(buckets: Record, options?: { timeout?: number }) { let checkpoint = await this.getCheckpoint(options); const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); - const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules.definition, bucket, start)); + const map = Object.entries(buckets).map(([bucket, start]) => bucketRequest(syncRules, bucket, start)); return test_utils.fromAsync(this.storage!.getBucketDataBatch(checkpoint, map)); } @@ -185,7 +185,7 @@ export class WalStreamTestContext implements AsyncDisposable { } const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const checkpoint = await this.getCheckpoint(options); - let map = [bucketRequest(syncRules.definition, bucket, start)]; + let map = [bucketRequest(syncRules, bucket, start)]; let data: OplogEntry[] = []; while (true) { const batch = this.storage!.getBucketDataBatch(checkpoint, map); @@ -195,7 +195,7 @@ export class WalStreamTestContext implements AsyncDisposable { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map = [bucketRequest(syncRules.definition, bucket, start)]; + map = [bucketRequest(syncRules, bucket, start)]; } return data; } @@ -210,7 +210,7 @@ export class WalStreamTestContext implements AsyncDisposable { } const syncRules = this.storage!.getParsedSyncRules({ defaultSchema: 'n/a' }); const { checkpoint } = await this.storage!.getCheckpoint(); - const map = [bucketRequest(syncRules.definition, bucket, start)]; + const map = [bucketRequest(syncRules, bucket, start)]; const batch = this.storage!.getBucketDataBatch(checkpoint, map); const batches = await test_utils.fromAsync(batch); return batches[0]?.chunkData.data ?? 
[]; diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index a131559c3..fdecc59c3 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -1,4 +1,4 @@ -import { BucketDataRequest, InternalOpId, storage, utils } from '@powersync/service-core'; +import { BucketDataRequest, InternalOpId, storage, sync, utils } from '@powersync/service-core'; import { GetQuerierOptions, RequestParameters, SqlSyncRules } from '@powersync/service-sync-rules'; import { versionedHydrationState } from '@powersync/service-sync-rules'; import * as bson from 'bson'; @@ -121,12 +121,18 @@ export function querierOptions(globalParameters: RequestParameters): GetQuerierO }; } +function isParsedSyncRules( + syncRules: storage.PersistedSyncRulesContent | storage.PersistedSyncRules +): syncRules is storage.PersistedSyncRules { + return (syncRules as storage.PersistedSyncRules).sync_rules !== undefined; +} + export function bucketRequest( - syncRules: storage.PersistedSyncRulesContent, + syncRules: storage.PersistedSyncRulesContent | storage.PersistedSyncRules, bucket?: string, start?: InternalOpId | string | number ): BucketDataRequest { - const parsed = syncRules.parsed({ defaultSchema: 'not-applicable' }); + const parsed = isParsedSyncRules(syncRules) ? syncRules : syncRules.parsed(PARSE_OPTIONS); const hydrationState = parsed.hydrationState; bucket ??= 'global[]'; const definitionName = bucket.substring(0, bucket.indexOf('[')); diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index 30b7c7c07..bebf38e49 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -595,11 +595,11 @@ bucket_definitions: }); const syncBucketStorage = bucketStorageFactory.getInstance(syncRules); - const parsedSchema1 = syncBucketStorage.getParsedSyncRules({ + const parsedSchema1 = syncBucketStorage.getHydratedSyncRules({ defaultSchema: 'public' }); - const parsedSchema2 = syncBucketStorage.getParsedSyncRules({ + const parsedSchema2 = syncBucketStorage.getHydratedSyncRules({ defaultSchema: 'public' }); @@ -607,7 +607,7 @@ bucket_definitions: expect(parsedSchema2).equals(parsedSchema1); expect(parsedSchema1.getSourceTables()[0].schema).equals('public'); - const parsedSchema3 = syncBucketStorage.getParsedSyncRules({ + const parsedSchema3 = syncBucketStorage.getHydratedSyncRules({ defaultSchema: 'databasename' }); diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index 4d3c8d5e2..a764ac84e 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -85,7 +85,7 @@ export function registerSyncTests(config: storage.TestStorageConfig) { const stream = sync.streamResponse({ syncContext, bucketStorage: bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -147,7 +147,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: 
bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -211,7 +211,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -322,7 +322,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -464,7 +464,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -581,7 +581,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -648,7 +648,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -677,7 +677,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -709,7 +709,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -784,7 +784,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -861,7 +861,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -929,7 +929,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -999,7 +999,7 @@ bucket_definitions: const stream = sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -1063,7 +1063,7 @@ bucket_definitions: const stream = 
sync.streamResponse({ syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -1193,7 +1193,7 @@ bucket_definitions: const params: sync.SyncStreamParameters = { syncContext, bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, @@ -1268,7 +1268,7 @@ config: const stream = sync.streamResponse({ syncContext, bucketStorage: bucketStorage, - syncRules: bucketStorage.getParsedSyncRules(test_utils.PARSE_OPTIONS), + syncRules: bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS), params: { buckets: [], include_checksum: true, diff --git a/packages/service-core/src/routes/endpoints/socket-route.ts b/packages/service-core/src/routes/endpoints/socket-route.ts index e6367e09b..0f585ab03 100644 --- a/packages/service-core/src/routes/endpoints/socket-route.ts +++ b/packages/service-core/src/routes/endpoints/socket-route.ts @@ -84,7 +84,7 @@ export const syncStreamReactive: SocketRouteGenerator = (router) => return; } - const syncRules = bucketStorage.getParsedSyncRules(routerEngine.getAPI().getParseSyncRulesOptions()); + const syncRules = bucketStorage.getHydratedSyncRules(routerEngine.getAPI().getParseSyncRulesOptions()); const removeStopHandler = routerEngine.addStopHandler(() => { closeReason ??= 'process shutdown'; diff --git a/packages/service-core/src/routes/endpoints/sync-stream.ts b/packages/service-core/src/routes/endpoints/sync-stream.ts index fb1df9e96..5b688d994 100644 --- a/packages/service-core/src/routes/endpoints/sync-stream.ts +++ b/packages/service-core/src/routes/endpoints/sync-stream.ts @@ -72,7 +72,7 @@ export const syncStreamed = routeDefinition({ }); } - const syncRules = bucketStorage.getParsedSyncRules(routerEngine.getAPI().getParseSyncRulesOptions()); + const syncRules = bucketStorage.getHydratedSyncRules(routerEngine.getAPI().getParseSyncRulesOptions()); const controller = new AbortController(); const tracker = new sync.RequestTracker(metricsEngine); diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 1f965ef37..027260128 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -8,7 +8,7 @@ import { import * as util from '../util/util-index.js'; import { BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; import { BucketStorageFactory } from './BucketStorageFactory.js'; -import { ParseSyncRulesOptions } from './PersistedSyncRulesContent.js'; +import { ParseSyncRulesOptions, PersistedSyncRules } from './PersistedSyncRulesContent.js'; import { SourceEntityDescriptor } from './SourceEntity.js'; import { SourceTable } from './SourceTable.js'; import { SyncStorageWriteCheckpointAPI } from './WriteCheckpointAPI.js'; @@ -49,7 +49,12 @@ export interface SyncRulesBucketStorage */ createWriter(options: StartBatchOptions): Promise; - getParsedSyncRules(options: ParseSyncRulesOptions): HydratedSyncRules; + getHydratedSyncRules(options: ParseSyncRulesOptions): HydratedSyncRules; + + /** + * For tests only. + */ + getParsedSyncRules(options: ParseSyncRulesOptions): PersistedSyncRules; /** * Terminate the sync rules. 
diff --git a/packages/service-core/test/src/routes/stream.test.ts b/packages/service-core/test/src/routes/stream.test.ts index 948bcc4ef..cc8316fad 100644 --- a/packages/service-core/test/src/routes/stream.test.ts +++ b/packages/service-core/test/src/routes/stream.test.ts @@ -44,7 +44,7 @@ describe('Stream Route', () => { // when compressing the stream. const storage = { - getParsedSyncRules() { + getHydratedSyncRules() { return new SqlSyncRules('bucket_definitions: {}').hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); }, watchCheckpointChanges: async function* (options) { @@ -82,7 +82,7 @@ describe('Stream Route', () => { it('logs the application metadata', async () => { const storage = { - getParsedSyncRules() { + getHydratedSyncRules() { return new SqlSyncRules('bucket_definitions: {}').hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); }, watchCheckpointChanges: async function* (options) { From 5714d9ffcb0b2f07b52d48818dce6c32c3c52989 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 15:13:36 +0200 Subject: [PATCH 024/101] Initial fixes for parameter lookups. --- .../implementation/MongoSyncBucketStorage.ts | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 5a507b9ba..38f4a1114 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -348,7 +348,7 @@ export class MongoSyncBucketStorage [ { $match: { - 'key.g': this.group_id, + 'key.g': 0, lookup: { $in: lookupFilter }, _id: { $lte: checkpoint.checkpoint } } @@ -661,14 +661,16 @@ export class MongoSyncBucketStorage if (keepParameterLookupIds.has(id)) { continue; } - await this.retriedDelete(`deleting parameter lookup data for ${name}`, signal, () => - this.db.bucket_parameters.deleteMany( - { - 'key.g': id - }, - { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } - ) - ); + // FIXME: how do we do these deletes? + // For now, we delete only when the source table is removed. 
+ // await this.retriedDelete(`deleting parameter lookup data for ${name}`, signal, () => + // this.db.bucket_parameters.deleteMany( + // { + // 'key.g': id + // }, + // { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + // ) + // ); } } @@ -726,7 +728,7 @@ export class MongoSyncBucketStorage .toArray(); for (let table of tables) { - await this.retriedDelete('deleting current data records for table', signal, () => + await this.retriedDelete(`deleting current data records for table ${table.table_name}`, signal, () => this.db.current_data.deleteMany( { _id: idPrefixFilter({ g: 0, t: table._id }, ['k']) @@ -734,6 +736,15 @@ export class MongoSyncBucketStorage { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } ) ); + await this.retriedDelete(`deleting parameter data records for table ${table.table_name}`, signal, () => + this.db.bucket_parameters.deleteMany( + { + key: idPrefixFilter({ g: 0, t: table._id }, ['k']) + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); + await this.db.source_tables.deleteOne({ _id: table._id }); // Delete the source table record itself } } @@ -1067,7 +1078,7 @@ export class MongoSyncBucketStorage .find( { _id: { $gt: options.lastCheckpoint.checkpoint, $lte: options.nextCheckpoint.checkpoint }, - 'key.g': this.group_id + 'key.g': 0 }, { projection: { From d26a01f143ae0093223939d82d870e3a56e9ba3a Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 15:18:47 +0200 Subject: [PATCH 025/101] Cleanup. --- .../src/storage/implementation/MongoSyncBucketStorage.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 38f4a1114..6d999e749 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -633,7 +633,6 @@ export class MongoSyncBucketStorage } } - // TODO: Only delete if not used by other sync rules for (let [name, id] of Object.entries(doc.rule_mapping.definitions)) { if (keepSyncDefinitionIds.has(id)) { continue; From 306beb6304d4971d304e16dab0784b6885192805 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 15:29:25 +0200 Subject: [PATCH 026/101] Hack: re-use existing mappings. 
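
When a new sync rules version is deployed, bucket and parameter lookup definition ids are now looked up against the currently-active sync rules and reused when an equivalent source already exists, falling back to allocating fresh ids otherwise. The equivalence check is still name-based only (see the FIXMEs in BucketDefinitionMapping), so this remains a stop-gap. As an illustrative sketch of the decision only (names follow the hunks below; `existingMapping` is the mapping loaded from the active sync rules and `nextBucketDefinitionId` stands in for the `bucketDefinitionId` counter):

    // Sketch only - mirrors the getBucketSourceScope() change in this patch.
    function resolveBucketSourceId(source: BucketDataSource): number {
      const existingId = existingMapping.equivalentBucketSourceId(source);
      if (existingId != null) {
        // Reuse the id so bucket data written under the previous version stays addressable.
        return existingId;
      }
      // No equivalent definition in the active rules - allocate a new id.
      return nextBucketDefinitionId++;
    }

Parameter lookup ids follow the same pattern via equivalentParameterLookupId().
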
--- .../src/storage/MongoBucketStorage.ts | 43 ++++++++++++++++--- .../implementation/BucketDefinitionMapping.ts | 11 +++++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index d5cbd5bbf..8e5d0b95b 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -13,6 +13,7 @@ import { SyncRuleDocument } from './implementation/models.js'; import { MongoPersistedSyncRulesContent } from './implementation/MongoPersistedSyncRulesContent.js'; import { MongoSyncBucketStorage, MongoSyncBucketStorageOptions } from './implementation/MongoSyncBucketStorage.js'; import { generateSlotName } from '../utils/util.js'; +import { BucketDefinitionMapping } from './implementation/BucketDefinitionMapping.js'; export class MongoBucketStorage extends BaseObserver @@ -191,7 +192,17 @@ export class MongoBucketStorage { state: storage.SyncRuleState.PROCESSING }, - { $set: { state: storage.SyncRuleState.STOP } } + { $set: { state: storage.SyncRuleState.STOP } }, + { + session: this.session + } + ); + + const activeSyncRules = await this.db.sync_rules.findOne( + { + state: storage.SyncRuleState.ACTIVE + }, + { session: this.session } ); const id_doc = await this.db.op_id_sequence.findOneAndUpdate( @@ -205,7 +216,8 @@ export class MongoBucketStorage }, { upsert: true, - returnDocument: 'after' + returnDocument: 'after', + session: this.session } ); @@ -223,11 +235,23 @@ export class MongoBucketStorage let bucketDefinitionId = (id << 16) + 1; let parameterDefinitionId = (id << 17) + 1; + let existingMapping: BucketDefinitionMapping; + if (activeSyncRules != null) { + existingMapping = BucketDefinitionMapping.fromSyncRules(activeSyncRules); + } else { + existingMapping = new BucketDefinitionMapping({}, {}); + } + syncRules.hydrate({ hydrationState: { getBucketSourceScope(source) { - bucketDefinitionMapping[source.uniqueName] = bucketDefinitionId; - bucketDefinitionId += 1; + const existingId = existingMapping.equivalentBucketSourceId(source); + if (existingId != null) { + bucketDefinitionMapping[source.uniqueName] = existingId; + } else { + bucketDefinitionMapping[source.uniqueName] = bucketDefinitionId; + bucketDefinitionId += 1; + } return { // N/A bucketPrefix: '', @@ -236,8 +260,13 @@ export class MongoBucketStorage }, getParameterIndexLookupScope(source) { const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; - parameterDefinitionMapping[key] = parameterDefinitionId; - parameterDefinitionId += 1; + const existingId = existingMapping.equivalentParameterLookupId(source); + if (existingId != null) { + parameterDefinitionMapping[key] = existingId; + } else { + parameterDefinitionMapping[key] = parameterDefinitionId; + parameterDefinitionId += 1; + } // N/A return source.defaultLookupScope; } @@ -264,7 +293,7 @@ export class MongoBucketStorage parameter_lookups: parameterDefinitionMapping } }; - await this.db.sync_rules.insertOne(doc); + await this.db.sync_rules.insertOne(doc, { session: this.session }); await this.db.notifyCheckpoint(); rules = new MongoPersistedSyncRulesContent(this.db, doc); if (options.lock) { diff --git a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts index d559b039b..f69be6442 100644 --- 
a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts @@ -28,4 +28,15 @@ export class BucketDefinitionMapping { } return defId; } + + equivalentBucketSourceId(source: BucketDataSource): number | null { + // FIXME: Do an actual comparison, instead of just using the unique name + return this.definitions[source.uniqueName] ?? null; + } + + equivalentParameterLookupId(source: ParameterIndexLookupCreator): number | null { + // FIXME: Do an actual comparison, instead of just using the scope + const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; + return this.parameterLookupMapping[key] ?? null; + } } From 2d1429042b1f5ac3432a999a94a414a95a7188e6 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 13 Jan 2026 17:00:36 +0200 Subject: [PATCH 027/101] Initial writer restructuring. --- .../storage/implementation/MergedSyncRules.ts | 114 ++++ .../implementation/MongoBucketBatch.ts | 593 ++++++++++-------- .../implementation/MongoSyncBucketStorage.ts | 20 +- .../storage/implementation/PersistedBatch.ts | 6 +- .../src/storage/BucketStorageBatch.ts | 17 +- packages/sync-rules/src/HydratedSyncRules.ts | 37 +- 6 files changed, 512 insertions(+), 275 deletions(-) create mode 100644 modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts new file mode 100644 index 000000000..aa4ebf6b0 --- /dev/null +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -0,0 +1,114 @@ +import { + buildBucketInfo, + CompatibilityContext, + EvaluatedParameters, + EvaluatedRow, + EvaluateRowOptions, + EvaluationError, + EvaluationResult, + HydrationState, + isEvaluatedRow, + isEvaluationError, + mergeDataSources, + RowProcessor, + SOURCE, + SourceTableInterface, + SqlEventDescriptor, + SqliteInputValue, + SqliteRow, + SqliteValue, + TablePattern +} from '@powersync/service-sync-rules'; +import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; + +type EvaluateRowFn = (options: EvaluateRowOptions) => EvaluationResult[]; + +export class MergedSyncRules implements RowProcessor { + static merge(sources: MongoPersistedSyncRules[]): MergedSyncRules { + let evaluators = new Map(); + + for (let source of sources) { + const syncRules = source.sync_rules; + const mapping = source.mapping; + const hydrationState = source.hydrationState; + const dataSources = syncRules.bucketDataSources; + for (let source of dataSources) { + const scope = hydrationState.getBucketSourceScope(source); + const id = mapping.bucketSourceId(source); + if (evaluators.has(id)) { + continue; + } + + const evaluate: EvaluateRowFn = (options: EvaluateRowOptions): EvaluationResult[] => { + return source.evaluateRow(options).map((result) => { + if (isEvaluationError(result)) { + return result; + } + const info = buildBucketInfo(scope, result.serializedBucketParameters); + return { + bucket: info.bucket, + id: result.id, + table: result.table, + data: result.data, + source: info[SOURCE] + } satisfies EvaluatedRow; + }); + }; + evaluators.set(id, evaluate); + } + } + + return new MergedSyncRules(Array.from(evaluators.values())); + } + + constructor(private evaluators: EvaluateRowFn[]) {} + eventDescriptors: SqlEventDescriptor[] = []; + compatibility: CompatibilityContext = 
CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY; + + getSourceTables(): TablePattern[] { + throw new Error('Method not implemented.'); + } + tableTriggersEvent(table: SourceTableInterface): boolean { + throw new Error('Method not implemented.'); + } + tableSyncsData(table: SourceTableInterface): boolean { + throw new Error('Method not implemented.'); + } + tableSyncsParameters(table: SourceTableInterface): boolean { + throw new Error('Method not implemented.'); + } + applyRowContext( + source: SqliteRow + ): SqliteRow { + throw new Error('Method not implemented.'); + } + + /** + * Throws errors. + */ + evaluateRow(options: EvaluateRowOptions): EvaluatedRow[] { + const { results, errors } = this.evaluateRowWithErrors(options); + if (errors.length > 0) { + throw new Error(errors[0].error); + } + return results; + } + + evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] } { + const rawResults: EvaluationResult[] = this.evaluators.flatMap((evaluator) => evaluator(options)); + const results = rawResults.filter(isEvaluatedRow) as EvaluatedRow[]; + const errors = rawResults.filter(isEvaluationError) as EvaluationError[]; + + return { results, errors }; + } + + evaluateParameterRow(table: SourceTableInterface, row: SqliteRow): EvaluatedParameters[] { + throw new Error('Method not implemented.'); + } + evaluateParameterRowWithErrors( + table: SourceTableInterface, + row: SqliteRow + ): { results: EvaluatedParameters[]; errors: EvaluationError[] } { + throw new Error('Method not implemented.'); + } +} diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 6efab50f8..707579b06 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -1,5 +1,11 @@ import { mongo } from '@powersync/lib-service-mongodb'; -import { HydratedSyncRules, SqlEventDescriptor, SqliteRow, SqliteValue } from '@powersync/service-sync-rules'; +import { + HydratedSyncRules, + RowProcessor, + SqlEventDescriptor, + SqliteRow, + SqliteValue +} from '@powersync/service-sync-rules'; import * as bson from 'bson'; import { @@ -17,6 +23,7 @@ import { deserializeBson, InternalOpId, isCompleteRow, + PersistedSyncRulesContent, SaveOperationTag, storage, SyncRuleState, @@ -47,15 +54,14 @@ const replicationMutex = new utils.Mutex(); export const EMPTY_DATA = new bson.Binary(bson.serialize({})); -export interface MongoBucketBatchOptions { +export interface MongoWriterOptions { db: PowerSyncMongo; - syncRules: MongoPersistedSyncRules; - groupId: number; slotName: string; - lastCheckpointLsn: string | null; - keepaliveOp: InternalOpId | null; - resumeFromLsn: string | null; storeCurrentData: boolean; + + rowProcessor: RowProcessor; + mapping: BucketDefinitionMapping; + /** * Set to true for initial replication. 
*/ @@ -66,88 +72,67 @@ export interface MongoBucketBatchOptions { logger?: Logger; } -export class MongoBucketBatch - extends BaseObserver - implements storage.BucketStorageBatch -{ - private logger: Logger; +interface MongoBucketBatchOptions { + db: PowerSyncMongo; + syncRules: MongoPersistedSyncRules; + lastCheckpointLsn: string | null; + keepaliveOp: InternalOpId | null; + resumeFromLsn: string | null; + logger: Logger; + writer: MongoBucketDataWriter; +} + +export interface ForSyncRulesOptions { + syncRules: MongoPersistedSyncRules; + + lastCheckpointLsn: string | null; + resumeFromLsn: string | null; + keepaliveOp: InternalOpId | null; +} + +export class MongoBucketDataWriter implements storage.BucketDataWriter { + private batch: OperationBatch | null = null; + private readonly rowProcessor: RowProcessor; + write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; private readonly client: mongo.MongoClient; public readonly db: PowerSyncMongo; public readonly session: mongo.ClientSession; - private readonly sync_rules: HydratedSyncRules; - - private readonly group_id: number; - + private readonly logger: Logger; private readonly slot_name: string; private readonly storeCurrentData: boolean; private readonly skipExistingRows: boolean; - private batch: OperationBatch | null = null; - private write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; - private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; - private clearedError = false; private readonly mapping: BucketDefinitionMapping; - /** - * Last LSN received associated with a checkpoint. - * - * This could be either: - * 1. A commit LSN. - * 2. A keepalive message LSN. - */ - private last_checkpoint_lsn: string | null = null; - - private persisted_op: InternalOpId | null = null; - - /** - * Last written op, if any. This may not reflect a consistent checkpoint. - */ - public last_flushed_op: InternalOpId | null = null; - - /** - * lastCheckpointLsn is the last consistent commit. - * - * While that is generally a "safe" point to resume from, there are cases where we may want to resume from a different point: - * 1. After an initial snapshot, we don't have a consistent commit yet, but need to resume from the snapshot LSN. - * 2. If "no_checkpoint_before_lsn" is set far in advance, it may take a while to reach that point. We - * may want to resume at incremental points before that. - * - * This is set when creating the batch, but may not be updated afterwards. - */ - public resumeFromLsn: string | null = null; - - private needsActivation = true; + private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; + private batches: MongoBucketBatch[] = []; - constructor(options: MongoBucketBatchOptions) { - super(); - this.logger = options.logger ?? defaultLogger; - this.client = options.db.client; + constructor(options: MongoWriterOptions) { this.db = options.db; - this.group_id = options.groupId; - this.last_checkpoint_lsn = options.lastCheckpointLsn; - this.resumeFromLsn = options.resumeFromLsn; + this.client = this.db.client; this.session = this.client.startSession(); this.slot_name = options.slotName; - this.sync_rules = options.syncRules.hydratedSyncRules(); - this.mapping = options.syncRules.mapping; + this.mapping = options.mapping; + this.rowProcessor = options.rowProcessor; this.storeCurrentData = options.storeCurrentData; this.skipExistingRows = options.skipExistingRows; + this.logger = options.logger ?? 
defaultLogger; this.markRecordUnavailable = options.markRecordUnavailable; - this.batch = new OperationBatch(); - - this.persisted_op = options.keepaliveOp ?? null; } - addCustomWriteCheckpoint(checkpoint: storage.BatchedCustomWriteCheckpointOptions): void { - this.write_checkpoint_batch.push({ - ...checkpoint, - sync_rules_id: this.group_id + forSyncRules(options: ForSyncRulesOptions): MongoBucketBatch { + const batch = new MongoBucketBatch({ + db: this.db, + syncRules: options.syncRules, + lastCheckpointLsn: options.lastCheckpointLsn, + keepaliveOp: options.keepaliveOp, + resumeFromLsn: options.resumeFromLsn, + logger: this.logger, + writer: this }); - } - - get lastCheckpointLsn() { - return this.last_checkpoint_lsn; + this.batches.push(batch); + return batch; } async flush(options?: storage.BatchBucketFlushOptions): Promise { @@ -189,8 +174,10 @@ export class MongoBucketBatch throw new ReplicationAssertionError('Unexpected last_op == null'); } - this.persisted_op = last_op; - this.last_flushed_op = last_op; + for (let batch of this.batches) { + batch.persisted_op = last_op; + batch.last_flushed_op = last_op; + } return { flushed_op: last_op }; } @@ -274,7 +261,7 @@ export class MongoBucketBatch current_data_lookup.set(cacheKey(doc._id.t, doc._id.k), doc); } - let persistedBatch: PersistedBatch | null = new PersistedBatch(this.group_id, transactionSize, { + let persistedBatch: PersistedBatch | null = new PersistedBatch(transactionSize, { logger: this.logger, mapping: this.mapping }); @@ -317,7 +304,9 @@ export class MongoBucketBatch } if (didFlush) { - await this.clearError(); + for (let batch of this.batches) { + await batch.clearError(); + } } return resumeBatch?.hasData() ? resumeBatch : null; @@ -472,7 +461,7 @@ export class MongoBucketBatch if (afterId && after && utils.isCompleteRow(this.storeCurrentData, after)) { // Insert or update if (sourceTable.syncData) { - const { results: evaluated, errors: syncErrors } = this.sync_rules.evaluateRowWithErrors({ + const { results: evaluated, errors: syncErrors } = this.rowProcessor.evaluateRowWithErrors({ record: after, sourceTable }); @@ -514,7 +503,7 @@ export class MongoBucketBatch if (sourceTable.syncParameters) { // Parameters - const { results: paramEvaluated, errors: paramErrors } = this.sync_rules.evaluateParameterRowWithErrors( + const { results: paramEvaluated, errors: paramErrors } = this.rowProcessor.evaluateParameterRowWithErrors( sourceTable, after ); @@ -577,7 +566,7 @@ export class MongoBucketBatch return result; } - private async withTransaction(cb: () => Promise) { + async withTransaction(cb: () => Promise) { await replicationMutex.exclusiveLock(async () => { await this.session.withTransaction( async () => { @@ -653,19 +642,283 @@ export class MongoBucketBatch } ); - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, + // FIXME: Do we need this? + // await this.db.sync_rules.updateOne( + // { + // _id: this.group_id + // }, + // { + // $set: { + // last_keepalive_ts: new Date() + // } + // }, + // { session } + // ); + // We don't notify checkpoint here - we don't make any checkpoint updates directly + }); + } + + async save(record: storage.SaveOptions): Promise { + const { after, before, sourceTable, tag } = record; + for (const event of this.getTableEvents(sourceTable)) { + for (let batch of this.batches) { + batch.iterateListeners((cb) => + cb.replicationEvent?.({ + batch: batch, + table: sourceTable, + data: { + op: tag, + after: after && utils.isCompleteRow(this.storeCurrentData, after) ? 
after : undefined, + before: before && utils.isCompleteRow(this.storeCurrentData, before) ? before : undefined + }, + event + }) + ); + } + } + + /** + * Return if the table is just an event table + */ + if (!sourceTable.syncData && !sourceTable.syncParameters) { + return null; + } + + this.logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`); + + this.batch ??= new OperationBatch(); + this.batch.push(new RecordOperation(record)); + + if (this.batch.shouldFlush()) { + const r = await this.flush(); + // HACK: Give other streams a chance to also flush + await timers.setTimeout(5); + return r; + } + return null; + } + + /** + * Drop is equivalent to TRUNCATE, plus removing our record of the table. + */ + async drop(sourceTables: storage.SourceTable[]): Promise { + await this.truncate(sourceTables); + const result = await this.flush(); + + await this.withTransaction(async () => { + for (let table of sourceTables) { + await this.db.source_tables.deleteOne({ _id: mongoTableId(table.id) }); + } + }); + return result; + } + + async truncate(sourceTables: storage.SourceTable[]): Promise { + await this.flush(); + + let last_op: InternalOpId | null = null; + for (let table of sourceTables) { + last_op = await this.truncateSingle(table); + } + + if (last_op) { + for (let batch of this.batches) { + batch.persisted_op = last_op; + } + return { + flushed_op: last_op + }; + } else { + return null; + } + } + + async truncateSingle(sourceTable: storage.SourceTable): Promise { + let last_op: InternalOpId | null = null; + + // To avoid too large transactions, we limit the amount of data we delete per transaction. + // Since we don't use the record data here, we don't have explicit size limits per batch. + const BATCH_LIMIT = 2000; + + let lastBatchCount = BATCH_LIMIT; + while (lastBatchCount == BATCH_LIMIT) { + await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => { + const current_data_filter: mongo.Filter = { + _id: idPrefixFilter({ g: 0, t: mongoTableId(sourceTable.id) }, ['k']), + // Skip soft-deleted data + pending_delete: { $exists: false } + }; + + const cursor = this.db.current_data.find(current_data_filter, { + projection: { + _id: 1, + buckets: 1, + lookups: 1 + }, + limit: BATCH_LIMIT, + session: session + }); + const batch = await cursor.toArray(); + const persistedBatch = new PersistedBatch(0, { logger: this.logger, mapping: this.mapping }); + + for (let value of batch) { + persistedBatch.saveBucketData({ + op_seq: opSeq, + before_buckets: value.buckets, + evaluated: [], + table: sourceTable, + sourceKey: value._id.k + }); + persistedBatch.saveParameterData({ + op_seq: opSeq, + existing_lookups: value.lookups, + evaluated: [], + sourceTable: sourceTable, + sourceKey: value._id.k + }); + + // Since this is not from streaming replication, we can do a hard delete + persistedBatch.hardDeleteCurrentData(value._id); + } + await persistedBatch.flush(this.db, session); + lastBatchCount = batch.length; + + last_op = opSeq.last(); + }); + } + + return last_op!; + } + + async updateTableProgress( + table: storage.SourceTable, + progress: Partial + ): Promise { + const copy = table.clone(); + const snapshotStatus = { + totalEstimatedCount: progress.totalEstimatedCount ?? copy.snapshotStatus?.totalEstimatedCount ?? 0, + replicatedCount: progress.replicatedCount ?? copy.snapshotStatus?.replicatedCount ?? 0, + lastKey: progress.lastKey ?? copy.snapshotStatus?.lastKey ?? 
null + }; + copy.snapshotStatus = snapshotStatus; + + await this.withTransaction(async () => { + await this.db.source_tables.updateOne( + { _id: mongoTableId(table.id) }, { $set: { - last_keepalive_ts: new Date() + snapshot_status: { + last_key: snapshotStatus.lastKey == null ? null : new bson.Binary(snapshotStatus.lastKey), + total_estimated_count: snapshotStatus.totalEstimatedCount, + replicated_count: snapshotStatus.replicatedCount + } } }, - { session } + { session: this.session } ); - // We don't notify checkpoint here - we don't make any checkpoint updates directly }); + + return copy; + } + + /** + * Gets relevant {@link SqlEventDescriptor}s for the given {@link SourceTable} + */ + protected getTableEvents(table: storage.SourceTable): SqlEventDescriptor[] { + return this.rowProcessor.eventDescriptors.filter((evt) => + [...evt.getSourceTables()].some((sourceTable) => sourceTable.matches(table)) + ); + } +} + +export class MongoBucketBatch + extends BaseObserver + implements storage.BucketStorageBatch +{ + private logger: Logger; + + private readonly client: mongo.MongoClient; + public readonly db: PowerSyncMongo; + public readonly session: mongo.ClientSession; + + private readonly group_id: number; + + private clearedError = false; + + /** + * Last LSN received associated with a checkpoint. + * + * This could be either: + * 1. A commit LSN. + * 2. A keepalive message LSN. + */ + private last_checkpoint_lsn: string | null = null; + + persisted_op: InternalOpId | null = null; + + /** + * Last written op, if any. This may not reflect a consistent checkpoint. + */ + public last_flushed_op: InternalOpId | null = null; + + /** + * lastCheckpointLsn is the last consistent commit. + * + * While that is generally a "safe" point to resume from, there are cases where we may want to resume from a different point: + * 1. After an initial snapshot, we don't have a consistent commit yet, but need to resume from the snapshot LSN. + * 2. If "no_checkpoint_before_lsn" is set far in advance, it may take a while to reach that point. We + * may want to resume at incremental points before that. + * + * This is set when creating the batch, but may not be updated afterwards. + */ + public resumeFromLsn: string | null = null; + + private needsActivation = true; + + private readonly writer: MongoBucketDataWriter; + + constructor(options: MongoBucketBatchOptions) { + super(); + this.logger = options.logger ?? defaultLogger; + this.client = options.db.client; + this.db = options.db; + this.group_id = options.syncRules.id; + this.last_checkpoint_lsn = options.lastCheckpointLsn; + this.resumeFromLsn = options.resumeFromLsn; + this.writer = options.writer; + this.session = this.writer.session; + + this.persisted_op = options.keepaliveOp ?? 
null; + } + updateTableProgress( + table: storage.SourceTable, + progress: Partial + ): Promise { + throw new Error('Method not implemented.'); + } + + save(record: storage.SaveOptions): Promise { + return this.writer.save(record); + } + truncate(sourceTables: storage.SourceTable[]): Promise { + return this.writer.truncate(sourceTables); + } + drop(sourceTables: storage.SourceTable[]): Promise { + return this.writer.truncate(sourceTables); + } + flush(options?: storage.BatchBucketFlushOptions): Promise { + return this.writer.flush(options); + } + + addCustomWriteCheckpoint(checkpoint: storage.BatchedCustomWriteCheckpointOptions): void { + this.writer.write_checkpoint_batch.push({ + ...checkpoint, + sync_rules_id: this.group_id + }); + } + + get lastCheckpointLsn() { + return this.last_checkpoint_lsn; } async [Symbol.asyncDispose]() { @@ -682,7 +935,7 @@ export class MongoBucketBatch async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { const { createEmptyCheckpoints } = { ...storage.DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS, ...options }; - await this.flush(options); + await this.writer.flush(options); const now = new Date(); @@ -936,165 +1189,6 @@ export class MongoBucketBatch ); } - async save(record: storage.SaveOptions): Promise { - const { after, before, sourceTable, tag } = record; - for (const event of this.getTableEvents(sourceTable)) { - this.iterateListeners((cb) => - cb.replicationEvent?.({ - batch: this, - table: sourceTable, - data: { - op: tag, - after: after && utils.isCompleteRow(this.storeCurrentData, after) ? after : undefined, - before: before && utils.isCompleteRow(this.storeCurrentData, before) ? before : undefined - }, - event - }) - ); - } - - /** - * Return if the table is just an event table - */ - if (!sourceTable.syncData && !sourceTable.syncParameters) { - return null; - } - - this.logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`); - - this.batch ??= new OperationBatch(); - this.batch.push(new RecordOperation(record)); - - if (this.batch.shouldFlush()) { - const r = await this.flush(); - // HACK: Give other streams a chance to also flush - await timers.setTimeout(5); - return r; - } - return null; - } - - /** - * Drop is equivalent to TRUNCATE, plus removing our record of the table. - */ - async drop(sourceTables: storage.SourceTable[]): Promise { - await this.truncate(sourceTables); - const result = await this.flush(); - - await this.withTransaction(async () => { - for (let table of sourceTables) { - await this.db.source_tables.deleteOne({ _id: mongoTableId(table.id) }); - } - }); - return result; - } - - async truncate(sourceTables: storage.SourceTable[]): Promise { - await this.flush(); - - let last_op: InternalOpId | null = null; - for (let table of sourceTables) { - last_op = await this.truncateSingle(table); - } - - if (last_op) { - this.persisted_op = last_op; - return { - flushed_op: last_op - }; - } else { - return null; - } - } - - async truncateSingle(sourceTable: storage.SourceTable): Promise { - let last_op: InternalOpId | null = null; - - // To avoid too large transactions, we limit the amount of data we delete per transaction. - // Since we don't use the record data here, we don't have explicit size limits per batch. 
- const BATCH_LIMIT = 2000; - - let lastBatchCount = BATCH_LIMIT; - while (lastBatchCount == BATCH_LIMIT) { - await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => { - const current_data_filter: mongo.Filter = { - _id: idPrefixFilter({ g: 0, t: mongoTableId(sourceTable.id) }, ['k']), - // Skip soft-deleted data - pending_delete: { $exists: false } - }; - - const cursor = this.db.current_data.find(current_data_filter, { - projection: { - _id: 1, - buckets: 1, - lookups: 1 - }, - limit: BATCH_LIMIT, - session: session - }); - const batch = await cursor.toArray(); - const persistedBatch = new PersistedBatch(this.group_id, 0, { logger: this.logger, mapping: this.mapping }); - - for (let value of batch) { - persistedBatch.saveBucketData({ - op_seq: opSeq, - before_buckets: value.buckets, - evaluated: [], - table: sourceTable, - sourceKey: value._id.k - }); - persistedBatch.saveParameterData({ - op_seq: opSeq, - existing_lookups: value.lookups, - evaluated: [], - sourceTable: sourceTable, - sourceKey: value._id.k - }); - - // Since this is not from streaming replication, we can do a hard delete - persistedBatch.hardDeleteCurrentData(value._id); - } - await persistedBatch.flush(this.db, session); - lastBatchCount = batch.length; - - last_op = opSeq.last(); - }); - } - - return last_op!; - } - - async updateTableProgress( - table: storage.SourceTable, - progress: Partial - ): Promise { - const copy = table.clone(); - const snapshotStatus = { - totalEstimatedCount: progress.totalEstimatedCount ?? copy.snapshotStatus?.totalEstimatedCount ?? 0, - replicatedCount: progress.replicatedCount ?? copy.snapshotStatus?.replicatedCount ?? 0, - lastKey: progress.lastKey ?? copy.snapshotStatus?.lastKey ?? null - }; - copy.snapshotStatus = snapshotStatus; - - await this.withTransaction(async () => { - await this.db.source_tables.updateOne( - { _id: mongoTableId(table.id) }, - { - $set: { - snapshot_status: { - last_key: snapshotStatus.lastKey == null ? null : new bson.Binary(snapshotStatus.lastKey), - total_estimated_count: snapshotStatus.totalEstimatedCount, - replicated_count: snapshotStatus.replicatedCount - } - } - }, - { session: this.session } - ); - }); - - return copy; - } - async markAllSnapshotDone(no_checkpoint_before_lsn: string) { await this.db.sync_rules.updateOne( { @@ -1131,7 +1225,7 @@ export class MongoBucketBatch const session = this.session; const ids = tables.map((table) => mongoTableId(table.id)); - await this.withTransaction(async () => { + await this.writer.withTransaction(async () => { await this.db.source_tables.updateMany( { _id: { $in: ids } }, { @@ -1169,7 +1263,7 @@ export class MongoBucketBatch }); } - protected async clearError(): Promise { + async clearError(): Promise { // No need to clear an error more than once per batch, since an error would always result in restarting the batch. 
if (this.clearedError) { return; @@ -1188,15 +1282,6 @@ export class MongoBucketBatch ); this.clearedError = true; } - - /** - * Gets relevant {@link SqlEventDescriptor}s for the given {@link SourceTable} - */ - protected getTableEvents(table: storage.SourceTable): SqlEventDescriptor[] { - return this.sync_rules.eventDescriptors.filter((evt) => - [...evt.getSourceTables()].some((sourceTable) => sourceTable.matches(table)) - ); - } } export function currentBucketKey(b: CurrentBucket) { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 6d999e749..1bd8f41d1 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -41,7 +41,7 @@ import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } f import { MongoBucketStorage } from '../MongoBucketStorage.js'; import { PowerSyncMongo } from './db.js'; import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js'; -import { MongoBucketBatch } from './MongoBucketBatch.js'; +import { MongoBucketBatch, MongoBucketDataWriter } from './MongoBucketBatch.js'; import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js'; import { MongoCompactor } from './MongoCompactor.js'; import { MongoParameterCompactor } from './MongoParameterCompactor.js'; @@ -186,18 +186,22 @@ export class MongoSyncBucketStorage const parsedSyncRules = this.sync_rules.parsed(options); - const batch = new MongoBucketBatch({ + const writer = new MongoBucketDataWriter({ logger: options.logger, db: this.db, - syncRules: parsedSyncRules, - groupId: this.group_id, slotName: this.slot_name, - lastCheckpointLsn: checkpoint_lsn, - resumeFromLsn: maxLsn(checkpoint_lsn, doc?.snapshot_lsn), - keepaliveOp: doc?.keepalive_op ? BigInt(doc.keepalive_op) : null, storeCurrentData: options.storeCurrentData, skipExistingRows: options.skipExistingRows ?? false, - markRecordUnavailable: options.markRecordUnavailable + markRecordUnavailable: options.markRecordUnavailable, + mapping: this.mapping, + rowProcessor: parsedSyncRules.hydratedSyncRules() + }); + const batch = writer.forSyncRules({ + syncRules: parsedSyncRules, + + lastCheckpointLsn: checkpoint_lsn, + resumeFromLsn: maxLsn(checkpoint_lsn, doc?.snapshot_lsn), + keepaliveOp: doc?.keepalive_op ? 
BigInt(doc.keepalive_op) : null }); this.iterateListeners((cb) => cb.batchStarted?.(batch)); return batch; diff --git a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts index 5278146ff..c2e660f96 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts @@ -64,11 +64,7 @@ export class PersistedBatch { */ currentSize = 0; - constructor( - private group_id: number, - writtenSize: number, - options: { logger: Logger; mapping: BucketDefinitionMapping } - ) { + constructor(writtenSize: number, options: { logger: Logger; mapping: BucketDefinitionMapping }) { this.currentSize = writtenSize; this.logger = options.logger; this.mapping = options.mapping; diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index 8c6323754..ce08afb58 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -11,12 +11,7 @@ export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptio oldestUncommittedChange: null }; -export interface BucketStorageBatch extends ObserverClient, AsyncDisposable { - /** - * Alias for [Symbol.asyncDispose] - */ - dispose(): Promise; - +export interface BucketDataWriter { /** * Save an op, and potentially flush. * @@ -45,6 +40,16 @@ export interface BucketStorageBatch extends ObserverClient; +} + +export interface BucketStorageBatch + extends ObserverClient, + AsyncDisposable, + BucketDataWriter { + /** + * Alias for [Symbol.asyncDispose] + */ + dispose(): Promise; /** * Flush and commit any saved ops. This creates a new checkpoint by default. diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index 06ac260cb..4473a5bb1 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -20,16 +20,49 @@ import { SqlEventDescriptor, SqliteInputValue, SqliteValue, - SqlSyncRules + SqlSyncRules, + TablePattern } from './index.js'; import { SourceTableInterface } from './SourceTableInterface.js'; import { EvaluatedParametersResult, EvaluateRowOptions, EvaluationResult, SqliteRow } from './types.js'; +export interface RowProcessor { + readonly eventDescriptors: SqlEventDescriptor[]; + readonly compatibility: CompatibilityContext; + + getSourceTables(): TablePattern[]; + + tableTriggersEvent(table: SourceTableInterface): boolean; + + tableSyncsData(table: SourceTableInterface): boolean; + tableSyncsParameters(table: SourceTableInterface): boolean; + + applyRowContext( + source: SqliteRow + ): SqliteRow; + + /** + * Throws errors. + */ + evaluateRow(options: EvaluateRowOptions): EvaluatedRow[]; + + evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] }; + + /** + * Throws errors. + */ + evaluateParameterRow(table: SourceTableInterface, row: SqliteRow): EvaluatedParameters[]; + evaluateParameterRowWithErrors( + table: SourceTableInterface, + row: SqliteRow + ): { results: EvaluatedParameters[]; errors: EvaluationError[] }; +} + /** * Hydrated sync rules is sync rule definitions along with persisted state. Currently, the persisted state * specifically affects bucket names. 
*/ -export class HydratedSyncRules { +export class HydratedSyncRules implements RowProcessor { bucketSources: HydratedBucketSource[] = []; eventDescriptors: SqlEventDescriptor[] = []; compatibility: CompatibilityContext = CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY; From d73fad61615641a9c024a8025e2378b1953e3a3c Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 14 Jan 2026 11:19:59 +0200 Subject: [PATCH 028/101] WIP: merged processing. --- .../implementation/MongoBucketBatch.ts | 177 ++++++- .../implementation/MongoSyncBucketStorage.ts | 147 +++--- .../src/storage/implementation/models.ts | 3 +- .../src/replication/ChangeStream.ts | 479 +++++++++--------- .../src/replication/MongoRelation.ts | 5 +- .../src/replication/MongoSnapshotter.ts | 91 ++-- .../src/storage/BucketStorageBatch.ts | 29 +- .../src/storage/BucketStorageFactory.ts | 3 + .../src/storage/SyncRulesBucketStorage.ts | 5 +- packages/sync-rules/src/HydratedSyncRules.ts | 4 + 10 files changed, 570 insertions(+), 373 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 707579b06..24e1fe545 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -33,7 +33,7 @@ import * as timers from 'node:timers/promises'; import { idPrefixFilter, mongoTableId } from '../../utils/util.js'; import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { PowerSyncMongo } from './db.js'; -import { CurrentBucket, CurrentDataDocument, SourceKey, SyncRuleDocument } from './models.js'; +import { CurrentBucket, CurrentDataDocument, SourceKey, SourceTableDocument, SyncRuleDocument } from './models.js'; import { MongoIdSequence } from './MongoIdSequence.js'; import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; import { batchCreateCustomWriteCheckpoints } from './MongoWriteCheckpointAPI.js'; @@ -135,6 +135,181 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { return batch; } + async [Symbol.asyncDispose](): Promise { + await this.session.endSession(); + for (let batch of this.batches) { + await batch[Symbol.asyncDispose](); + } + } + + get resumeFromLsn(): string | null { + // FIXME: check the logic here when there are multiple batches + return this.batches[0]?.resumeFromLsn ?? 
null; + } + + async keepaliveAll(lsn: string): Promise { + let didAny = false; + for (let batch of this.batches) { + const didBatchKeepalive = await batch.keepalive(lsn); + didAny ||= didBatchKeepalive; + } + return didAny; + } + + async commitAll(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { + let didCommit = false; + for (let batch of this.batches) { + const didWriterCommit = await batch.commit(lsn, options); + didCommit ||= didWriterCommit; + } + return didCommit; + } + + async setAllResumeLsn(lsn: string): Promise { + for (let batch of this.batches) { + await batch.setResumeLsn(lsn); + } + } + + async resolveTable(options: storage.ResolveTableOptions): Promise { + const sources = this.rowProcessor.getMatchingSources({ + connectionTag: options.connection_tag, + name: options.entity_descriptor.name, + schema: options.entity_descriptor.schema + }); + const bucketDataSourceIds = sources.bucketDataSources.map((source) => this.mapping.bucketSourceId(source)); + const parameterLookupSourceIds = sources.parameterIndexLookupCreators.map((source) => + this.mapping.parameterLookupId(source) + ); + + const { connection_id, connection_tag, entity_descriptor } = options; + + const { schema, name, objectId, replicaIdColumns } = entity_descriptor; + + const normalizedReplicaIdColumns = replicaIdColumns.map((column) => ({ + name: column.name, + type: column.type, + type_oid: column.typeId + })); + let result: storage.ResolveTableResult | null = null; + await this.db.client.withSession(async (session) => { + const col = this.db.source_tables; + let filter: mongo.Filter = { + connection_id: connection_id, + schema_name: schema, + table_name: name, + replica_id_columns2: normalizedReplicaIdColumns + }; + if (objectId != null) { + filter.relation_id = objectId; + } + let docs = await col.find(filter, { session }).toArray(); + let matchingDocs: SourceTableDocument[] = []; + + let coveredBucketDataSourceIds = new Set(); + let coveredParameterLookupSourceIds = new Set(); + + for (let doc of docs) { + const matchingBucketDataSourceIds = doc.bucket_data_source_ids.filter((id) => bucketDataSourceIds.includes(id)); + const matchingParameterLookupSourceIds = doc.parameter_lookup_source_ids.filter((id) => + parameterLookupSourceIds.includes(id) + ); + if (matchingBucketDataSourceIds.length == 0 && matchingParameterLookupSourceIds.length == 0) { + // Not relevant + continue; + } + matchingDocs.push(doc); + } + + const pendingBucketDataSourceIds = bucketDataSourceIds.filter((id) => !coveredBucketDataSourceIds.has(id)); + const pendingParameterLookupSourceIds = parameterLookupSourceIds.filter( + (id) => !coveredParameterLookupSourceIds.has(id) + ); + if (pendingBucketDataSourceIds.length > 0 || pendingParameterLookupSourceIds.length > 0) { + const doc: SourceTableDocument = { + _id: new bson.ObjectId(), + connection_id: connection_id, + relation_id: objectId, + schema_name: schema, + table_name: name, + replica_id_columns: null, + replica_id_columns2: normalizedReplicaIdColumns, + snapshot_done: false, + snapshot_status: undefined, + bucket_data_source_ids: pendingBucketDataSourceIds, + parameter_lookup_source_ids: pendingParameterLookupSourceIds + }; + + await col.insertOne(doc, { session }); + docs.push(doc); + } + + const sourceTables = docs.map((doc) => { + const sourceTable = new storage.SourceTable({ + id: doc._id, + connectionTag: connection_tag, + objectId: objectId, + schema: schema, + name: name, + replicaIdColumns: replicaIdColumns, + snapshotComplete: doc.snapshot_done ?? 
true + }); + sourceTable.snapshotStatus = + doc.snapshot_status == null + ? undefined + : { + lastKey: doc.snapshot_status.last_key?.buffer ?? null, + totalEstimatedCount: doc.snapshot_status.total_estimated_count, + replicatedCount: doc.snapshot_status.replicated_count + }; + + sourceTable.syncData = doc.bucket_data_source_ids.length > 0; + sourceTable.syncParameters = doc.parameter_lookup_source_ids.length > 0; + // FIXME: implement sourceTable.syncEvent + return sourceTable; + }); + + // FIXME: dropTables + // let dropTables: storage.SourceTable[] = []; + // // Detect tables that are either renamed, or have different replica_id_columns + // let truncateFilter = [{ schema_name: schema, table_name: name }] as any[]; + // if (objectId != null) { + // // Only detect renames if the source uses relation ids. + // truncateFilter.push({ relation_id: objectId }); + // } + // const truncate = await col + // .find( + // { + // group_id: group_id, + // connection_id: connection_id, + // _id: { $ne: doc._id }, + // $or: truncateFilter + // }, + // { session } + // ) + // .toArray(); + // dropTables = truncate.map( + // (doc) => + // new storage.SourceTable({ + // id: doc._id, + // connectionTag: connection_tag, + // objectId: doc.relation_id, + // schema: doc.schema_name, + // name: doc.table_name, + // replicaIdColumns: + // doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], + // snapshotComplete: doc.snapshot_done ?? true + // }) + // ); + + result = { + tables: sourceTables, + dropTables: [] + }; + }); + return result!; + } + async flush(options?: storage.BatchBucketFlushOptions): Promise { let result: storage.FlushedResult | null = null; // One flush may be split over multiple transactions. diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 1bd8f41d1..ec5151f31 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -223,7 +223,8 @@ export class MongoSyncBucketStorage } async resolveTable(options: storage.ResolveTableOptions): Promise { - const { group_id, connection_id, connection_tag, entity_descriptor } = options; + const { connection_id, connection_tag, entity_descriptor, bucket_data_source_ids, parameter_lookup_source_ids } = + options; const { schema, name, objectId, replicaIdColumns } = entity_descriptor; @@ -236,7 +237,6 @@ export class MongoSyncBucketStorage await this.db.client.withSession(async (session) => { const col = this.db.source_tables; let filter: mongo.Filter = { - sync_rules_ids: group_id, connection_id: connection_id, schema_name: schema, table_name: name, @@ -245,11 +245,33 @@ export class MongoSyncBucketStorage if (objectId != null) { filter.relation_id = objectId; } - let doc = await col.findOne(filter, { session }); - if (doc == null) { - doc = { + let docs = await col.find(filter, { session }).toArray(); + let matchingDocs: SourceTableDocument[] = []; + + let coveredBucketDataSourceIds = new Set(); + let coveredParameterLookupSourceIds = new Set(); + + for (let doc of docs) { + const matchingBucketDataSourceIds = doc.bucket_data_source_ids.filter((id) => + bucket_data_source_ids.includes(id) + ); + const matchingParameterLookupSourceIds = doc.parameter_lookup_source_ids.filter((id) => + parameter_lookup_source_ids.includes(id) + ); + if 
(matchingBucketDataSourceIds.length == 0 && matchingParameterLookupSourceIds.length == 0) { + // Not relevant + continue; + } + matchingDocs.push(doc); + } + + const pendingBucketDataSourceIds = bucket_data_source_ids.filter((id) => !coveredBucketDataSourceIds.has(id)); + const pendingParameterLookupSourceIds = parameter_lookup_source_ids.filter( + (id) => !coveredParameterLookupSourceIds.has(id) + ); + if (pendingBucketDataSourceIds.length > 0 || pendingParameterLookupSourceIds.length > 0) { + const doc: SourceTableDocument = { _id: new bson.ObjectId(), - sync_rules_ids: [group_id], connection_id: connection_id, relation_id: objectId, schema_name: schema, @@ -257,67 +279,72 @@ export class MongoSyncBucketStorage replica_id_columns: null, replica_id_columns2: normalizedReplicaIdColumns, snapshot_done: false, - snapshot_status: undefined + snapshot_status: undefined, + bucket_data_source_ids: bucket_data_source_ids, + parameter_lookup_source_ids: parameter_lookup_source_ids }; await col.insertOne(doc, { session }); + docs.push(doc); } - const sourceTable = new storage.SourceTable({ - id: doc._id, - connectionTag: connection_tag, - objectId: objectId, - schema: schema, - name: name, - replicaIdColumns: replicaIdColumns, - snapshotComplete: doc.snapshot_done ?? true + + const sourceTables = docs.map((doc) => { + const sourceTable = new storage.SourceTable({ + id: doc._id, + connectionTag: connection_tag, + objectId: objectId, + schema: schema, + name: name, + replicaIdColumns: replicaIdColumns, + snapshotComplete: doc.snapshot_done ?? true + }); + sourceTable.snapshotStatus = + doc.snapshot_status == null + ? undefined + : { + lastKey: doc.snapshot_status.last_key?.buffer ?? null, + totalEstimatedCount: doc.snapshot_status.total_estimated_count, + replicatedCount: doc.snapshot_status.replicated_count + }; + return sourceTable; }); - sourceTable.syncEvent = options.sync_rules.tableTriggersEvent(sourceTable); - sourceTable.syncData = options.sync_rules.tableSyncsData(sourceTable); - sourceTable.syncParameters = options.sync_rules.tableSyncsParameters(sourceTable); - sourceTable.snapshotStatus = - doc.snapshot_status == null - ? undefined - : { - lastKey: doc.snapshot_status.last_key?.buffer ?? null, - totalEstimatedCount: doc.snapshot_status.total_estimated_count, - replicatedCount: doc.snapshot_status.replicated_count - }; - - let dropTables: storage.SourceTable[] = []; - // Detect tables that are either renamed, or have different replica_id_columns - let truncateFilter = [{ schema_name: schema, table_name: name }] as any[]; - if (objectId != null) { - // Only detect renames if the source uses relation ids. - truncateFilter.push({ relation_id: objectId }); - } - const truncate = await col - .find( - { - group_id: group_id, - connection_id: connection_id, - _id: { $ne: doc._id }, - $or: truncateFilter - }, - { session } - ) - .toArray(); - dropTables = truncate.map( - (doc) => - new storage.SourceTable({ - id: doc._id, - connectionTag: connection_tag, - objectId: doc.relation_id, - schema: doc.schema_name, - name: doc.table_name, - replicaIdColumns: - doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], - snapshotComplete: doc.snapshot_done ?? 
true - }) - ); + + // FIXME: dropTables + // let dropTables: storage.SourceTable[] = []; + // // Detect tables that are either renamed, or have different replica_id_columns + // let truncateFilter = [{ schema_name: schema, table_name: name }] as any[]; + // if (objectId != null) { + // // Only detect renames if the source uses relation ids. + // truncateFilter.push({ relation_id: objectId }); + // } + // const truncate = await col + // .find( + // { + // group_id: group_id, + // connection_id: connection_id, + // _id: { $ne: doc._id }, + // $or: truncateFilter + // }, + // { session } + // ) + // .toArray(); + // dropTables = truncate.map( + // (doc) => + // new storage.SourceTable({ + // id: doc._id, + // connectionTag: connection_tag, + // objectId: doc.relation_id, + // schema: doc.schema_name, + // name: doc.table_name, + // replicaIdColumns: + // doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], + // snapshotComplete: doc.snapshot_done ?? true + // }) + // ); result = { - table: sourceTable, - dropTables: dropTables + tables: sourceTables, + dropTables: [] }; }); return result!; diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index b80e26ddc..85f36ba8a 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -73,7 +73,8 @@ export type OpType = 'PUT' | 'REMOVE' | 'MOVE' | 'CLEAR'; export interface SourceTableDocument { _id: bson.ObjectId; - sync_rules_ids: number[]; + bucket_data_source_ids: number[]; + parameter_lookup_source_ids: number[]; connection_id: number; relation_id: number | string | undefined; schema_name: string; diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 14f14c903..118335c55 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -9,8 +9,8 @@ import { ServiceError } from '@powersync/lib-services-framework'; import { + BucketStorageFactory, MetricsEngine, - RelationCache, SaveOperationTag, SourceEntityDescriptor, SourceTable, @@ -22,6 +22,7 @@ import { MongoLSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; import { escapeRegExp } from '../utils.js'; import { ChangeStreamInvalidatedError, mapChangeStreamError } from './ChangeStreamErrors.js'; +import { ReplicationStreamConfig } from './ChangeStreamReplicationJob.js'; import { MongoManager } from './MongoManager.js'; import { constructAfterRecord, @@ -32,10 +33,10 @@ import { } from './MongoRelation.js'; import { MongoSnapshotter } from './MongoSnapshotter.js'; import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js'; -import { ReplicationStreamConfig } from './ChangeStreamReplicationJob.js'; export interface ChangeStreamOptions { connections: MongoManager; + factory: BucketStorageFactory; streams: Pick[]; metrics: MetricsEngine; abort_signal: AbortSignal; @@ -67,7 +68,6 @@ interface SubStreamOptions { } class SubStream { - public readonly relationCache = new RelationCache(getCacheIdentifier); private readonly connection_id = 1; private readonly connections: MongoManager; public readonly storage: storage.SyncRulesBucketStorage; @@ -100,14 +100,6 @@ class SubStream { }); } - private get usePostImages() { - return this.connections.options.postImages != 
PostImagesOption.OFF; - } - - private get configurePostImages() { - return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; - } - async initReplication() { const result = await this.snapshotter.checkSlot(); // FIXME: This should be done once, not per sub-stream @@ -120,168 +112,6 @@ class SubStream { await this.snapshotter.queueSnapshotTables(result.snapshotLsn); } } - - async createWriter(): Promise { - return this.storage.createWriter({ - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, - defaultSchema: this.connections.db.databaseName, - // We get a complete postimage for every change, so we don't need to store the current data. - storeCurrentData: false - }); - } - - async handleRelation( - batch: storage.BucketStorageBatch, - descriptor: SourceEntityDescriptor, - options: { snapshot: boolean; collectionInfo: mongo.CollectionInfo | undefined } - ) { - if (options.collectionInfo != null) { - await this.checkPostImages(descriptor.schema, options.collectionInfo); - } else { - // If collectionInfo is null, the collection may have been dropped. - // Ignore the postImages check in this case. - } - - const snapshot = options.snapshot; - const result = await this.storage.resolveTable({ - group_id: this.storage.group_id, - connection_id: this.connection_id, - connection_tag: this.connections.connectionTag, - entity_descriptor: descriptor, - sync_rules: this.syncRules - }); - this.relationCache.update(result.table); - - // Drop conflicting collections. - // This is generally not expected for MongoDB source dbs, so we log an error. - if (result.dropTables.length > 0) { - this.logger.error( - `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` - ); - await batch.drop(result.dropTables); - } - - // Snapshot if: - // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not already done, AND: - // 3. The table is used in sync rules. - const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; - if (shouldSnapshot) { - this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); - await this.snapshotter.queueSnapshot(batch, result.table); - } - - return result.table; - } - - private constructAfterRecord(document: mongo.Document): SqliteRow { - const inputRow = constructAfterRecord(document); - return this.syncRules.applyRowContext(inputRow); - } - - public async getRelation( - batch: storage.BucketStorageBatch, - descriptor: SourceEntityDescriptor, - options: { snapshot: boolean } - ): Promise { - const existing = this.relationCache.get(descriptor); - if (existing != null) { - return existing; - } - - // Note: collection may have been dropped at this point, so we handle - // missing values. 
- const collection = await this.getCollectionInfo(descriptor.schema, descriptor.name); - - return this.handleRelation(batch, descriptor, { snapshot: options.snapshot, collectionInfo: collection }); - } - - public async getCollectionInfo(db: string, name: string): Promise { - const collection = ( - await this.client - .db(db) - .listCollections( - { - name: name - }, - { nameOnly: false } - ) - .toArray() - )[0]; - return collection; - } - - private async checkPostImages(db: string, collectionInfo: mongo.CollectionInfo) { - if (!this.usePostImages) { - // Nothing to check - return; - } - - const enabled = collectionInfo.options?.changeStreamPreAndPostImages?.enabled == true; - - if (!enabled && this.configurePostImages) { - await this.client.db(db).command({ - collMod: collectionInfo.name, - changeStreamPreAndPostImages: { enabled: true } - }); - this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`); - } else if (!enabled) { - throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`); - } - } - - async writeChange( - batch: storage.BucketStorageBatch, - table: storage.SourceTable, - change: mongo.ChangeStreamDocument - ): Promise { - if (!table.syncAny) { - this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`); - return null; - } - - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); - if (change.operationType == 'insert') { - const baseRecord = this.constructAfterRecord(change.fullDocument); - return await batch.save({ - tag: SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: baseRecord, - afterReplicaId: change.documentKey._id - }); - } else if (change.operationType == 'update' || change.operationType == 'replace') { - if (change.fullDocument == null) { - // Treat as delete - return await batch.save({ - tag: SaveOperationTag.DELETE, - sourceTable: table, - before: undefined, - beforeReplicaId: change.documentKey._id - }); - } - const after = this.constructAfterRecord(change.fullDocument!); - return await batch.save({ - tag: SaveOperationTag.UPDATE, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: after, - afterReplicaId: change.documentKey._id - }); - } else if (change.operationType == 'delete') { - return await batch.save({ - tag: SaveOperationTag.DELETE, - sourceTable: table, - before: undefined, - beforeReplicaId: change.documentKey._id - }); - } else { - throw new ReplicationAssertionError(`Unsupported operation: ${change.operationType}`); - } - } } export class ChangeStream { @@ -293,6 +123,7 @@ export class ChangeStream { private readonly client: mongo.MongoClient; private readonly defaultDb: mongo.Db; private readonly metrics: MetricsEngine; + private readonly factory: BucketStorageFactory; private readonly maxAwaitTimeMS: number; @@ -318,12 +149,15 @@ export class ChangeStream { private changeStreamTimeout: number; + public readonly relationCache = new Map(); + constructor(options: ChangeStreamOptions) { this.metrics = options.metrics; this.connections = options.connections; this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000; this.client = this.connections.client; this.defaultDb = this.connections.db; + this.factory = options.factory; // The change stream aggregation command should timeout before the socket times out, // so we use 90% of the socket timeout value. 
this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9); @@ -356,6 +190,10 @@ export class ChangeStream { return this.connections.options.postImages != PostImagesOption.OFF; } + private get configurePostImages() { + return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; + } + get stopped() { return this.abortSignal.aborted; } @@ -411,6 +249,25 @@ export class ChangeStream { return { $match: nsFilter, multipleDatabases }; } + private async checkPostImages(db: string, collectionInfo: mongo.CollectionInfo) { + if (!this.usePostImages) { + // Nothing to check + return; + } + + const enabled = collectionInfo.options?.changeStreamPreAndPostImages?.enabled == true; + + if (!enabled && this.configurePostImages) { + await this.client.db(db).command({ + collMod: collectionInfo.name, + changeStreamPreAndPostImages: { enabled: true } + }); + this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`); + } else if (!enabled) { + throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`); + } + } + static *getQueryData(results: Iterable): Generator { for (let row of results) { yield constructAfterRecord(row); @@ -566,17 +423,97 @@ export class ChangeStream { }; } - private async streamChangesInternal() { - const writers = await Promise.all(this.substreams.map((s) => s.createWriter())); - await using _ = { - [Symbol.asyncDispose]: async () => { - await Promise.all(writers.map((w) => w[Symbol.asyncDispose]())); + async handleRelations( + writer: storage.BucketDataWriter, + descriptor: SourceEntityDescriptor, + options: { snapshot: boolean; collectionInfo: mongo.CollectionInfo | undefined } + ): Promise { + if (options.collectionInfo != null) { + await this.checkPostImages(descriptor.schema, options.collectionInfo); + } else { + // If collectionInfo is null, the collection may have been dropped. + // Ignore the postImages check in this case. + } + + const result = await writer.resolveTable({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor + }); + + const snapshot = options.snapshot; + this.relationCache.set(getCacheIdentifier(descriptor), result.tables); + + // Drop conflicting collections. + // This is generally not expected for MongoDB source dbs, so we log an error. + if (result.dropTables.length > 0) { + this.logger.error( + `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` + ); + await writer.drop(result.dropTables); + } + + // Snapshot if: + // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) + // 2. Snapshot is not already done, AND: + // 3. The table is used in sync rules. + for (let table of result.tables) { + const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; + if (shouldSnapshot) { + this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); + await this.snapshotter.queueSnapshot(writer, table); } - }; + } + + return result.tables; + } + + private async drop(writer: storage.BucketDataWriter, entity: SourceEntityDescriptor): Promise { + const tables = await this.getRelations(writer, entity, { + // We're "dropping" this collection, so never snapshot it. 
+ snapshot: false + }); + if (tables.length > 0) { + await writer.drop(tables); + } + this.relationCache.delete(getCacheIdentifier(entity)); + } + + private async getCollectionInfo(db: string, name: string): Promise { + const collection = ( + await this.client + .db(db) + .listCollections( + { + name: name + }, + { nameOnly: false } + ) + .toArray() + )[0]; + return collection; + } + + async getRelations( + writer: storage.BucketDataWriter, + descriptor: SourceEntityDescriptor, + options: { snapshot: boolean } + ): Promise { + const existing = this.relationCache.get(getCacheIdentifier(descriptor)); + if (existing != null) { + return existing; + } + const collection = await this.getCollectionInfo(descriptor.schema, descriptor.name); + + return this.handleRelations(writer, descriptor, { snapshot: options.snapshot, collectionInfo: collection }); + } + + private async streamChangesInternal() { + await using writers = await this.factory.createCombinedWriter(this.substreams.map((s) => s.storage)); // FIXME: Proper resumeFromLsn implementation for multiple writers // We should probably use the active sync rules for this, or alternatively the minimum from the writers. - const { resumeFromLsn } = writers[0]; + const resumeFromLsn = writers.resumeFromLsn; if (resumeFromLsn == null) { throw new ReplicationAssertionError(`No LSN found to resume from`); } @@ -642,7 +579,7 @@ export class ChangeStream { // doing a keepalive in the middle of a transaction. if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) { const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken); - await Promise.all(writers.map((batch) => batch.keepalive(lsn))); + await writers.keepaliveAll(lsn); this.touch(); lastEmptyResume = performance.now(); // Log the token update. This helps as a general "replication is still active" message in the logs. @@ -772,11 +709,7 @@ export class ChangeStream { if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) { waitForCheckpointLsn = null; } - let didCommit = false; - for (let batch of writers) { - const didWriterCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); - didCommit ||= didWriterCommit; - } + const didCommit = await writers.commitAll(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); if (didCommit) { // TODO: Re-check this logic @@ -788,81 +721,123 @@ export class ChangeStream { continue; } - for (let i = 0; i < this.substreams.length; i++) { - const batch = writers[i]; - const substream = this.substreams[i]; - if ( - changeDocument.operationType == 'insert' || - changeDocument.operationType == 'update' || - changeDocument.operationType == 'replace' || - changeDocument.operationType == 'delete' - ) { - if (waitForCheckpointLsn == null) { - waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); - } - const rel = getMongoRelation(changeDocument.ns); - const table = await substream.getRelation(batch, rel, { - // In most cases, we should not need to snapshot this. But if this is the first time we see the collection - // for whatever reason, then we do need to snapshot it. - // This may result in some duplicate operations when a collection is created for the first time after - // sync rules was deployed. 
- snapshot: true - }); - if (table.syncAny) { - if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) { - this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime); - } - const flushResult = await substream.writeChange(batch, table, changeDocument); - changesSinceLastCheckpoint += 1; - if (flushResult != null && changesSinceLastCheckpoint >= 20_000) { - // When we are catching up replication after an initial snapshot, there may be a very long delay - // before we do a commit(). In that case, we need to periodically persist the resume LSN, so - // we don't restart from scratch if we restart replication. - // The same could apply if we need to catch up on replication after some downtime. - const { comparable: lsn } = new MongoLSN({ - timestamp: changeDocument.clusterTime!, - resume_token: changeDocument._id - }); - this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`); - await batch.setResumeLsn(lsn); - changesSinceLastCheckpoint = 0; - } - } - } else if (changeDocument.operationType == 'drop') { - const rel = getMongoRelation(changeDocument.ns); - const table = await substream.getRelation(batch, rel, { - // We're "dropping" this collection, so never snapshot it. - snapshot: false - }); - if (table.syncAny) { - await batch.drop([table]); - substream.relationCache.delete(table); + if ( + changeDocument.operationType == 'insert' || + changeDocument.operationType == 'update' || + changeDocument.operationType == 'replace' || + changeDocument.operationType == 'delete' + ) { + if (waitForCheckpointLsn == null) { + waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); + } + const rel = getMongoRelation(changeDocument.ns); + const tables = await this.getRelations(writers, rel, { + // In most cases, we should not need to snapshot this. But if this is the first time we see the collection + // for whatever reason, then we do need to snapshot it. + // This may result in some duplicate operations when a collection is created for the first time after + // sync rules was deployed. + snapshot: true + }); + const filtered = tables.filter((t) => t.syncAny); + + for (let table of filtered) { + if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) { + this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime); } - } else if (changeDocument.operationType == 'rename') { - const relFrom = getMongoRelation(changeDocument.ns); - const relTo = getMongoRelation(changeDocument.to); - const tableFrom = await substream.getRelation(batch, relFrom, { - // We're "dropping" this collection, so never snapshot it. - snapshot: false - }); - if (tableFrom.syncAny) { - await batch.drop([tableFrom]); - substream.relationCache.delete(relFrom); + const flushResult = await this.writeChange(writers, table, changeDocument); + changesSinceLastCheckpoint += 1; + if (flushResult != null && changesSinceLastCheckpoint >= 20_000) { + // When we are catching up replication after an initial snapshot, there may be a very long delay + // before we do a commit(). In that case, we need to periodically persist the resume LSN, so + // we don't restart from scratch if we restart replication. + // The same could apply if we need to catch up on replication after some downtime. 
+ const { comparable: lsn } = new MongoLSN({ + timestamp: changeDocument.clusterTime!, + resume_token: changeDocument._id + }); + this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`); + await writers.setAllResumeLsn(lsn); + changesSinceLastCheckpoint = 0; } - // Here we do need to snapshot the new table - const collection = await substream.getCollectionInfo(relTo.schema, relTo.name); - await substream.handleRelation(batch, relTo, { - // This is a new (renamed) collection, so always snapshot it. - snapshot: true, - collectionInfo: collection - }); } + } else if (changeDocument.operationType == 'drop') { + const rel = getMongoRelation(changeDocument.ns); + await this.drop(writers, rel); + } else if (changeDocument.operationType == 'rename') { + const relFrom = getMongoRelation(changeDocument.ns); + const relTo = getMongoRelation(changeDocument.to); + await this.drop(writers, relFrom); + + // Here we do need to snapshot the new table + const collection = await this.getCollectionInfo(relTo.schema, relTo.name); + await this.handleRelations(writers, relTo, { + // This is a new (renamed) collection, so always snapshot it. + snapshot: true, + collectionInfo: collection + }); } } throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason); } + private constructAfterRecord(writer: storage.BucketDataWriter, document: mongo.Document): SqliteRow { + const inputRow = constructAfterRecord(document); + return writer.rowProcessor.applyRowContext(inputRow); + } + + async writeChange( + writer: storage.BucketDataWriter, + table: storage.SourceTable, + change: mongo.ChangeStreamDocument + ): Promise { + if (!table.syncAny) { + this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`); + return null; + } + + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); + if (change.operationType == 'insert') { + const baseRecord = this.constructAfterRecord(writer, change.fullDocument); + return await writer.save({ + tag: SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: baseRecord, + afterReplicaId: change.documentKey._id + }); + } else if (change.operationType == 'update' || change.operationType == 'replace') { + if (change.fullDocument == null) { + // Treat as delete + return await writer.save({ + tag: SaveOperationTag.DELETE, + sourceTable: table, + before: undefined, + beforeReplicaId: change.documentKey._id + }); + } + const after = this.constructAfterRecord(writer, change.fullDocument!); + return await writer.save({ + tag: SaveOperationTag.UPDATE, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: after, + afterReplicaId: change.documentKey._id + }); + } else if (change.operationType == 'delete') { + return await writer.save({ + tag: SaveOperationTag.DELETE, + sourceTable: table, + before: undefined, + beforeReplicaId: change.documentKey._id + }); + } else { + throw new ReplicationAssertionError(`Unsupported operation: ${change.operationType}`); + } + } + async getReplicationLagMillis(): Promise { if (this.oldestUncommittedChange == null) { if (this.isStartingReplication) { diff --git a/modules/module-mongodb/src/replication/MongoRelation.ts b/modules/module-mongodb/src/replication/MongoRelation.ts index 7ca0e51b8..807b295ce 100644 --- a/modules/module-mongodb/src/replication/MongoRelation.ts +++ b/modules/module-mongodb/src/replication/MongoRelation.ts @@ -30,10 +30,7 @@ export function getMongoRelation(source: 
mongo.ChangeStreamNameSpace): storage.S /** * For in-memory cache only. */ -export function getCacheIdentifier(source: storage.SourceEntityDescriptor | storage.SourceTable): string { - if (source instanceof storage.SourceTable) { - return `${source.schema}.${source.name}`; - } +export function getCacheIdentifier(source: storage.SourceEntityDescriptor): string { return `${source.schema}.${source.name}`; } diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index 19b8943b6..948cfc26f 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -153,57 +153,54 @@ export class MongoSnapshotter { const sourceTables = this.sync_rules.getSourceTables(); await this.client.connect(); - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, - defaultSchema: this.defaultDb.databaseName, - storeCurrentData: false, - skipExistingRows: true - }, - async (batch) => { - if (snapshotLsn == null) { - // First replication attempt - get a snapshot and store the timestamp - snapshotLsn = await this.getSnapshotLsn(); - await batch.setResumeLsn(snapshotLsn); - this.logger.info(`Marking snapshot at ${snapshotLsn}`); - } else { - this.logger.info(`Resuming snapshot at ${snapshotLsn}`); - // Check that the snapshot is still valid. - await this.validateSnapshotLsn(snapshotLsn); - } + await using batch = await this.storage.createWriter({ + logger: this.logger, + zeroLSN: MongoLSN.ZERO.comparable, + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false, + skipExistingRows: true + }); - // Start by resolving all tables. - // This checks postImage configuration, and that should fail as - // early as possible. - let allSourceTables: SourceTable[] = []; - for (let tablePattern of sourceTables) { - const tables = await this.resolveQualifiedTableNames(batch, tablePattern); - allSourceTables.push(...tables); - } + if (snapshotLsn == null) { + // First replication attempt - get a snapshot and store the timestamp + snapshotLsn = await this.getSnapshotLsn(); + await batch.setResumeLsn(snapshotLsn); + this.logger.info(`Marking snapshot at ${snapshotLsn}`); + } else { + this.logger.info(`Resuming snapshot at ${snapshotLsn}`); + // Check that the snapshot is still valid. + await this.validateSnapshotLsn(snapshotLsn); + } - let tablesWithStatus: SourceTable[] = []; - for (let table of allSourceTables) { - if (table.snapshotComplete) { - this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); - continue; - } - const count = await this.estimatedCountNumber(table); - const updated = await batch.updateTableProgress(table, { - totalEstimatedCount: count - }); - tablesWithStatus.push(updated); - this.relationCache.update(updated); - this.logger.info( - `To replicate: ${updated.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}` - ); - } + // Start by resolving all tables. + // This checks postImage configuration, and that should fail as + // early as possible. 
+ let allSourceTables: SourceTable[] = []; + for (let tablePattern of sourceTables) { + const tables = await this.resolveQualifiedTableNames(batch, tablePattern); + allSourceTables.push(...tables); + } - for (let table of tablesWithStatus) { - this.queue.add(table); - } + let tablesWithStatus: SourceTable[] = []; + for (let table of allSourceTables) { + if (table.snapshotComplete) { + this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); + continue; } - ); + const count = await this.estimatedCountNumber(table); + const updated = await batch.updateTableProgress(table, { + totalEstimatedCount: count + }); + tablesWithStatus.push(updated); + this.relationCache.update(updated); + this.logger.info( + `To replicate: ${updated.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}` + ); + } + + for (let table of tablesWithStatus) { + this.queue.add(table); + } } async waitForInitialSnapshot() { diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index ce08afb58..a6f78c1af 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -1,9 +1,15 @@ import { ObserverClient } from '@powersync/lib-services-framework'; -import { EvaluatedParameters, EvaluatedRow, SqliteRow, ToastableSqliteRow } from '@powersync/service-sync-rules'; +import { + EvaluatedParameters, + EvaluatedRow, + RowProcessor, + SqliteRow, + ToastableSqliteRow +} from '@powersync/service-sync-rules'; import { BSON } from 'bson'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { SourceTable, TableSnapshotStatus } from './SourceTable.js'; -import { BatchedCustomWriteCheckpointOptions } from './storage-index.js'; +import { BatchedCustomWriteCheckpointOptions, ResolveTableOptions, ResolveTableResult } from './storage-index.js'; import { InternalOpId } from '../util/utils.js'; export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptions = { @@ -11,7 +17,22 @@ export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptio oldestUncommittedChange: null }; -export interface BucketDataWriter { +export interface BucketDataWriter extends BucketDataWriterBase, AsyncDisposable { + readonly rowProcessor: RowProcessor; + + keepaliveAll(lsn: string): Promise; + commitAll(lsn: string, options?: BucketBatchCommitOptions): Promise; + setAllResumeLsn(lsn: string): Promise; + + /** + * Resolve a table, keeping track of it internally. + */ + resolveTable(options: ResolveTableOptions): Promise; +} + +export interface BucketDataWriterBase { + readonly resumeFromLsn: string | null; + /** * Save an op, and potentially flush. 
* @@ -45,7 +66,7 @@ export interface BucketDataWriter { export interface BucketStorageBatch extends ObserverClient, AsyncDisposable, - BucketDataWriter { + BucketDataWriterBase { /** * Alias for [Symbol.asyncDispose] */ diff --git a/packages/service-core/src/storage/BucketStorageFactory.ts b/packages/service-core/src/storage/BucketStorageFactory.ts index 87299fc05..c2f92cdf8 100644 --- a/packages/service-core/src/storage/BucketStorageFactory.ts +++ b/packages/service-core/src/storage/BucketStorageFactory.ts @@ -4,6 +4,7 @@ import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { ReplicationLock } from './ReplicationLock.js'; import { SyncRulesBucketStorage } from './SyncRulesBucketStorage.js'; import { ReportStorage } from './ReportStorage.js'; +import { BucketDataWriter } from './BucketStorageBatch.js'; /** * Represents a configured storage provider. @@ -26,6 +27,8 @@ export interface BucketStorageFactory extends ObserverClient; + /** * Deploy new sync rules. */ diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 027260128..cc190c62f 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -166,16 +166,13 @@ export interface SyncRuleStatus { snapshot_lsn: string | null; } export interface ResolveTableOptions { - group_id: number; connection_id: number; connection_tag: string; entity_descriptor: SourceEntityDescriptor; - - sync_rules: HydratedSyncRules; } export interface ResolveTableResult { - table: SourceTable; + tables: SourceTable[]; dropTables: SourceTable[]; } diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index 4473a5bb1..55790c039 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -36,6 +36,10 @@ export interface RowProcessor { tableSyncsData(table: SourceTableInterface): boolean; tableSyncsParameters(table: SourceTableInterface): boolean; + getMatchingSources(table: SourceTableInterface): { + bucketDataSources: BucketDataSource[]; + parameterIndexLookupCreators: ParameterIndexLookupCreator[]; + }; applyRowContext( source: SqliteRow From 872d3a9832aa91ac586e26ad752022c99b73d01d Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 14 Jan 2026 12:29:14 +0200 Subject: [PATCH 029/101] Initial working through errors. 
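This introduces a combined writer that spans all active sync-rules versions. A rough sketch of the intended call pattern, based on the interfaces added in this patch (`factory`, `storages`, `descriptor` and `lsn` below are placeholder bindings, not part of the change):

    // Hypothetical usage; one writer covers every active sync-rules version.
    await using writer = await factory.createCombinedWriter(storages, {
      logger,
      zeroLSN: MongoLSN.ZERO.comparable,
      defaultSchema: db.databaseName,
      storeCurrentData: false,
      skipExistingRows: false
    });

    // Resolving a table fans out to every sync-rules version that uses it.
    const { tables, dropTables } = await writer.resolveTable({
      connection_id: 1,
      connection_tag: connectionTag,
      entity_descriptor: descriptor
    });

    // Commit/keepalive once; the writer forwards to each per-sync-rules sub-batch.
    await writer.commitAll(lsn, { oldestUncommittedChange: null });
    await writer.keepaliveAll(lsn);

Note that resolveTable() now returns the set of matching tables rather than a single table, so callers iterate the result.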
--- .../src/storage/MongoBucketStorage.ts | 45 +++++- .../implementation/BucketDefinitionMapping.ts | 13 +- .../storage/implementation/MergedSyncRules.ts | 38 ++++- .../implementation/MongoBucketBatch.ts | 24 +-- .../implementation/MongoSyncBucketStorage.ts | 137 +----------------- .../src/replication/ChangeStream.ts | 32 ++-- .../src/storage/BucketStorageFactory.ts | 4 +- .../src/storage/SyncRulesBucketStorage.ts | 5 - 8 files changed, 126 insertions(+), 172 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index 8e5d0b95b..55728d987 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -1,6 +1,6 @@ import { SqlSyncRules } from '@powersync/service-sync-rules'; -import { GetIntanceOptions, storage } from '@powersync/service-core'; +import { GetIntanceOptions, maxLsn, StartBatchOptions, storage } from '@powersync/service-core'; import { BaseObserver, ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { v4 as uuid } from 'uuid'; @@ -14,6 +14,8 @@ import { MongoPersistedSyncRulesContent } from './implementation/MongoPersistedS import { MongoSyncBucketStorage, MongoSyncBucketStorageOptions } from './implementation/MongoSyncBucketStorage.js'; import { generateSlotName } from '../utils/util.js'; import { BucketDefinitionMapping } from './implementation/BucketDefinitionMapping.js'; +import { MongoBucketDataWriter } from './storage-index.js'; +import { MergedSyncRules } from './implementation/MergedSyncRules.js'; export class MongoBucketStorage extends BaseObserver @@ -72,6 +74,47 @@ export class MongoBucketStorage return storage; } + async createCombinedWriter( + storages: storage.SyncRulesBucketStorage[], + options: StartBatchOptions + ): Promise { + const mongoStorages = storages as MongoSyncBucketStorage[]; + const mappings = mongoStorages.map((s) => s.sync_rules.mapping); + const mergedMappings = BucketDefinitionMapping.merged(mappings); + const mergedProcessor = MergedSyncRules.merge(mongoStorages.map((s) => s.getParsedSyncRules(options))); + + const writer = new MongoBucketDataWriter({ + db: this.db, + mapping: mergedMappings, + markRecordUnavailable: options.markRecordUnavailable, + rowProcessor: mergedProcessor, + skipExistingRows: options.skipExistingRows ?? false, + slotName: '', + storeCurrentData: options.storeCurrentData + }); + + for (let storage of mongoStorages) { + const doc = await this.db.sync_rules.findOne( + { + _id: storage.group_id + }, + { projection: { last_checkpoint_lsn: 1, no_checkpoint_before: 1, keepalive_op: 1, snapshot_lsn: 1 } } + ); + const checkpoint_lsn = doc?.last_checkpoint_lsn ?? null; + const parsedSyncRules = storage.getParsedSyncRules(options); + const batch = writer.forSyncRules({ + syncRules: parsedSyncRules, + + lastCheckpointLsn: checkpoint_lsn, + resumeFromLsn: maxLsn(checkpoint_lsn, doc?.snapshot_lsn), + keepaliveOp: doc?.keepalive_op ? 
BigInt(doc.keepalive_op) : null + }); + storage.iterateListeners((cb) => cb.batchStarted?.(batch)); + } + + return writer; + } + async getSystemIdentifier(): Promise { const { setName: id } = await this.db.db.command({ hello: 1 diff --git a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts index f69be6442..a89e72708 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts @@ -7,8 +7,12 @@ export class BucketDefinitionMapping { return new BucketDefinitionMapping(doc.rule_mapping.definitions, doc.rule_mapping.parameter_lookups); } + static merged(mappings: BucketDefinitionMapping[]): BucketDefinitionMapping { + return mappings.reduce((acc, curr) => acc.mergeWith(curr), new BucketDefinitionMapping()); + } + constructor( - private definitions: Record, + private definitions: Record = {}, private parameterLookupMapping: Record = {} ) {} @@ -39,4 +43,11 @@ export class BucketDefinitionMapping { const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; return this.parameterLookupMapping[key] ?? null; } + + mergeWith(other: BucketDefinitionMapping): BucketDefinitionMapping { + return new BucketDefinitionMapping( + { ...this.definitions, ...other.definitions }, + { ...this.parameterLookupMapping, ...other.parameterLookupMapping } + ); + } } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index aa4ebf6b0..f29a08b19 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -1,4 +1,5 @@ import { + BucketDataSource, buildBucketInfo, CompatibilityContext, EvaluatedParameters, @@ -6,10 +7,9 @@ import { EvaluateRowOptions, EvaluationError, EvaluationResult, - HydrationState, isEvaluatedRow, isEvaluationError, - mergeDataSources, + ParameterIndexLookupCreator, RowProcessor, SOURCE, SourceTableInterface, @@ -23,9 +23,13 @@ import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; type EvaluateRowFn = (options: EvaluateRowOptions) => EvaluationResult[]; +interface ResolvedDataSource { + source: BucketDataSource; + evaluate: EvaluateRowFn; +} export class MergedSyncRules implements RowProcessor { static merge(sources: MongoPersistedSyncRules[]): MergedSyncRules { - let evaluators = new Map(); + let resolvedDataSources = new Map(); for (let source of sources) { const syncRules = source.sync_rules; @@ -35,7 +39,7 @@ export class MergedSyncRules implements RowProcessor { for (let source of dataSources) { const scope = hydrationState.getBucketSourceScope(source); const id = mapping.bucketSourceId(source); - if (evaluators.has(id)) { + if (resolvedDataSources.has(id)) { continue; } @@ -54,14 +58,30 @@ export class MergedSyncRules implements RowProcessor { } satisfies EvaluatedRow; }); }; - evaluators.set(id, evaluate); + resolvedDataSources.set(id, { source, evaluate }); } } - return new MergedSyncRules(Array.from(evaluators.values())); + return new MergedSyncRules(resolvedDataSources); + } + + constructor(private resolvedDataSources: Map) {} + + getMatchingSources(table: SourceTableInterface): { + bucketDataSources: BucketDataSource[]; + parameterIndexLookupCreators: 
ParameterIndexLookupCreator[]; + } { + const bucketDataSources = [...this.resolvedDataSources.values()] + .map((dataSource) => dataSource.source) + .filter((ds) => ds.tableSyncsData(table)); + return { + bucketDataSources, + parameterIndexLookupCreators: [ + //FIXME: implement + ] + }; } - constructor(private evaluators: EvaluateRowFn[]) {} eventDescriptors: SqlEventDescriptor[] = []; compatibility: CompatibilityContext = CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY; @@ -95,7 +115,9 @@ export class MergedSyncRules implements RowProcessor { } evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] } { - const rawResults: EvaluationResult[] = this.evaluators.flatMap((evaluator) => evaluator(options)); + const rawResults: EvaluationResult[] = Object.values(this.resolvedDataSources).flatMap((dataSource) => + dataSource.evaluate(options) + ); const results = rawResults.filter(isEvaluatedRow) as EvaluatedRow[]; const errors = rawResults.filter(isEvaluationError) as EvaluationError[]; diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 24e1fe545..4c6b85285 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -92,7 +92,7 @@ export interface ForSyncRulesOptions { export class MongoBucketDataWriter implements storage.BucketDataWriter { private batch: OperationBatch | null = null; - private readonly rowProcessor: RowProcessor; + public readonly rowProcessor: RowProcessor; write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; private readonly client: mongo.MongoClient; @@ -106,7 +106,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { private readonly mapping: BucketDefinitionMapping; private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; - private batches: MongoBucketBatch[] = []; + public subWriters: MongoBucketBatch[] = []; constructor(options: MongoWriterOptions) { this.db = options.db; @@ -131,25 +131,25 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { logger: this.logger, writer: this }); - this.batches.push(batch); + this.subWriters.push(batch); return batch; } async [Symbol.asyncDispose](): Promise { await this.session.endSession(); - for (let batch of this.batches) { + for (let batch of this.subWriters) { await batch[Symbol.asyncDispose](); } } get resumeFromLsn(): string | null { // FIXME: check the logic here when there are multiple batches - return this.batches[0]?.resumeFromLsn ?? null; + return this.subWriters[0]?.resumeFromLsn ?? 
null; } async keepaliveAll(lsn: string): Promise { let didAny = false; - for (let batch of this.batches) { + for (let batch of this.subWriters) { const didBatchKeepalive = await batch.keepalive(lsn); didAny ||= didBatchKeepalive; } @@ -158,7 +158,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { async commitAll(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { let didCommit = false; - for (let batch of this.batches) { + for (let batch of this.subWriters) { const didWriterCommit = await batch.commit(lsn, options); didCommit ||= didWriterCommit; } @@ -166,7 +166,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { } async setAllResumeLsn(lsn: string): Promise { - for (let batch of this.batches) { + for (let batch of this.subWriters) { await batch.setResumeLsn(lsn); } } @@ -349,7 +349,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { throw new ReplicationAssertionError('Unexpected last_op == null'); } - for (let batch of this.batches) { + for (let batch of this.subWriters) { batch.persisted_op = last_op; batch.last_flushed_op = last_op; } @@ -479,7 +479,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { } if (didFlush) { - for (let batch of this.batches) { + for (let batch of this.subWriters) { await batch.clearError(); } } @@ -836,7 +836,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { async save(record: storage.SaveOptions): Promise { const { after, before, sourceTable, tag } = record; for (const event of this.getTableEvents(sourceTable)) { - for (let batch of this.batches) { + for (let batch of this.subWriters) { batch.iterateListeners((cb) => cb.replicationEvent?.({ batch: batch, @@ -897,7 +897,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { } if (last_op) { - for (let batch of this.batches) { + for (let batch of this.subWriters) { batch.persisted_op = last_op; } return { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index ec5151f31..c3aefe6e2 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -48,6 +48,7 @@ import { MongoParameterCompactor } from './MongoParameterCompactor.js'; import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js'; import { MongoPersistedSyncRulesContent } from './MongoPersistedSyncRulesContent.js'; import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; +import { MongoPersistedSyncRules } from '../storage-index.js'; export interface MongoSyncBucketStorageOptions { checksumOptions?: MongoChecksumOptions; @@ -72,15 +73,15 @@ export class MongoSyncBucketStorage readonly checksums: MongoChecksums; private parsedSyncRulesCache: - | { parsed: PersistedSyncRules; hydrated: HydratedSyncRules; options: storage.ParseSyncRulesOptions } + | { parsed: MongoPersistedSyncRules; hydrated: HydratedSyncRules; options: storage.ParseSyncRulesOptions } | undefined; private writeCheckpointAPI: MongoWriteCheckpointAPI; - private mapping: BucketDefinitionMapping; + private readonly mapping: BucketDefinitionMapping; constructor( public readonly factory: MongoBucketStorage, public readonly group_id: number, - private readonly sync_rules: MongoPersistedSyncRulesContent, + public readonly sync_rules: 
MongoPersistedSyncRulesContent, public readonly slot_name: string, writeCheckpointMode?: storage.WriteCheckpointMode, options?: MongoSyncBucketStorageOptions @@ -115,7 +116,7 @@ export class MongoSyncBucketStorage }); } - getParsedSyncRules(options: storage.ParseSyncRulesOptions): PersistedSyncRules { + getParsedSyncRules(options: storage.ParseSyncRulesOptions): MongoPersistedSyncRules { this.getHydratedSyncRules(options); return this.parsedSyncRulesCache!.parsed; } @@ -222,134 +223,6 @@ export class MongoSyncBucketStorage } } - async resolveTable(options: storage.ResolveTableOptions): Promise { - const { connection_id, connection_tag, entity_descriptor, bucket_data_source_ids, parameter_lookup_source_ids } = - options; - - const { schema, name, objectId, replicaIdColumns } = entity_descriptor; - - const normalizedReplicaIdColumns = replicaIdColumns.map((column) => ({ - name: column.name, - type: column.type, - type_oid: column.typeId - })); - let result: storage.ResolveTableResult | null = null; - await this.db.client.withSession(async (session) => { - const col = this.db.source_tables; - let filter: mongo.Filter = { - connection_id: connection_id, - schema_name: schema, - table_name: name, - replica_id_columns2: normalizedReplicaIdColumns - }; - if (objectId != null) { - filter.relation_id = objectId; - } - let docs = await col.find(filter, { session }).toArray(); - let matchingDocs: SourceTableDocument[] = []; - - let coveredBucketDataSourceIds = new Set(); - let coveredParameterLookupSourceIds = new Set(); - - for (let doc of docs) { - const matchingBucketDataSourceIds = doc.bucket_data_source_ids.filter((id) => - bucket_data_source_ids.includes(id) - ); - const matchingParameterLookupSourceIds = doc.parameter_lookup_source_ids.filter((id) => - parameter_lookup_source_ids.includes(id) - ); - if (matchingBucketDataSourceIds.length == 0 && matchingParameterLookupSourceIds.length == 0) { - // Not relevant - continue; - } - matchingDocs.push(doc); - } - - const pendingBucketDataSourceIds = bucket_data_source_ids.filter((id) => !coveredBucketDataSourceIds.has(id)); - const pendingParameterLookupSourceIds = parameter_lookup_source_ids.filter( - (id) => !coveredParameterLookupSourceIds.has(id) - ); - if (pendingBucketDataSourceIds.length > 0 || pendingParameterLookupSourceIds.length > 0) { - const doc: SourceTableDocument = { - _id: new bson.ObjectId(), - connection_id: connection_id, - relation_id: objectId, - schema_name: schema, - table_name: name, - replica_id_columns: null, - replica_id_columns2: normalizedReplicaIdColumns, - snapshot_done: false, - snapshot_status: undefined, - bucket_data_source_ids: bucket_data_source_ids, - parameter_lookup_source_ids: parameter_lookup_source_ids - }; - - await col.insertOne(doc, { session }); - docs.push(doc); - } - - const sourceTables = docs.map((doc) => { - const sourceTable = new storage.SourceTable({ - id: doc._id, - connectionTag: connection_tag, - objectId: objectId, - schema: schema, - name: name, - replicaIdColumns: replicaIdColumns, - snapshotComplete: doc.snapshot_done ?? true - }); - sourceTable.snapshotStatus = - doc.snapshot_status == null - ? undefined - : { - lastKey: doc.snapshot_status.last_key?.buffer ?? 
null, - totalEstimatedCount: doc.snapshot_status.total_estimated_count, - replicatedCount: doc.snapshot_status.replicated_count - }; - return sourceTable; - }); - - // FIXME: dropTables - // let dropTables: storage.SourceTable[] = []; - // // Detect tables that are either renamed, or have different replica_id_columns - // let truncateFilter = [{ schema_name: schema, table_name: name }] as any[]; - // if (objectId != null) { - // // Only detect renames if the source uses relation ids. - // truncateFilter.push({ relation_id: objectId }); - // } - // const truncate = await col - // .find( - // { - // group_id: group_id, - // connection_id: connection_id, - // _id: { $ne: doc._id }, - // $or: truncateFilter - // }, - // { session } - // ) - // .toArray(); - // dropTables = truncate.map( - // (doc) => - // new storage.SourceTable({ - // id: doc._id, - // connectionTag: connection_tag, - // objectId: doc.relation_id, - // schema: doc.schema_name, - // name: doc.table_name, - // replicaIdColumns: - // doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], - // snapshotComplete: doc.snapshot_done ?? true - // }) - // ); - - result = { - tables: sourceTables, - dropTables: [] - }; - }); - return result!; - } - async getParameterSets( checkpoint: MongoReplicationCheckpoint, lookups: ScopedParameterLookup[] diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 118335c55..83a742c56 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -18,7 +18,7 @@ import { } from '@powersync/service-core'; import { DatabaseInputRow, HydratedSyncRules, SqliteInputRow, SqliteRow } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; -import { MongoLSN } from '../common/MongoLSN.js'; +import { MongoLSN, ZERO_LSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; import { escapeRegExp } from '../utils.js'; import { ChangeStreamInvalidatedError, mapChangeStreamError } from './ChangeStreamErrors.js'; @@ -509,11 +509,21 @@ export class ChangeStream { } private async streamChangesInternal() { - await using writers = await this.factory.createCombinedWriter(this.substreams.map((s) => s.storage)); + await using writer = await this.factory.createCombinedWriter( + this.substreams.map((s) => s.storage), + { + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false, + zeroLSN: ZERO_LSN, + logger: this.logger, + markRecordUnavailable: undefined, + skipExistingRows: false + } + ); // FIXME: Proper resumeFromLsn implementation for multiple writers // We should probably use the active sync rules for this, or alternatively the minimum from the writers. - const resumeFromLsn = writers.resumeFromLsn; + const resumeFromLsn = writer.resumeFromLsn; if (resumeFromLsn == null) { throw new ReplicationAssertionError(`No LSN found to resume from`); } @@ -579,7 +589,7 @@ export class ChangeStream { // doing a keepalive in the middle of a transaction. if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) { const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken); - await writers.keepaliveAll(lsn); + await writer.keepaliveAll(lsn); this.touch(); lastEmptyResume = performance.now(); // Log the token update. This helps as a general "replication is still active" message in the logs. 
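A possible shape for the multi-writer resumeFromLsn flagged in the FIXME above, if the "minimum from the writers" option is taken (a sketch only; it assumes each sub-writer exposes resumeFromLsn in the string-comparable LSN form used elsewhere in this stream):

    get resumeFromLsn(): string | null {
      // Resume from the oldest position any sub-writer still needs, so no
      // sync-rules version misses changes after a restart.
      const lsns = this.subWriters
        .map((w) => w.resumeFromLsn)
        .filter((lsn): lsn is string => lsn != null);
      return lsns.length > 0 ? lsns.reduce((a, b) => (a < b ? a : b)) : null;
    }
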
@@ -709,7 +719,7 @@ export class ChangeStream { if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) { waitForCheckpointLsn = null; } - const didCommit = await writers.commitAll(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + const didCommit = await writer.commitAll(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); if (didCommit) { // TODO: Re-check this logic @@ -731,7 +741,7 @@ export class ChangeStream { waitForCheckpointLsn = await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId); } const rel = getMongoRelation(changeDocument.ns); - const tables = await this.getRelations(writers, rel, { + const tables = await this.getRelations(writer, rel, { // In most cases, we should not need to snapshot this. But if this is the first time we see the collection // for whatever reason, then we do need to snapshot it. // This may result in some duplicate operations when a collection is created for the first time after @@ -744,7 +754,7 @@ export class ChangeStream { if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) { this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime); } - const flushResult = await this.writeChange(writers, table, changeDocument); + const flushResult = await this.writeChange(writer, table, changeDocument); changesSinceLastCheckpoint += 1; if (flushResult != null && changesSinceLastCheckpoint >= 20_000) { // When we are catching up replication after an initial snapshot, there may be a very long delay @@ -756,21 +766,21 @@ export class ChangeStream { resume_token: changeDocument._id }); this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`); - await writers.setAllResumeLsn(lsn); + await writer.setAllResumeLsn(lsn); changesSinceLastCheckpoint = 0; } } } else if (changeDocument.operationType == 'drop') { const rel = getMongoRelation(changeDocument.ns); - await this.drop(writers, rel); + await this.drop(writer, rel); } else if (changeDocument.operationType == 'rename') { const relFrom = getMongoRelation(changeDocument.ns); const relTo = getMongoRelation(changeDocument.to); - await this.drop(writers, relFrom); + await this.drop(writer, relFrom); // Here we do need to snapshot the new table const collection = await this.getCollectionInfo(relTo.schema, relTo.name); - await this.handleRelations(writers, relTo, { + await this.handleRelations(writer, relTo, { // This is a new (renamed) collection, so always snapshot it. 
snapshot: true, collectionInfo: collection diff --git a/packages/service-core/src/storage/BucketStorageFactory.ts b/packages/service-core/src/storage/BucketStorageFactory.ts index c2f92cdf8..500466d33 100644 --- a/packages/service-core/src/storage/BucketStorageFactory.ts +++ b/packages/service-core/src/storage/BucketStorageFactory.ts @@ -2,7 +2,7 @@ import { ObserverClient } from '@powersync/lib-services-framework'; import { ParseSyncRulesOptions, PersistedSyncRules, PersistedSyncRulesContent } from './PersistedSyncRulesContent.js'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { ReplicationLock } from './ReplicationLock.js'; -import { SyncRulesBucketStorage } from './SyncRulesBucketStorage.js'; +import { StartBatchOptions, SyncRulesBucketStorage } from './SyncRulesBucketStorage.js'; import { ReportStorage } from './ReportStorage.js'; import { BucketDataWriter } from './BucketStorageBatch.js'; @@ -27,7 +27,7 @@ export interface BucketStorageFactory extends ObserverClient; + createCombinedWriter(storage: SyncRulesBucketStorage[], options: StartBatchOptions): Promise; /** * Deploy new sync rules. diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index cc190c62f..67ac5e9eb 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -24,11 +24,6 @@ export interface SyncRulesBucketStorage readonly factory: BucketStorageFactory; - /** - * Resolve a table, keeping track of it internally. - */ - resolveTable(options: ResolveTableOptions): Promise; - /** * Use this to get access to update storage data. * From d816b6bf118411477084589c304a27cb47b14808 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 14 Jan 2026 13:52:55 +0200 Subject: [PATCH 030/101] Restructure snapshotter. 
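This moves the snapshot bookkeeping from MongoBucketBatch into the shared MongoBucketDataWriter, which routes each source-table document to the sync-rules versions that actually reference it via the per-batch BucketDefinitionMapping. Roughly, the matching added below looks like this (`tableDoc` stands in for a SourceTableDocument loaded from source_tables):

    // A sub-writer is affected by a table when its mapping contains any of the
    // bucket-data or parameter-lookup source ids recorded on the table document.
    const affected = writer.subWriters.filter(
      (sub) =>
        tableDoc.bucket_data_source_ids.some((id) => sub.mapping.hasBucketSourceId(id)) ||
        tableDoc.parameter_lookup_source_ids.some((id) => sub.mapping.hasParameterLookupId(id))
    );
    // markTableSnapshotDone() then only advances no_checkpoint_before on those groups.
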
--- .../implementation/BucketDefinitionMapping.ts | 8 + .../implementation/MongoBucketBatch.ts | 197 ++++++++---- .../implementation/MongoSyncBucketStorage.ts | 31 +- .../src/replication/ChangeStream.ts | 151 +++++---- .../src/replication/MongoSnapshotter.ts | 297 ++++++------------ .../src/storage/BucketStorageBatch.ts | 15 +- 6 files changed, 348 insertions(+), 351 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts index a89e72708..4af2dd666 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts @@ -16,6 +16,14 @@ export class BucketDefinitionMapping { private parameterLookupMapping: Record = {} ) {} + hasBucketSourceId(id: number) { + return Object.values(this.definitions).includes(id); + } + + hasParameterLookupId(id: number) { + return Object.values(this.parameterLookupMapping).includes(id); + } + bucketSourceId(source: BucketDataSource): number { const defId = this.definitions[source.uniqueName]; if (defId == null) { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 4c6b85285..66531a45d 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -1,7 +1,7 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { - HydratedSyncRules, RowProcessor, + SourceTableInterface, SqlEventDescriptor, SqliteRow, SqliteValue @@ -23,8 +23,9 @@ import { deserializeBson, InternalOpId, isCompleteRow, - PersistedSyncRulesContent, SaveOperationTag, + SourceTable, + SourceTableId, storage, SyncRuleState, utils @@ -171,6 +172,127 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { } } + private findMatchingSubWriters(tables: SourceTableDocument[]) { + return this.subWriters.filter((subWriter) => { + return tables.some((table) => subWriter.hasTable(table)); + }); + } + + async markTableSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn?: string) { + const session = this.session; + const ids = tables.map((table) => mongoTableId(table.id)); + + await this.withTransaction(async () => { + await this.db.source_tables.updateMany( + { _id: { $in: ids } }, + { + $set: { + snapshot_done: true + }, + $unset: { + snapshot_status: 1 + } + }, + { session } + ); + + const updatedTables = await this.db.source_tables.find({ _id: { $in: ids } }, { session }).toArray(); + + if (no_checkpoint_before_lsn != null) { + const affectedSubWriters = this.findMatchingSubWriters(updatedTables); + + await this.db.sync_rules.updateOne( + { + _id: { $in: affectedSubWriters.map((w) => w.group_id) } + }, + { + $set: { + last_keepalive_ts: new Date() + }, + $max: { + no_checkpoint_before: no_checkpoint_before_lsn + } + }, + { session: this.session } + ); + } + }); + return tables.map((table) => { + const copy = table.clone(); + copy.snapshotComplete = true; + return copy; + }); + } + + async markTableSnapshotRequired(table: SourceTable): Promise { + const doc = await this.db.source_tables.findOne({ _id: mongoTableId(table.id) }); + if (doc == null) { + return; + } + + const subWriters = this.findMatchingSubWriters([doc]); + + await this.db.sync_rules.updateOne( + { + _id: 
{ $in: subWriters.map((w) => w.group_id) } + }, + { + $set: { + snapshot_done: false + } + }, + { session: this.session } + ); + } + + async markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise { + await this.db.sync_rules.updateOne( + { + _id: { $in: this.subWriters.map((w) => w.group_id) }, + snapshot_done: { $ne: true } + }, + { + $set: { + snapshot_done: true, + last_keepalive_ts: new Date() + }, + $max: { + no_checkpoint_before: no_checkpoint_before_lsn + } + }, + { session: this.session } + ); + } + + async getTable(ref: SourceTable): Promise { + const doc = await this.db.source_tables.findOne({ _id: mongoTableId(ref.id) }); + if (doc == null) { + return null; + } + const sourceTable = new storage.SourceTable({ + id: doc._id, + objectId: doc.relation_id, + schema: doc.schema_name, + connectionTag: ref.connectionTag, + name: doc.table_name, + replicaIdColumns: ref.replicaIdColumns, + snapshotComplete: doc.snapshot_done ?? true + }); + sourceTable.snapshotStatus = + doc.snapshot_status == null + ? undefined + : { + lastKey: doc.snapshot_status.last_key?.buffer ?? null, + totalEstimatedCount: doc.snapshot_status.total_estimated_count, + replicatedCount: doc.snapshot_status.replicated_count + }; + + sourceTable.syncData = doc.bucket_data_source_ids.length > 0; + sourceTable.syncParameters = doc.parameter_lookup_source_ids.length > 0; + // FIXME: implement sourceTable.syncEvent + return sourceTable; + } + async resolveTable(options: storage.ResolveTableOptions): Promise { const sources = this.rowProcessor.getMatchingSources({ connectionTag: options.connection_tag, @@ -1012,11 +1134,10 @@ export class MongoBucketBatch { private logger: Logger; - private readonly client: mongo.MongoClient; public readonly db: PowerSyncMongo; public readonly session: mongo.ClientSession; - private readonly group_id: number; + public readonly group_id: number; private clearedError = false; @@ -1052,24 +1173,34 @@ export class MongoBucketBatch private readonly writer: MongoBucketDataWriter; + public readonly mapping: BucketDefinitionMapping; + constructor(options: MongoBucketBatchOptions) { super(); this.logger = options.logger ?? defaultLogger; - this.client = options.db.client; this.db = options.db; this.group_id = options.syncRules.id; this.last_checkpoint_lsn = options.lastCheckpointLsn; this.resumeFromLsn = options.resumeFromLsn; this.writer = options.writer; this.session = this.writer.session; + this.mapping = options.syncRules.mapping; this.persisted_op = options.keepaliveOp ?? 
null; } - updateTableProgress( + + async updateTableProgress( table: storage.SourceTable, progress: Partial ): Promise { - throw new Error('Method not implemented.'); + return await this.writer.updateTableProgress(table, progress); + } + + hasTable(sourceTable: SourceTableDocument): boolean { + return ( + sourceTable.bucket_data_source_ids.some((id) => this.mapping.hasBucketSourceId(id)) || + sourceTable.parameter_lookup_source_ids.some((id) => this.mapping.hasParameterLookupId(id)) + ); } save(record: storage.SaveOptions): Promise { @@ -1383,59 +1514,11 @@ export class MongoBucketBatch } async markTableSnapshotRequired(table: storage.SourceTable): Promise { - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, - { - $set: { - snapshot_done: false - } - }, - { session: this.session } - ); + await this.writer.markTableSnapshotRequired(table); } async markTableSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn?: string) { - const session = this.session; - const ids = tables.map((table) => mongoTableId(table.id)); - - await this.writer.withTransaction(async () => { - await this.db.source_tables.updateMany( - { _id: { $in: ids } }, - { - $set: { - snapshot_done: true - }, - $unset: { - snapshot_status: 1 - } - }, - { session } - ); - - if (no_checkpoint_before_lsn != null) { - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, - { - $set: { - last_keepalive_ts: new Date() - }, - $max: { - no_checkpoint_before: no_checkpoint_before_lsn - } - }, - { session: this.session } - ); - } - }); - return tables.map((table) => { - const copy = table.clone(); - copy.snapshotComplete = true; - return copy; - }); + return this.writer.markTableSnapshotDone(tables, no_checkpoint_before_lsn); } async clearError(): Promise { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index c3aefe6e2..497164c0e 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -177,35 +177,8 @@ export class MongoSyncBucketStorage } async createWriter(options: storage.StartBatchOptions): Promise { - const doc = await this.db.sync_rules.findOne( - { - _id: this.group_id - }, - { projection: { last_checkpoint_lsn: 1, no_checkpoint_before: 1, keepalive_op: 1, snapshot_lsn: 1 } } - ); - const checkpoint_lsn = doc?.last_checkpoint_lsn ?? null; - - const parsedSyncRules = this.sync_rules.parsed(options); - - const writer = new MongoBucketDataWriter({ - logger: options.logger, - db: this.db, - slotName: this.slot_name, - storeCurrentData: options.storeCurrentData, - skipExistingRows: options.skipExistingRows ?? false, - markRecordUnavailable: options.markRecordUnavailable, - mapping: this.mapping, - rowProcessor: parsedSyncRules.hydratedSyncRules() - }); - const batch = writer.forSyncRules({ - syncRules: parsedSyncRules, - - lastCheckpointLsn: checkpoint_lsn, - resumeFromLsn: maxLsn(checkpoint_lsn, doc?.snapshot_lsn), - keepaliveOp: doc?.keepalive_op ? 
BigInt(doc.keepalive_op) : null - }); - this.iterateListeners((cb) => cb.batchStarted?.(batch)); - return batch; + const writer = await this.factory.createCombinedWriter([this], options); + return writer.subWriters[0]; } async startBatch( diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 83a742c56..28e9d106f 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -62,55 +62,37 @@ interface SubStreamOptions { logger: Logger; abortSignal: AbortSignal; checkpointStreamId: mongo.ObjectId; - snapshotChunkLength?: number; - metrics: MetricsEngine; maxAwaitTimeMS: number; } +interface InitResult { + needsInitialSync: boolean; + snapshotLsn: string | null; +} + class SubStream { - private readonly connection_id = 1; private readonly connections: MongoManager; public readonly storage: storage.SyncRulesBucketStorage; public readonly syncRules: HydratedSyncRules; private readonly logger: Logger; - public readonly snapshotter: MongoSnapshotter; - private readonly client: mongo.MongoClient; - private readonly metrics: MetricsEngine; - private readonly abortSignal: AbortSignal; constructor(options: SubStreamOptions) { this.connections = options.connections; - this.client = this.connections.client; this.storage = options.storage; this.logger = options.logger; - this.metrics = options.metrics; - this.abortSignal = options.abortSignal; this.syncRules = this.storage.getHydratedSyncRules({ defaultSchema: this.connections.db.databaseName }); - this.snapshotter = new MongoSnapshotter({ - abort_signal: options.abortSignal, - checkpointStreamId: options.checkpointStreamId, - connections: this.connections, - storage: this.storage, - logger: this.logger.child({ prefix: `[powersync_${this.storage.group_id}_snapshot] ` }), - snapshotChunkLength: options.snapshotChunkLength, - metrics: options.metrics, - maxAwaitTimeMS: options.maxAwaitTimeMS - }); } - async initReplication() { - const result = await this.snapshotter.checkSlot(); - // FIXME: This should be done once, not per sub-stream - await this.snapshotter.setupCheckpointsCollection(); - if (result.needsInitialSync) { - if (result.snapshotLsn == null) { - // Snapshot LSN is not present, so we need to start replication from scratch. - await this.storage.clear({ signal: this.abortSignal }); - } - await this.snapshotter.queueSnapshotTables(result.snapshotLsn); + async checkSlot(): Promise { + const status = await this.storage.getStatus(); + if (status.snapshot_done && status.checkpoint_lsn) { + this.logger.info(`Initial replication already done`); + return { needsInitialSync: false, snapshotLsn: null }; } + + return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn }; } } @@ -151,6 +133,10 @@ export class ChangeStream { public readonly relationCache = new Map(); + private readonly snapshotter: MongoSnapshotter; + + private readonly snapshotChunkLength: number | undefined; + constructor(options: ChangeStreamOptions) { this.metrics = options.metrics; this.connections = options.connections; @@ -163,6 +149,7 @@ export class ChangeStream { this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9); this.logger = options.logger ?? 
defaultLogger; + this.snapshotChunkLength = options.snapshotChunkLength; this.substreams = options.streams.map((config) => { return new SubStream({ @@ -171,12 +158,35 @@ export class ChangeStream { connections: this.connections, storage: config.storage, logger: this.logger.child({ prefix: `[powersync_${config.storage.group_id}] ` }), - snapshotChunkLength: options.snapshotChunkLength, - maxAwaitTimeMS: this.maxAwaitTimeMS, - metrics: this.metrics + maxAwaitTimeMS: this.maxAwaitTimeMS }); }); + const snapshotLogger = this.logger.child({ prefix: `[powersync_snapshot] ` }); + + const snapshotter = new MongoSnapshotter({ + writer: async () => { + const writer = await this.factory.createCombinedWriter( + this.substreams.map((s) => s.storage), + { + defaultSchema: this.defaultDb.databaseName, + storeCurrentData: false, + zeroLSN: ZERO_LSN, + logger: snapshotLogger + } + ); + return writer; + }, + abort_signal: this.abortSignal, + checkpointStreamId: this.checkpointStreamId, + connections: this.connections, + logger: snapshotLogger, + snapshotChunkLength: this.snapshotChunkLength, + metrics: this.metrics, + maxAwaitTimeMS: this.maxAwaitTimeMS + }); + this.snapshotter = snapshotter; + // We wrap in our own abort controller so we can trigger abort internally. options.abort_signal.addEventListener('abort', () => { this.abortController.abort(options.abort_signal.reason); @@ -274,14 +284,51 @@ export class ChangeStream { } } + private async createSnapshotter() {} + + private async setupCheckpointsCollection() { + const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION); + if (collection == null) { + await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { + changeStreamPreAndPostImages: { enabled: true } + }); + } else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) { + // Drop + create requires less permissions than collMod, + // and we don't care about the data in this collection. + await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION); + await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { + changeStreamPreAndPostImages: { enabled: true } + }); + } else { + // Clear the collection on startup, to keep it clean + // We never query this collection directly, and don't want to keep the data around. + // We only use this to get data into the oplog/changestream. + await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({}); + } + } + + private async initReplication() { + await this.setupCheckpointsCollection(); + for (let stream of this.substreams) { + const result = await stream.checkSlot(); + + if (result.needsInitialSync) { + if (result.snapshotLsn == null) { + // Snapshot LSN is not present, so we need to start replication from scratch. + await stream.storage.clear({ signal: this.abortSignal }); + } + await this.snapshotter.queueSnapshotTables(result.snapshotLsn); + } + } + } + async replicate() { let streamPromise: Promise | null = null; - let loopPromises: Promise[] = []; + let loopPromise: Promise | null = null; try { // If anything errors here, the entire replication process is halted, and // all connections automatically closed, including this one. 
this.initPromise = this.initReplication(); - await this.initPromise; streamPromise = this.streamChanges() .then(() => { throw new ReplicationAssertionError(`Replication stream exited unexpectedly`); @@ -295,19 +342,21 @@ export class ChangeStream { this.abortController.abort(e); throw e; }); - loopPromises = this.substreams.map((s) => - s.snapshotter - .replicationLoop() - .then(() => { - throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); - }) - .catch(async (e) => { - await s.storage.reportError(e); - this.abortController.abort(e); - throw e; - }) - ); - const results = await Promise.allSettled([...loopPromises, streamPromise]); + loopPromise = this.snapshotter + .replicationLoop() + .then(() => { + throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); + }) + .catch(async (e) => { + // Report stream errors to all substreams for now - we can't yet distinguish the errors + for (let substream of this.substreams) { + await substream.storage.reportError(e); + } + + this.abortController.abort(e); + throw e; + }); + const results = await Promise.allSettled([loopPromise, streamPromise]); // First, prioritize non-aborted errors for (let result of results) { if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) { @@ -337,11 +386,7 @@ export class ChangeStream { throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()'); } await this.initPromise; - await Promise.all(this.substreams.map((s) => s.snapshotter.waitForInitialSnapshot())); - } - - private async initReplication() { - await Promise.all(this.substreams.map((substream) => substream.initReplication())); + await this.snapshotter?.waitForInitialSnapshot(); } private async streamChanges() { diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index 948cfc26f..3b5992d1d 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -1,49 +1,29 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { container, - ErrorCode, logger as defaultLogger, + ErrorCode, Logger, ReplicationAbortedError, ServiceError } from '@powersync/lib-services-framework'; -import { - MetricsEngine, - RelationCache, - SaveOperationTag, - SourceEntityDescriptor, - SourceTable, - InternalOpId, - storage -} from '@powersync/service-core'; -import { - DatabaseInputRow, - SqliteInputRow, - SqliteRow, - HydratedSyncRules, - TablePattern -} from '@powersync/service-sync-rules'; +import { InternalOpId, MetricsEngine, SaveOperationTag, SourceTable, storage } from '@powersync/service-core'; +import { DatabaseInputRow, RowProcessor, SqliteInputRow, SqliteRow, TablePattern } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; import * as timers from 'node:timers/promises'; import pDefer from 'p-defer'; import { MongoLSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; import { escapeRegExp } from '../utils.js'; -import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js'; -import { - constructAfterRecord, - createCheckpoint, - getCacheIdentifier, - getMongoRelation, - STANDALONE_CHECKPOINT_ID -} from './MongoRelation.js'; -import { MongoManager } from './MongoManager.js'; import { mapChangeStreamError } from './ChangeStreamErrors.js'; +import { MongoManager } from './MongoManager.js'; +import { 
constructAfterRecord, createCheckpoint, getMongoRelation, STANDALONE_CHECKPOINT_ID } from './MongoRelation.js'; +import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js'; import { CHECKPOINTS_COLLECTION } from './replication-utils.js'; export interface MongoSnapshotterOptions { connections: MongoManager; - storage: storage.SyncRulesBucketStorage; + writer: () => Promise; metrics: MetricsEngine; abort_signal: AbortSignal; /** @@ -58,18 +38,11 @@ export interface MongoSnapshotterOptions { checkpointStreamId: mongo.ObjectId; } -interface InitResult { - needsInitialSync: boolean; - snapshotLsn: string | null; -} - export class MongoSnapshotter { - sync_rules: HydratedSyncRules; - group_id: number; - connection_id = 1; - private readonly storage: storage.SyncRulesBucketStorage; + private readonly writerFactory: () => Promise; + private readonly metrics: MetricsEngine; private connections: MongoManager; @@ -81,8 +54,6 @@ export class MongoSnapshotter { private abortSignal: AbortSignal; - private relationCache = new RelationCache(getCacheIdentifier); - private logger: Logger; private checkpointStreamId: mongo.ObjectId; @@ -93,17 +64,13 @@ export class MongoSnapshotter { private lastSnapshotOpId: InternalOpId | null = null; constructor(options: MongoSnapshotterOptions) { - this.storage = options.storage; + this.writerFactory = options.writer; this.metrics = options.metrics; - this.group_id = options.storage.group_id; this.connections = options.connections; this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000; this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000; this.client = this.connections.client; this.defaultDb = this.connections.db; - this.sync_rules = options.storage.getHydratedSyncRules({ - defaultSchema: this.defaultDb.databaseName - }); this.abortSignal = options.abort_signal; this.logger = options.logger ?? defaultLogger; this.checkpointStreamId = options.checkpointStreamId; @@ -118,66 +85,29 @@ export class MongoSnapshotter { return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE; } - async checkSlot(): Promise { - const status = await this.storage.getStatus(); - if (status.snapshot_done && status.checkpoint_lsn) { - this.logger.info(`Initial replication already done`); - return { needsInitialSync: false, snapshotLsn: null }; - } - - return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn }; - } - - async setupCheckpointsCollection() { - const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION); - if (collection == null) { - await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { - changeStreamPreAndPostImages: { enabled: true } - }); - } else if (this.usePostImages && collection.options?.changeStreamPreAndPostImages?.enabled != true) { - // Drop + create requires less permissions than collMod, - // and we don't care about the data in this collection. - await this.defaultDb.dropCollection(CHECKPOINTS_COLLECTION); - await this.defaultDb.createCollection(CHECKPOINTS_COLLECTION, { - changeStreamPreAndPostImages: { enabled: true } - }); - } else { - // Clear the collection on startup, to keep it clean - // We never query this collection directly, and don't want to keep the data around. - // We only use this to get data into the oplog/changestream. 
- await this.defaultDb.collection(CHECKPOINTS_COLLECTION).deleteMany({}); - } - } - async queueSnapshotTables(snapshotLsn: string | null) { - const sourceTables = this.sync_rules.getSourceTables(); - await this.client.connect(); - - await using batch = await this.storage.createWriter({ - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, - defaultSchema: this.defaultDb.databaseName, - storeCurrentData: false, - skipExistingRows: true - }); + await using writer = await this.writerFactory(); + const sourceTables = writer.rowProcessor.getSourceTables(); if (snapshotLsn == null) { // First replication attempt - get a snapshot and store the timestamp - snapshotLsn = await this.getSnapshotLsn(); - await batch.setResumeLsn(snapshotLsn); + snapshotLsn = await this.getSnapshotLsn(writer); + // FIXME: check the logic for resumeLSN. + await writer.setAllResumeLsn(snapshotLsn); this.logger.info(`Marking snapshot at ${snapshotLsn}`); } else { this.logger.info(`Resuming snapshot at ${snapshotLsn}`); // Check that the snapshot is still valid. - await this.validateSnapshotLsn(snapshotLsn); + await this.validateSnapshotLsn(writer, snapshotLsn); } // Start by resolving all tables. // This checks postImage configuration, and that should fail as // early as possible. + // This resolves _all_ tables, including those already snapshotted. let allSourceTables: SourceTable[] = []; for (let tablePattern of sourceTables) { - const tables = await this.resolveQualifiedTableNames(batch, tablePattern); + const tables = await this.resolveQualifiedTableNames(writer, tablePattern); allSourceTables.push(...tables); } @@ -188,11 +118,10 @@ export class MongoSnapshotter { continue; } const count = await this.estimatedCountNumber(table); - const updated = await batch.updateTableProgress(table, { + const updated = await writer.updateTableProgress(table, { totalEstimatedCount: count }); tablesWithStatus.push(updated); - this.relationCache.update(updated); this.logger.info( `To replicate: ${updated.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}` ); @@ -209,9 +138,10 @@ export class MongoSnapshotter { async replicationLoop() { try { + await using writer = await this.writerFactory(); if (this.queue.size == 0) { // Special case where we start with no tables to snapshot - await this.markSnapshotDone(); + await this.markSnapshotDone(writer); } while (!this.abortSignal.aborted) { const table = this.queue.values().next().value; @@ -221,10 +151,10 @@ export class MongoSnapshotter { continue; } - await this.replicateTable(table); + await this.replicateTable(writer, table); this.queue.delete(table); if (this.queue.size == 0) { - await this.markSnapshotDone(); + await this.markSnapshotDone(writer); } } throw new ReplicationAbortedError(`Replication loop aborted`, this.abortSignal.reason); @@ -235,72 +165,57 @@ export class MongoSnapshotter { } } - private async markSnapshotDone() { - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, - defaultSchema: this.defaultDb.databaseName, - storeCurrentData: false, - skipExistingRows: true - }, - async (batch) => { - // The checkpoint here is a marker - we need to replicate up to at least this - // point before the data can be considered consistent. 
- const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - await batch.markAllSnapshotDone(checkpoint); - // KLUDGE: We need to create an extra checkpoint _after_ marking the snapshot done, to fix - // issues with order of processing commits(). This is picked up by tests on postgres storage, - // the issue may be specific to that storage engine. - await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - } - ); - - const lastOp = flushResults?.flushed_op ?? this.lastSnapshotOpId; - if (lastOp != null) { + private async markSnapshotDone(writer: storage.BucketDataWriter) { + // The checkpoint here is a marker - we need to replicate up to at least this + // point before the data can be considered consistent. + const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); + await writer.markAllSnapshotDone(checkpoint); + // KLUDGE: We need to create an extra checkpoint _after_ marking the snapshot done, to fix + // issues with order of processing commits(). This is picked up by tests on postgres storage, + // the issue may be specific to that storage engine. + await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); + + if (this.lastSnapshotOpId != null) { // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. // TODO: only run this after initial replication, not after each table. - await this.storage.populatePersistentChecksumCache({ - // No checkpoint yet, but we do have the opId. - maxOpId: lastOp, - signal: this.abortSignal - }); + // FIXME: implement this again + // await this.storage.populatePersistentChecksumCache({ + // // No checkpoint yet, but we do have the opId. + // maxOpId: this.lastSnapshotOpId, + // signal: this.abortSignal + // }); } } - private async replicateTable(tableRequest: SourceTable) { - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: MongoLSN.ZERO.comparable, - defaultSchema: this.defaultDb.databaseName, - storeCurrentData: false, - skipExistingRows: true - }, - async (batch) => { - // Get fresh table info, in case it was updated while queuing - const table = await this.handleRelation(batch, tableRequest, { collectionInfo: undefined }); - if (table.snapshotComplete) { - return; - } - await this.snapshotTable(batch, table); + private async replicateTable(writer: storage.BucketDataWriter, tableRequest: SourceTable) { + // Get fresh table info, in case it was updated while queuing + const table = await writer.getTable(tableRequest); + if (table == null) { + return; + } + if (table.snapshotComplete) { + return; + } + await this.snapshotTable(writer, table); - const noCheckpointBefore = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - await batch.markTableSnapshotDone([table], noCheckpointBefore); + const noCheckpointBefore = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); + await writer.markTableSnapshotDone([table], noCheckpointBefore); - // This commit ensures we set keepalive_op. - const resumeLsn = batch.resumeFromLsn ?? MongoLSN.ZERO.comparable; - await batch.commit(resumeLsn); - } - ); - if (flushResults?.flushed_op != null) { - this.lastSnapshotOpId = flushResults.flushed_op; - } - this.logger.info(`Flushed snapshot at ${flushResults?.flushed_op}`); + // This commit ensures we set keepalive_op. + const resumeLsn = writer.resumeFromLsn ?? 
MongoLSN.ZERO.comparable; + // FIXME: Only commit on relevant syncRules? + + await writer.commitAll(resumeLsn); + + // FIXME: check this + // if (flushResults?.flushed_op != null) { + // this.lastSnapshotOpId = flushResults.flushed_op; + // } + this.logger.info(`Flushed snapshot at ${this.lastSnapshotOpId}`); } - async queueSnapshot(batch: storage.BucketStorageBatch, table: storage.SourceTable) { - await batch.markTableSnapshotRequired(table); + async queueSnapshot(writer: storage.BucketDataWriter, table: storage.SourceTable) { + await writer.markTableSnapshotRequired(table); this.queue.add(table); } @@ -314,8 +229,8 @@ export class MongoSnapshotter { return await db.collection(table.name).estimatedDocumentCount(); } - async resolveQualifiedTableNames( - batch: storage.BucketStorageBatch, + private async resolveQualifiedTableNames( + writer: storage.BucketDataWriter, tablePattern: TablePattern ): Promise { const schema = tablePattern.schema; @@ -347,17 +262,20 @@ export class MongoSnapshotter { } for (let collection of collections) { - const table = await this.handleRelation(batch, getMongoRelation({ db: schema, coll: collection.name }), { - collectionInfo: collection + await this.checkPostImages(schema, collection); + const sourceTables = await writer.resolveTable({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: getMongoRelation({ db: schema, coll: collection.name }) }); - - result.push(table); + // TODO: dropTables? + result.push(...sourceTables.tables); } return result; } - private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) { + private async snapshotTable(writer: storage.BucketDataWriter, table: storage.SourceTable) { const totalEstimatedCount = await this.estimatedCountNumber(table); let at = table.snapshotStatus?.replicatedCount ?? 
0; const db = this.client.db(table.schema); @@ -391,10 +309,10 @@ export class MongoSnapshotter { // Pre-fetch next batch, so that we can read and write concurrently nextChunkPromise = query.nextChunk(); for (let document of docBatch) { - const record = this.constructAfterRecord(document); + const record = this.constructAfterRecord(writer.rowProcessor, document); // This auto-flushes when the batch reaches its size limit - await batch.save({ + await writer.save({ tag: SaveOperationTag.INSERT, sourceTable: table, before: undefined, @@ -405,16 +323,18 @@ export class MongoSnapshotter { } // Important: flush before marking progress - await batch.flush(); + const flushResult = await writer.flush(); + if (flushResult != null) { + this.lastSnapshotOpId = flushResult.flushed_op; + } at += docBatch.length; this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length); - table = await batch.updateTableProgress(table, { + table = await writer.updateTableProgress(table, { lastKey, replicatedCount: at, totalEstimatedCount: totalEstimatedCount }); - this.relationCache.update(table); const duration = performance.now() - lastBatch; lastBatch = performance.now(); @@ -427,9 +347,9 @@ export class MongoSnapshotter { await nextChunkPromise; } - private constructAfterRecord(document: mongo.Document): SqliteRow { + private constructAfterRecord(rowProcessor: RowProcessor, document: mongo.Document): SqliteRow { const inputRow = constructAfterRecord(document); - return this.sync_rules.applyRowContext(inputRow); + return rowProcessor.applyRowContext(inputRow); } private async getCollectionInfo(db: string, name: string): Promise { @@ -466,40 +386,7 @@ export class MongoSnapshotter { } } - private async handleRelation( - batch: storage.BucketStorageBatch, - descriptor: SourceEntityDescriptor, - options: { collectionInfo: mongo.CollectionInfo | undefined } - ) { - if (options.collectionInfo != null) { - await this.checkPostImages(descriptor.schema, options.collectionInfo); - } else { - // If collectionInfo is null, the collection may have been dropped. - // Ignore the postImages check in this case. - } - - const result = await this.storage.resolveTable({ - group_id: this.group_id, - connection_id: this.connection_id, - connection_tag: this.connections.connectionTag, - entity_descriptor: descriptor, - sync_rules: this.sync_rules - }); - this.relationCache.update(result.table); - - // Drop conflicting collections. - // This is generally not expected for MongoDB source dbs, so we log an error. - if (result.dropTables.length > 0) { - this.logger.error( - `Conflicting collections found for ${JSON.stringify(descriptor)}. 
Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` - ); - await batch.drop(result.dropTables); - } - - return result.table; - } - - private async getSnapshotLsn(): Promise { + private async getSnapshotLsn(writer: storage.BucketDataWriter): Promise { const hello = await this.defaultDb.command({ hello: 1 }); // Basic sanity check if (hello.msg == 'isdbgrid') { @@ -529,7 +416,7 @@ export class MongoSnapshotter { const LSN_TIMEOUT_SECONDS = 60; const LSN_CREATE_INTERVAL_SECONDS = 1; - await using streamManager = this.openChangeStream({ lsn: null, maxAwaitTimeMs: 0 }); + await using streamManager = this.openChangeStream(writer, { lsn: null, maxAwaitTimeMs: 0 }); const { stream } = streamManager; const startTime = performance.now(); let lastCheckpointCreated = -10_000; @@ -576,8 +463,8 @@ export class MongoSnapshotter { /** * Given a snapshot LSN, validate that we can read from it, by opening a change stream. */ - private async validateSnapshotLsn(lsn: string) { - await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 }); + private async validateSnapshotLsn(writer: storage.BucketDataWriter, lsn: string) { + await using streamManager = this.openChangeStream(writer, { lsn: lsn, maxAwaitTimeMs: 0 }); const { stream } = streamManager; try { // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream @@ -589,8 +476,8 @@ export class MongoSnapshotter { } } - private getSourceNamespaceFilters(): { $match: any; multipleDatabases: boolean } { - const sourceTables = this.sync_rules.getSourceTables(); + private getSourceNamespaceFilters(rowProcessor: RowProcessor): { $match: any; multipleDatabases: boolean } { + const sourceTables = rowProcessor.getSourceTables(); let $inFilters: { db: string; coll: string }[] = [ { db: this.defaultDb.databaseName, coll: CHECKPOINTS_COLLECTION } @@ -634,12 +521,12 @@ export class MongoSnapshotter { } } - private openChangeStream(options: { lsn: string | null; maxAwaitTimeMs?: number }) { + private openChangeStream(writer: storage.BucketDataWriter, options: { lsn: string | null; maxAwaitTimeMs?: number }) { const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null; const startAfter = lastLsn?.timestamp; const resumeAfter = lastLsn?.resumeToken; - const filters = this.getSourceNamespaceFilters(); + const filters = this.getSourceNamespaceFilters(writer.rowProcessor); const pipeline: mongo.Document[] = [ { diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index a6f78c1af..5c535b7ff 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -8,7 +8,7 @@ import { } from '@powersync/service-sync-rules'; import { BSON } from 'bson'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; -import { SourceTable, TableSnapshotStatus } from './SourceTable.js'; +import { SourceTable, SourceTableId, TableSnapshotStatus } from './SourceTable.js'; import { BatchedCustomWriteCheckpointOptions, ResolveTableOptions, ResolveTableResult } from './storage-index.js'; import { InternalOpId } from '../util/utils.js'; @@ -28,6 +28,7 @@ export interface BucketDataWriter extends BucketDataWriterBase, AsyncDisposable * Resolve a table, keeping track of it internally. 
*/ resolveTable(options: ResolveTableOptions): Promise; + getTable(ref: SourceTable): Promise; } export interface BucketDataWriterBase { @@ -61,6 +62,12 @@ export interface BucketDataWriterBase { * @returns null if there are no changes to flush. */ flush(options?: BatchBucketFlushOptions): Promise; + + markTableSnapshotDone(tables: SourceTable[], no_checkpoint_before_lsn?: string): Promise; + markTableSnapshotRequired(table: SourceTable): Promise; + markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise; + + updateTableProgress(table: SourceTable, progress: Partial): Promise; } export interface BucketStorageBatch @@ -114,12 +121,6 @@ export interface BucketStorageBatch */ resumeFromLsn: string | null; - markTableSnapshotDone(tables: SourceTable[], no_checkpoint_before_lsn?: string): Promise; - markTableSnapshotRequired(table: SourceTable): Promise; - markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise; - - updateTableProgress(table: SourceTable, progress: Partial): Promise; - /** * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. */ From f9dfbfc300a45ef87c9c17bf0d7d315952d542c6 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 14 Jan 2026 13:59:46 +0200 Subject: [PATCH 031/101] Fix job wiring up. --- .../replication/ChangeStreamReplicationJob.ts | 5 +++++ .../src/replication/ChangeStreamReplicator.ts | 1 + packages/sync-rules/src/HydratedSyncRules.ts | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts b/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts index 5f4f41017..7e58d75a1 100644 --- a/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts +++ b/modules/module-mongodb/src/replication/ChangeStreamReplicationJob.ts @@ -1,5 +1,6 @@ import { container, logger as defaultLogger } from '@powersync/lib-services-framework'; import { + BucketStorageFactory, PersistedSyncRulesContent, replication, ReplicationLock, @@ -11,6 +12,7 @@ import { ConnectionManagerFactory } from './ConnectionManagerFactory.js'; export interface ChangeStreamReplicationJobOptions extends replication.AbstractReplicationJobOptions { connectionFactory: ConnectionManagerFactory; + storageFactory: BucketStorageFactory; streams: ReplicationStreamConfig[]; } @@ -22,6 +24,7 @@ export interface ReplicationStreamConfig { export class ChangeStreamReplicationJob extends replication.AbstractReplicationJob { private connectionFactory: ConnectionManagerFactory; + private storageFactory: BucketStorageFactory; private lastStream: ChangeStream | null = null; private readonly streams: ReplicationStreamConfig[]; @@ -30,6 +33,7 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ super(options); this.connectionFactory = options.connectionFactory; this.streams = options.streams; + this.storageFactory = options.storageFactory; // We use a custom formatter to process the prefix this.logger = defaultLogger.child({ prefix: `[powersync-${this.streams.map((stream) => stream.syncRules.id).join(',')}] ` @@ -106,6 +110,7 @@ export class ChangeStreamReplicationJob extends replication.AbstractReplicationJ return; } const stream = new ChangeStream({ + factory: this.storageFactory, abort_signal: this.abortController.signal, streams: this.streams, metrics: this.options.metrics, diff --git a/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts b/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts index 
123a30224..1f2830c39 100644 --- a/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts +++ b/modules/module-mongodb/src/replication/ChangeStreamReplicator.ts @@ -79,6 +79,7 @@ export class ChangeStreamReplicator extends replication.AbstractReplicator source.hydrate(params.createParams)); } + getMatchingSources(table: SourceTableInterface): { + bucketDataSources: BucketDataSource[]; + parameterIndexLookupCreators: ParameterIndexLookupCreator[]; + } { + const bucketDataSources = this.bucketDataSources.filter((ds) => ds.tableSyncsData(table)); + const parameterIndexLookupCreators: ParameterIndexLookupCreator[] = this.bucketParameterIndexLookupCreators.filter( + (ds) => ds.tableSyncsParameters(table) + ); + return { + bucketDataSources, + parameterIndexLookupCreators + }; + } + // These methods do not depend on hydration, so we can just forward them to the definition. getSourceTables() { From 44ac2b7cd7ad7fab88aba0168ca38beaa1b2538e Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 14 Jan 2026 15:47:48 +0200 Subject: [PATCH 032/101] Fixes. --- .../storage/implementation/MergedSyncRules.ts | 35 +++++++++++++++---- .../implementation/MongoBucketBatch.ts | 14 +++++++- .../implementation/MongoSyncBucketStorage.ts | 19 ++++------ .../src/replication/ChangeStream.ts | 18 ++++------ packages/sync-rules/src/TablePattern.ts | 11 ++++++ 5 files changed, 65 insertions(+), 32 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index f29a08b19..d0be3d5c2 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -17,6 +17,7 @@ import { SqliteInputValue, SqliteRow, SqliteValue, + SqlSyncRules, TablePattern } from '@powersync/service-sync-rules'; import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; @@ -29,13 +30,24 @@ interface ResolvedDataSource { } export class MergedSyncRules implements RowProcessor { static merge(sources: MongoPersistedSyncRules[]): MergedSyncRules { + return new MergedSyncRules(sources); + } + + private resolvedDataSources: Map; + private sourcePatterns: TablePattern[]; + private allSyncRules: SqlSyncRules[]; + + constructor(sources: MongoPersistedSyncRules[]) { let resolvedDataSources = new Map(); + let sourcePatternMap = new Map(); + this.allSyncRules = []; for (let source of sources) { const syncRules = source.sync_rules; const mapping = source.mapping; const hydrationState = source.hydrationState; const dataSources = syncRules.bucketDataSources; + this.allSyncRules.push(syncRules); for (let source of dataSources) { const scope = hydrationState.getBucketSourceScope(source); const id = mapping.bucketSourceId(source); @@ -60,13 +72,18 @@ export class MergedSyncRules implements RowProcessor { }; resolvedDataSources.set(id, { source, evaluate }); } - } - return new MergedSyncRules(resolvedDataSources); + for (let pattern of syncRules.getSourceTables()) { + const key = pattern.key; + if (!sourcePatternMap.has(key)) { + sourcePatternMap.set(key, pattern); + } + } + } + this.resolvedDataSources = resolvedDataSources; + this.sourcePatterns = Array.from(sourcePatternMap.values()); } - constructor(private resolvedDataSources: Map) {} - getMatchingSources(table: SourceTableInterface): { bucketDataSources: BucketDataSource[]; parameterIndexLookupCreators: ParameterIndexLookupCreator[]; @@ -86,21 +103,25 @@ export 
class MergedSyncRules implements RowProcessor { compatibility: CompatibilityContext = CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY; getSourceTables(): TablePattern[] { - throw new Error('Method not implemented.'); + return this.sourcePatterns; } + tableTriggersEvent(table: SourceTableInterface): boolean { throw new Error('Method not implemented.'); } + tableSyncsData(table: SourceTableInterface): boolean { throw new Error('Method not implemented.'); } tableSyncsParameters(table: SourceTableInterface): boolean { throw new Error('Method not implemented.'); } + applyRowContext( source: SqliteRow ): SqliteRow { - throw new Error('Method not implemented.'); + // FIXME: This may be different per sync rules - need to handle that + return this.allSyncRules[this.allSyncRules.length - 1].applyRowContext(source); } /** @@ -115,7 +136,7 @@ export class MergedSyncRules implements RowProcessor { } evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] } { - const rawResults: EvaluationResult[] = Object.values(this.resolvedDataSources).flatMap((dataSource) => + const rawResults: EvaluationResult[] = [...this.resolvedDataSources.values()].flatMap((dataSource) => dataSource.evaluate(options) ); const results = rawResults.filter(isEvaluatedRow) as EvaluatedRow[]; diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 66531a45d..bbe0b058b 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -23,6 +23,7 @@ import { deserializeBson, InternalOpId, isCompleteRow, + maxLsn, SaveOperationTag, SourceTable, SourceTableId, @@ -145,7 +146,12 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { get resumeFromLsn(): string | null { // FIXME: check the logic here when there are multiple batches - return this.subWriters[0]?.resumeFromLsn ?? null; + let lsn: string | null = null; + for (let sub of this.subWriters) { + // TODO: should this be min instead? 
+ lsn = maxLsn(lsn, sub.resumeFromLsn); + } + return lsn; } async keepaliveAll(lsn: string): Promise { @@ -341,6 +347,12 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { continue; } matchingDocs.push(doc); + for (let id of matchingBucketDataSourceIds) { + coveredBucketDataSourceIds.add(id); + } + for (let id of matchingParameterLookupSourceIds) { + coveredParameterLookupSourceIds.add(id); + } } const pendingBucketDataSourceIds = bucketDataSourceIds.filter((id) => !coveredBucketDataSourceIds.has(id)); diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 497164c0e..d390cd65c 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -16,8 +16,6 @@ import { GetCheckpointChangesOptions, InternalOpId, internalToExternalOpId, - maxLsn, - PersistedSyncRules, PopulateChecksumCacheOptions, PopulateChecksumCacheResults, ProtocolOpId, @@ -28,27 +26,22 @@ import { WatchWriteCheckpointOptions } from '@powersync/service-core'; import { JSONBig } from '@powersync/service-jsonbig'; -import { - BucketDataSource, - HydratedSyncRules, - ScopedParameterLookup, - SqliteJsonRow -} from '@powersync/service-sync-rules'; +import { HydratedSyncRules, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; import * as bson from 'bson'; import { LRUCache } from 'lru-cache'; import * as timers from 'timers/promises'; import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from '../../utils/util.js'; import { MongoBucketStorage } from '../MongoBucketStorage.js'; +import { MongoPersistedSyncRules } from '../storage-index.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { PowerSyncMongo } from './db.js'; -import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js'; -import { MongoBucketBatch, MongoBucketDataWriter } from './MongoBucketBatch.js'; +import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey } from './models.js'; +import { MongoBucketBatch } from './MongoBucketBatch.js'; import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js'; import { MongoCompactor } from './MongoCompactor.js'; import { MongoParameterCompactor } from './MongoParameterCompactor.js'; -import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js'; import { MongoPersistedSyncRulesContent } from './MongoPersistedSyncRulesContent.js'; -import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; -import { MongoPersistedSyncRules } from '../storage-index.js'; +import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js'; export interface MongoSyncBucketStorageOptions { checksumOptions?: MongoChecksumOptions; diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 28e9d106f..6bbb716ac 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -16,7 +16,7 @@ import { SourceTable, storage } from '@powersync/service-core'; -import { DatabaseInputRow, HydratedSyncRules, SqliteInputRow, SqliteRow } from '@powersync/service-sync-rules'; +import { HydratedSyncRules, SqliteRow } from '@powersync/service-sync-rules'; import 
{ ReplicationMetric } from '@powersync/service-types'; import { MongoLSN, ZERO_LSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; @@ -278,14 +278,6 @@ export class ChangeStream { } } - static *getQueryData(results: Iterable): Generator { - for (let row of results) { - yield constructAfterRecord(row); - } - } - - private async createSnapshotter() {} - private async setupCheckpointsCollection() { const collection = await this.getCollectionInfo(this.defaultDb.databaseName, CHECKPOINTS_COLLECTION); if (collection == null) { @@ -329,6 +321,8 @@ export class ChangeStream { // If anything errors here, the entire replication process is halted, and // all connections automatically closed, including this one. this.initPromise = this.initReplication(); + // Important - need to wait for init. This sets the resumeLsn, amongst other setup + await this.initPromise; streamPromise = this.streamChanges() .then(() => { throw new ReplicationAssertionError(`Replication stream exited unexpectedly`); @@ -566,8 +560,10 @@ export class ChangeStream { } ); - // FIXME: Proper resumeFromLsn implementation for multiple writers - // We should probably use the active sync rules for this, or alternatively the minimum from the writers. + // Even though we use a unified stream, the resumeFromLsn is tracked separately per sync rules version. + // This resumeFromLsn on the writer gives us the _minimum_ one. + // When starting with the first sync rules, we need to get an LSN from the snapshot. + // When we then start a new sync rules version, it will use the LSN from the existing sync rules version. const resumeFromLsn = writer.resumeFromLsn; if (resumeFromLsn == null) { throw new ReplicationAssertionError(`No LSN found to resume from`); diff --git a/packages/sync-rules/src/TablePattern.ts b/packages/sync-rules/src/TablePattern.ts index 89b981091..7b80a9b11 100644 --- a/packages/sync-rules/src/TablePattern.ts +++ b/packages/sync-rules/src/TablePattern.ts @@ -26,6 +26,17 @@ export class TablePattern { this.tablePattern = tablePattern; } + /** + * Unique lookup key for this pattern. For in-memory use only - no gaurantee of stability across restarts. + */ + get key(): string { + return JSON.stringify([this.connectionTag, this.schema, this.tablePattern]); + } + + equals(other: TablePattern): boolean { + return this.key == other.key; + } + get isWildcard() { return this.tablePattern.endsWith('%'); } From 8b923a52c1d44d010b78ba06060acd759dbd95c2 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 14 Jan 2026 16:13:16 +0200 Subject: [PATCH 033/101] Fixes to source table filtering. 
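Below is a minimal sketch of the row-filtering idea this commit introduces, using simplified stand-in types (TableRef, ResolvedSource and evaluateForTable are illustrative names, not the real service-core interfaces): each persisted source table records the bucket data source ids it feeds, and row evaluation only runs against those sources.

    // A source table record lists which bucket data sources it feeds.
    interface TableRef {
      bucketDataSourceIds: number[];
    }

    // A resolved data source pairs its persisted id with an evaluate function.
    interface ResolvedSource<Row, Result> {
      id: number;
      evaluate: (row: Row) => Result[];
    }

    // Evaluate a row only against the sources this table feeds. This keeps a
    // snapshot taken for a newly added source from re-evaluating rows against
    // all existing sources, and skips non-matching sources for performance.
    function evaluateForTable<Row, Result>(
      sources: Map<number, ResolvedSource<Row, Result>>,
      table: TableRef,
      row: Row
    ): Result[] {
      return [...sources.values()]
        .filter((source) => table.bucketDataSourceIds.includes(source.id))
        .flatMap((source) => source.evaluate(row));
    }

The actual change wires the ids through SourceTable (bucketDataSourceIds / parameterLookupSourceIds) so that MergedSyncRules.evaluateRowWithErrors can do this per-table lookup.
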
--- .../storage/implementation/MergedSyncRules.ts | 26 +++++++++++++++---- .../implementation/MongoBucketBatch.ts | 8 ++++-- .../service-core/src/storage/SourceTable.ts | 11 ++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index d0be3d5c2..b40e70151 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -21,12 +21,15 @@ import { TablePattern } from '@powersync/service-sync-rules'; import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; +import { SourceTable } from '@powersync/service-core'; +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; type EvaluateRowFn = (options: EvaluateRowOptions) => EvaluationResult[]; interface ResolvedDataSource { source: BucketDataSource; evaluate: EvaluateRowFn; + id: number; } export class MergedSyncRules implements RowProcessor { static merge(sources: MongoPersistedSyncRules[]): MergedSyncRules { @@ -70,7 +73,7 @@ export class MergedSyncRules implements RowProcessor { } satisfies EvaluatedRow; }); }; - resolvedDataSources.set(id, { source, evaluate }); + resolvedDataSources.set(id, { source, evaluate, id }); } for (let pattern of syncRules.getSourceTables()) { @@ -92,9 +95,9 @@ export class MergedSyncRules implements RowProcessor { .map((dataSource) => dataSource.source) .filter((ds) => ds.tableSyncsData(table)); return { - bucketDataSources, + bucketDataSources: bucketDataSources, parameterIndexLookupCreators: [ - //FIXME: implement + // FIXME: implement ] }; } @@ -136,9 +139,22 @@ export class MergedSyncRules implements RowProcessor { } evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] } { - const rawResults: EvaluationResult[] = [...this.resolvedDataSources.values()].flatMap((dataSource) => - dataSource.evaluate(options) + // Important: We only get matching sources here, not all sources. This can help for two things: + // 1. For performance: Skip any not-matching sources. + // 2. For re-replication: We may take a snapshot when adding a new source, with a new SourceTable. + // In that case, we don't want to re-evaluate all existing sources, only the new one. + + // FIXME: Fix performance - don't scan all sources + // And maybe re-use getMatchingSources? 
+ const table = options.sourceTable; + if (!(table instanceof SourceTable)) { + throw new ReplicationAssertionError(`Expected SourceTable instance`); + } + const bucketDataSources = [...this.resolvedDataSources.values()].filter((ds) => + table.bucketDataSourceIds.includes(ds.id) ); + + const rawResults: EvaluationResult[] = bucketDataSources.flatMap((dataSource) => dataSource.evaluate(options)); const results = rawResults.filter(isEvaluatedRow) as EvaluatedRow[]; const errors = rawResults.filter(isEvaluationError) as EvaluationError[]; diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index bbe0b058b..7e9dc8ac9 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -282,7 +282,9 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { connectionTag: ref.connectionTag, name: doc.table_name, replicaIdColumns: ref.replicaIdColumns, - snapshotComplete: doc.snapshot_done ?? true + snapshotComplete: doc.snapshot_done ?? true, + bucketDataSourceIds: doc.bucket_data_source_ids ?? [], + parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [] }); sourceTable.snapshotStatus = doc.snapshot_status == null @@ -386,7 +388,9 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { schema: schema, name: name, replicaIdColumns: replicaIdColumns, - snapshotComplete: doc.snapshot_done ?? true + snapshotComplete: doc.snapshot_done ?? true, + bucketDataSourceIds: doc.bucket_data_source_ids ?? [], + parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [] }); sourceTable.snapshotStatus = doc.snapshot_status == null diff --git a/packages/service-core/src/storage/SourceTable.ts b/packages/service-core/src/storage/SourceTable.ts index 9a36bc125..2a5eb3509 100644 --- a/packages/service-core/src/storage/SourceTable.ts +++ b/packages/service-core/src/storage/SourceTable.ts @@ -16,6 +16,9 @@ export interface SourceTableOptions { name: string; replicaIdColumns: ColumnDescriptor[]; snapshotComplete: boolean; + + bucketDataSourceIds?: number[]; + parameterLookupSourceIds?: number[]; } export interface TableSnapshotStatus { @@ -102,6 +105,14 @@ export class SourceTable implements SourceEntityDescriptor { return this.syncData || this.syncParameters || this.syncEvent; } + get bucketDataSourceIds() { + return this.options.bucketDataSourceIds ?? []; + } + + get parameterLookupSourceIds() { + return this.options.parameterLookupSourceIds ?? []; + } + /** * In-memory clone of the table status. */ From 436e6f4eaf6fcc341d6996dec0b0acdddceaf8db Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 14 Jan 2026 16:30:21 +0200 Subject: [PATCH 034/101] Fix source table filtering. 
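The returned SourceTables are now consistently built from matchingDocs, the array that accumulates both pre-existing matching documents and freshly inserted ones. A small sketch of that accumulate-then-map flow, with simplified shapes (TableDoc and buildResolvedTables are illustrative, not the real models):

    interface TableDoc {
      _id: string;
      table_name: string;
    }

    // Both pre-existing matching documents and freshly inserted ones must land
    // in the same array before being mapped to the tables returned to the caller.
    function buildResolvedTables(existingMatches: TableDoc[], inserted: TableDoc[]): string[] {
      const matchingDocs: TableDoc[] = [...existingMatches];
      for (const doc of inserted) {
        matchingDocs.push(doc); // previously pushed to a different array
      }
      return matchingDocs.map((doc) => doc.table_name);
    }
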
--- .../src/storage/implementation/MongoBucketBatch.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 7e9dc8ac9..e3b5bbe1d 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -377,10 +377,10 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { }; await col.insertOne(doc, { session }); - docs.push(doc); + matchingDocs.push(doc); } - const sourceTables = docs.map((doc) => { + const sourceTables = matchingDocs.map((doc) => { const sourceTable = new storage.SourceTable({ id: doc._id, connectionTag: connection_tag, @@ -501,6 +501,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { options?: storage.BucketBatchCommitOptions ): Promise { let sizes: Map | undefined = undefined; + if (this.storeCurrentData && !this.skipExistingRows) { // We skip this step if we don't store current_data, since the sizes will // always be small in that case. From bb6376238196667797fdc3bdf896d11e68e092fa Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 14 Jan 2026 16:58:42 +0200 Subject: [PATCH 035/101] Fix initialization of sync rule rules where no new snapshots are required. --- .../src/storage/MongoBucketStorage.ts | 7 +++++-- .../src/storage/implementation/MongoBucketBatch.ts | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index 55728d987..d8f380ecd 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -319,10 +319,13 @@ export class MongoBucketStorage const doc: SyncRuleDocument = { _id: id, content: options.content, - last_checkpoint: null, + last_checkpoint: activeSyncRules?.last_checkpoint ?? null, last_checkpoint_lsn: null, no_checkpoint_before: null, - keepalive_op: null, + // HACK: copy the op from the active sync rules, if any. + // This specifically helps for the case of the new sync rules not replicating anything new. + // FIXME: Make sure this is properly sound and tested. + keepalive_op: activeSyncRules?.last_checkpoint ? String(activeSyncRules.last_checkpoint) : null, snapshot_done: false, snapshot_lsn: undefined, state: storage.SyncRuleState.PROCESSING, diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index e3b5bbe1d..b314cc2d1 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -901,7 +901,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { }); } - private async withReplicationTransaction( + async withReplicationTransaction( description: string, callback: (session: mongo.ClientSession, opSeq: MongoIdSequence) => Promise ): Promise { From 596343dc9f49a2dbc9893865455b23bdbcd4107c Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 15 Jan 2026 11:20:31 +0200 Subject: [PATCH 036/101] Work around build issues. 
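For the call-site changes in this commit: group_id is no longer passed to resolveTable (it is implied by the storage instance), and the writer path uses resolveTables, which returns a tables array. A rough, self-contained sketch of the new writer-side shape, using stand-in types (the *Sketch interfaces below are placeholders; the real option and result types live in @powersync/service-core):

    // Stand-in shapes mirroring the fields used at the call sites in this patch.
    interface EntityDescriptorSketch {
      schema: string;
      name: string;
      objectId?: number;
    }

    interface ResolveTablesOptionsSketch {
      connection_id: number;
      connection_tag: string;
      entity_descriptor: EntityDescriptorSketch;
    }

    interface ResolvedTableSketch {
      name: string;
      snapshotComplete: boolean;
    }

    interface WriterSketch {
      resolveTables(options: ResolveTablesOptionsSketch): Promise<{ tables: ResolvedTableSketch[] }>;
    }

    // Resolve all SourceTables for one entity; note there is no group_id field.
    async function resolveForEntity(writer: WriterSketch, entity: EntityDescriptorSketch) {
      const result = await writer.resolveTables({
        connection_id: 1,
        connection_tag: 'default',
        entity_descriptor: entity
      });
      return result.tables;
    }

The per-sync-rules storage.resolveTable remains available but is marked deprecated in favour of the writer-side resolveTables.
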
--- .../implementation/MongoBucketBatch.ts | 4 ++-- .../implementation/MongoSyncBucketStorage.ts | 6 +++++- .../src/replication/ChangeStream.ts | 2 +- .../src/replication/MongoSnapshotter.ts | 2 +- .../module-mssql/src/replication/CDCStream.ts | 1 - .../src/replication/BinLogStream.ts | 1 - .../storage/PostgresBucketStorageFactory.ts | 7 +++++++ .../src/storage/PostgresSyncRulesStorage.ts | 4 +++- .../src/replication/PostgresSnapshotter.ts | 1 - .../src/replication/WalStream.ts | 1 - .../src/storage/BucketStorageBatch.ts | 10 ++++++++-- .../src/storage/SyncRulesBucketStorage.ts | 19 ++++++++++++++++++- 12 files changed, 45 insertions(+), 13 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index b314cc2d1..35f7739aa 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -301,7 +301,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { return sourceTable; } - async resolveTable(options: storage.ResolveTableOptions): Promise { + async resolveTables(options: storage.ResolveTablesOptions): Promise { const sources = this.rowProcessor.getMatchingSources({ connectionTag: options.connection_tag, name: options.entity_descriptor.name, @@ -321,7 +321,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { type: column.type, type_oid: column.typeId })); - let result: storage.ResolveTableResult | null = null; + let result: storage.ResolveTablesResult | null = null; await this.db.client.withSession(async (session) => { const col = this.db.source_tables; let filter: mongo.Filter = { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index d390cd65c..7f3871903 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -35,7 +35,7 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js'; import { MongoPersistedSyncRules } from '../storage-index.js'; import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { PowerSyncMongo } from './db.js'; -import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey } from './models.js'; +import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js'; import { MongoBucketBatch } from './MongoBucketBatch.js'; import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js'; import { MongoCompactor } from './MongoCompactor.js'; @@ -189,6 +189,10 @@ export class MongoSyncBucketStorage } } + async resolveTable(options: storage.ResolveTableOptions): Promise { + throw new Error('Method deprecated and not implemented.'); + } + async getParameterSets( checkpoint: MongoReplicationCheckpoint, lookups: ScopedParameterLookup[] diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 6bbb716ac..5c3fd73d2 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -474,7 +474,7 @@ export class ChangeStream { // Ignore the postImages check in this case. 
} - const result = await writer.resolveTable({ + const result = await writer.resolveTables({ connection_id: this.connection_id, connection_tag: this.connections.connectionTag, entity_descriptor: descriptor diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index 3b5992d1d..c458fb99c 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -263,7 +263,7 @@ export class MongoSnapshotter { for (let collection of collections) { await this.checkPostImages(schema, collection); - const sourceTables = await writer.resolveTable({ + const sourceTables = await writer.resolveTables({ connection_id: this.connection_id, connection_tag: this.connections.connectionTag, entity_descriptor: getMongoRelation({ db: schema, coll: collection.name }) diff --git a/modules/module-mssql/src/replication/CDCStream.ts b/modules/module-mssql/src/replication/CDCStream.ts index c4c89a9d9..98c277918 100644 --- a/modules/module-mssql/src/replication/CDCStream.ts +++ b/modules/module-mssql/src/replication/CDCStream.ts @@ -241,7 +241,6 @@ export class CDCStream { throw new ReplicationAssertionError(`objectId expected, got ${typeof table.objectId}`); } const resolved = await this.storage.resolveTable({ - group_id: this.groupId, connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: table, diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index 365f7b721..b7c0ce4f8 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -128,7 +128,6 @@ export class BinLogStream { async handleRelation(batch: storage.BucketStorageBatch, entity: storage.SourceEntityDescriptor, snapshot: boolean) { const result = await this.storage.resolveTable({ - group_id: this.groupId, connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: entity, diff --git a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts index 85fbe4dad..7feda539d 100644 --- a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts +++ b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts @@ -43,6 +43,13 @@ export class PostgresBucketStorageFactory }); } + createCombinedWriter( + storage: SyncRulesBucketStorage[], + options: storage.StartBatchOptions + ): Promise { + throw new Error('Not implemented yet'); + } + async [Symbol.asyncDispose]() { await this.db[Symbol.asyncDispose](); } diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 3c39e7420..7c4b2a989 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -176,10 +176,12 @@ export class PostgresSyncRulesStorage } async resolveTable(options: storage.ResolveTableOptions): Promise { - const { group_id, connection_id, connection_tag, entity_descriptor } = options; + const { connection_id, connection_tag, entity_descriptor } = options; const { schema, name: table, objectId, replicaIdColumns } = entity_descriptor; + const group_id = this.group_id; + const normalizedReplicaIdColumns = 
replicaIdColumns.map((column) => ({ name: column.name, type: column.type, diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index d95f8b595..bd2ba9afd 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -624,7 +624,6 @@ export class PostgresSnapshotter { throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); } const result = await this.storage.resolveTable({ - group_id: this.group_id, connection_id: this.connection_id, connection_tag: this.connections.connectionTag, entity_descriptor: descriptor, diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index d4350b7e7..7d2345c7d 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -183,7 +183,6 @@ export class WalStream { throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); } const result = await this.storage.resolveTable({ - group_id: this.group_id, connection_id: this.connection_id, connection_tag: this.connections.connectionTag, entity_descriptor: descriptor, diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index 5c535b7ff..6fda718ea 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -9,7 +9,13 @@ import { import { BSON } from 'bson'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { SourceTable, SourceTableId, TableSnapshotStatus } from './SourceTable.js'; -import { BatchedCustomWriteCheckpointOptions, ResolveTableOptions, ResolveTableResult } from './storage-index.js'; +import { + BatchedCustomWriteCheckpointOptions, + ResolveTableOptions, + ResolveTableResult, + ResolveTablesOptions, + ResolveTablesResult +} from './storage-index.js'; import { InternalOpId } from '../util/utils.js'; export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptions = { @@ -27,7 +33,7 @@ export interface BucketDataWriter extends BucketDataWriterBase, AsyncDisposable /** * Resolve a table, keeping track of it internally. */ - resolveTable(options: ResolveTableOptions): Promise; + resolveTables(options: ResolveTablesOptions): Promise; getTable(ref: SourceTable): Promise; } diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 67ac5e9eb..cfcf4678e 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -34,6 +34,11 @@ export interface SyncRulesBucketStorage callback: (batch: BucketStorageBatch) => Promise ): Promise; + /** + * @deprecated use `createWriter()` instead, with its `resolveTables` method. + */ + resolveTable(options: ResolveTableOptions): Promise; + /** * Create a new writer - an alternative to `startBatch`. 
* @@ -160,17 +165,29 @@ export interface SyncRuleStatus { snapshot_done: boolean; snapshot_lsn: string | null; } +export interface ResolveTablesOptions { + connection_id: number; + connection_tag: string; + entity_descriptor: SourceEntityDescriptor; +} + export interface ResolveTableOptions { connection_id: number; connection_tag: string; entity_descriptor: SourceEntityDescriptor; + sync_rules: HydratedSyncRules; } -export interface ResolveTableResult { +export interface ResolveTablesResult { tables: SourceTable[]; dropTables: SourceTable[]; } +export interface ResolveTableResult { + table: SourceTable; + dropTables: SourceTable[]; +} + export interface StartBatchOptions extends ParseSyncRulesOptions { zeroLSN: string; /** From e71bc4946caaccec80ea8401990764aaa37002fc Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 15 Jan 2026 11:32:38 +0200 Subject: [PATCH 037/101] Fix test build errors. --- modules/module-mongodb/test/src/change_stream_utils.ts | 1 + modules/module-mongodb/test/src/mongo_test.test.ts | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index 0b2ce99e4..cdc430876 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -115,6 +115,7 @@ export class ChangeStreamTestContext { return this._walStream; } const options: ChangeStreamOptions = { + factory: this.factory, streams: [{ storage: this.storage }], metrics: METRICS_HELPER.metricsEngine, connections: this.connectionManager, diff --git a/modules/module-mongodb/test/src/mongo_test.test.ts b/modules/module-mongodb/test/src/mongo_test.test.ts index c27002535..5c2ede098 100644 --- a/modules/module-mongodb/test/src/mongo_test.test.ts +++ b/modules/module-mongodb/test/src/mongo_test.test.ts @@ -14,6 +14,7 @@ import { ChangeStream } from '@module/replication/ChangeStream.js'; import { constructAfterRecord } from '@module/replication/MongoRelation.js'; import { PostImagesOption } from '@module/types/types.js'; import { clearTestDb, connectMongoData, TEST_CONNECTION_OPTIONS } from './util.js'; +import { MongoSnapshotter } from '@module/replication/MongoSnapshotter.js'; describe('mongo data types', () => { async function setupTable(db: mongo.Db) { @@ -266,7 +267,7 @@ describe('mongo data types', () => { .toArray(); // It is tricky to save "undefined" with mongo, so we check that it succeeded. 
expect(rawResults[4].undefined).toBeUndefined(); - const transformed = [...ChangeStream.getQueryData(rawResults)]; + const transformed = [...MongoSnapshotter.getQueryData(rawResults)]; checkResults(transformed); } finally { await client.close(); @@ -287,7 +288,7 @@ describe('mongo data types', () => { .find({}, { sort: { _id: 1 } }) .toArray(); expect(rawResults[3].undefined).toEqual([undefined]); - const transformed = [...ChangeStream.getQueryData(rawResults)]; + const transformed = [...MongoSnapshotter.getQueryData(rawResults)]; checkResultsNested(transformed); } finally { @@ -548,7 +549,7 @@ bucket_definitions: .collection('test_data') .find({}, { sort: { _id: 1 } }) .toArray(); - const [row] = [...ChangeStream.getQueryData(rawResults)]; + const [row] = [...MongoSnapshotter.getQueryData(rawResults)]; const oldFormat = applyRowContext(row, CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY); expect(oldFormat).toMatchObject({ From 9cffa33d4462a8999d8588aab8c03de429df7b43 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 15 Jan 2026 12:27:41 +0200 Subject: [PATCH 038/101] Support parameter lookups again; simplify a bit. --- .../storage/implementation/MergedSyncRules.ts | 107 +++++++++--------- packages/sync-rules/src/HydratedSyncRules.ts | 13 --- 2 files changed, 56 insertions(+), 64 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index b40e70151..99356a9a8 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -1,17 +1,21 @@ +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { SourceTable } from '@powersync/service-core'; import { BucketDataSource, - buildBucketInfo, CompatibilityContext, EvaluatedParameters, + EvaluatedParametersResult, EvaluatedRow, EvaluateRowOptions, EvaluationError, EvaluationResult, + hydrateEvaluateParameterRow, + hydrateEvaluateRow, + isEvaluatedParameters, isEvaluatedRow, isEvaluationError, ParameterIndexLookupCreator, RowProcessor, - SOURCE, SourceTableInterface, SqlEventDescriptor, SqliteInputValue, @@ -21,27 +25,40 @@ import { TablePattern } from '@powersync/service-sync-rules'; import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; -import { SourceTable } from '@powersync/service-core'; -import { ReplicationAssertionError } from '@powersync/lib-services-framework'; type EvaluateRowFn = (options: EvaluateRowOptions) => EvaluationResult[]; +type EvaluateParameterRowFn = (sourceTable: SourceTableInterface, row: SqliteRow) => EvaluatedParametersResult[]; interface ResolvedDataSource { source: BucketDataSource; evaluate: EvaluateRowFn; id: number; } +interface ResolvedParameterLookupSource { + source: ParameterIndexLookupCreator; + id: number; + evaluate: EvaluateParameterRowFn; +} + +/** + * This is like HydratedSyncRules, but merges multiple sources together, and only implements the methods + * required for replication. + * + * This should be moved to a re-usable location, possibly merged with HydratedSyncRules logic. 
+ */ export class MergedSyncRules implements RowProcessor { static merge(sources: MongoPersistedSyncRules[]): MergedSyncRules { return new MergedSyncRules(sources); } private resolvedDataSources: Map; + private resolvedParameterLookupSources: Map; private sourcePatterns: TablePattern[]; private allSyncRules: SqlSyncRules[]; constructor(sources: MongoPersistedSyncRules[]) { let resolvedDataSources = new Map(); + let resolvedParameterLookupSources = new Map(); let sourcePatternMap = new Map(); this.allSyncRules = []; @@ -50,29 +67,14 @@ export class MergedSyncRules implements RowProcessor { const mapping = source.mapping; const hydrationState = source.hydrationState; const dataSources = syncRules.bucketDataSources; + const bucketParameterLookupSources = syncRules.bucketParameterLookupSources; this.allSyncRules.push(syncRules); for (let source of dataSources) { - const scope = hydrationState.getBucketSourceScope(source); const id = mapping.bucketSourceId(source); if (resolvedDataSources.has(id)) { continue; } - - const evaluate: EvaluateRowFn = (options: EvaluateRowOptions): EvaluationResult[] => { - return source.evaluateRow(options).map((result) => { - if (isEvaluationError(result)) { - return result; - } - const info = buildBucketInfo(scope, result.serializedBucketParameters); - return { - bucket: info.bucket, - id: result.id, - table: result.table, - data: result.data, - source: info[SOURCE] - } satisfies EvaluatedRow; - }); - }; + const evaluate = hydrateEvaluateRow(hydrationState, source); resolvedDataSources.set(id, { source, evaluate, id }); } @@ -82,11 +84,27 @@ export class MergedSyncRules implements RowProcessor { sourcePatternMap.set(key, pattern); } } + + for (let source of bucketParameterLookupSources) { + const id = mapping.parameterLookupId(source); + if (resolvedParameterLookupSources.has(id)) { + continue; + } + + const withScope = hydrateEvaluateParameterRow(hydrationState, source); + resolvedParameterLookupSources.set(id, { source, id, evaluate: withScope }); + } } this.resolvedDataSources = resolvedDataSources; + this.resolvedParameterLookupSources = resolvedParameterLookupSources; this.sourcePatterns = Array.from(sourcePatternMap.values()); } + /** + * + * @param table The source database table definition, _not_ the individually derived SourceTables. 
+ * @returns + */ getMatchingSources(table: SourceTableInterface): { bucketDataSources: BucketDataSource[]; parameterIndexLookupCreators: ParameterIndexLookupCreator[]; @@ -94,11 +112,15 @@ export class MergedSyncRules implements RowProcessor { const bucketDataSources = [...this.resolvedDataSources.values()] .map((dataSource) => dataSource.source) .filter((ds) => ds.tableSyncsData(table)); + + const parameterIndexLookupCreators: ParameterIndexLookupCreator[] = [ + ...this.resolvedParameterLookupSources.values() + ] + .map((dataSource) => dataSource.source) + .filter((ds) => ds.tableSyncsParameters(table)); return { - bucketDataSources: bucketDataSources, - parameterIndexLookupCreators: [ - // FIXME: implement - ] + bucketDataSources, + parameterIndexLookupCreators }; } @@ -109,17 +131,6 @@ export class MergedSyncRules implements RowProcessor { return this.sourcePatterns; } - tableTriggersEvent(table: SourceTableInterface): boolean { - throw new Error('Method not implemented.'); - } - - tableSyncsData(table: SourceTableInterface): boolean { - throw new Error('Method not implemented.'); - } - tableSyncsParameters(table: SourceTableInterface): boolean { - throw new Error('Method not implemented.'); - } - applyRowContext( source: SqliteRow ): SqliteRow { @@ -127,17 +138,6 @@ export class MergedSyncRules implements RowProcessor { return this.allSyncRules[this.allSyncRules.length - 1].applyRowContext(source); } - /** - * Throws errors. - */ - evaluateRow(options: EvaluateRowOptions): EvaluatedRow[] { - const { results, errors } = this.evaluateRowWithErrors(options); - if (errors.length > 0) { - throw new Error(errors[0].error); - } - return results; - } - evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] } { // Important: We only get matching sources here, not all sources. This can help for two things: // 1. For performance: Skip any not-matching sources. 
@@ -161,13 +161,18 @@ export class MergedSyncRules implements RowProcessor { return { results, errors }; } - evaluateParameterRow(table: SourceTableInterface, row: SqliteRow): EvaluatedParameters[] { - throw new Error('Method not implemented.'); - } evaluateParameterRowWithErrors( table: SourceTableInterface, row: SqliteRow ): { results: EvaluatedParameters[]; errors: EvaluationError[] } { - throw new Error('Method not implemented.'); + const parameterIndexLookupCreators = [...this.resolvedParameterLookupSources.values()].filter((ds) => + ds.source.tableSyncsParameters(table) + ); + const rawResults: EvaluatedParametersResult[] = parameterIndexLookupCreators.flatMap((creator) => + creator.evaluate(table, row) + ); + const results = rawResults.filter(isEvaluatedParameters) as EvaluatedParameters[]; + const errors = rawResults.filter(isEvaluationError) as EvaluationError[]; + return { results, errors }; } } diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index da1106cd1..3e9381cb6 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -32,10 +32,6 @@ export interface RowProcessor { getSourceTables(): TablePattern[]; - tableTriggersEvent(table: SourceTableInterface): boolean; - - tableSyncsData(table: SourceTableInterface): boolean; - tableSyncsParameters(table: SourceTableInterface): boolean; getMatchingSources(table: SourceTableInterface): { bucketDataSources: BucketDataSource[]; parameterIndexLookupCreators: ParameterIndexLookupCreator[]; @@ -45,17 +41,8 @@ export interface RowProcessor { source: SqliteRow ): SqliteRow; - /** - * Throws errors. - */ - evaluateRow(options: EvaluateRowOptions): EvaluatedRow[]; - evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] }; - /** - * Throws errors. - */ - evaluateParameterRow(table: SourceTableInterface, row: SqliteRow): EvaluatedParameters[]; evaluateParameterRowWithErrors( table: SourceTableInterface, row: SqliteRow From 7acc0f774a64123024072778d9209f9e25d11a6b Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 15 Jan 2026 12:35:07 +0200 Subject: [PATCH 039/101] Fix parameter row filtering. --- .../src/storage/implementation/MergedSyncRules.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index 99356a9a8..6e5d6b60d 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -145,7 +145,6 @@ export class MergedSyncRules implements RowProcessor { // In that case, we don't want to re-evaluate all existing sources, only the new one. // FIXME: Fix performance - don't scan all sources - // And maybe re-use getMatchingSources? 
const table = options.sourceTable; if (!(table instanceof SourceTable)) { throw new ReplicationAssertionError(`Expected SourceTable instance`); @@ -165,8 +164,13 @@ export class MergedSyncRules implements RowProcessor { table: SourceTableInterface, row: SqliteRow ): { results: EvaluatedParameters[]; errors: EvaluationError[] } { + // FIXME: Fix performance - don't scan all sources + + if (!(table instanceof SourceTable)) { + throw new ReplicationAssertionError(`Expected SourceTable instance`); + } const parameterIndexLookupCreators = [...this.resolvedParameterLookupSources.values()].filter((ds) => - ds.source.tableSyncsParameters(table) + table.parameterLookupSourceIds.includes(ds.id) ); const rawResults: EvaluatedParametersResult[] = parameterIndexLookupCreators.flatMap((creator) => creator.evaluate(table, row) From 24ba4d063d6be940efe8b1baa8f92d5dd0ec236a Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 15 Jan 2026 13:39:27 +0200 Subject: [PATCH 040/101] Minor code simplification. --- packages/sync-rules/src/SqlSyncRules.ts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/packages/sync-rules/src/SqlSyncRules.ts b/packages/sync-rules/src/SqlSyncRules.ts index ce2031b1f..bd27eeca0 100644 --- a/packages/sync-rules/src/SqlSyncRules.ts +++ b/packages/sync-rules/src/SqlSyncRules.ts @@ -419,21 +419,18 @@ export class SqlSyncRules { const sourceTables = new Map(); for (const bucket of this.bucketDataSources) { for (const r of bucket.getSourceTables()) { - const key = `${r.connectionTag}.${r.schema}.${r.tablePattern}`; - sourceTables.set(key, r); + sourceTables.set(r.key, r); } } for (const bucket of this.bucketParameterLookupSources) { for (const r of bucket.getSourceTables()) { - const key = `${r.connectionTag}.${r.schema}.${r.tablePattern}`; - sourceTables.set(key, r); + sourceTables.set(r.key, r); } } for (const event of this.eventDescriptors) { for (const r of event.getSourceTables()) { - const key = `${r.connectionTag}.${r.schema}.${r.tablePattern}`; - sourceTables.set(key, r); + sourceTables.set(r.key, r); } } From 54e5f5975344a62adb9404d103a6f68aa6f1e44d Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 15 Jan 2026 13:42:27 +0200 Subject: [PATCH 041/101] Use TablePattern[] instead of Set. Set doesn't make sense since we cannot specify a custom comparison function. 
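To illustrate the reasoning in this commit with a minimal sketch (a simplified stand-in class, not the real TablePattern): a JavaScript Set compares objects by reference, so two equivalent patterns are both retained, whereas keying a Map on the pattern's string key de-duplicates by value, which is what SqlSyncRules.getSourceTables now does.

    // Minimal sketch; PatternSketch is an illustrative stand-in, not the real TablePattern.
    class PatternSketch {
      constructor(
        readonly schema: string,
        readonly tablePattern: string
      ) {}
      get key(): string {
        return `${this.schema}.${this.tablePattern}`;
      }
    }

    const a = new PatternSketch('public', 'todos');
    const b = new PatternSketch('public', 'todos');

    console.log(new Set([a, b]).size); // 2 - reference equality, no value-based de-duplication

    const byKey = new Map<string, PatternSketch>();
    for (const p of [a, b]) {
      byKey.set(p.key, p); // de-duplicate by key instead, as in SqlSyncRules.getSourceTables()
    }
    console.log(byKey.size); // 1
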
--- packages/sync-rules/src/BucketSource.ts | 4 ++-- packages/sync-rules/src/SqlBucketDescriptor.ts | 6 +++--- packages/sync-rules/src/SqlParameterQuery.ts | 4 ++-- .../sync-rules/src/TableValuedFunctionSqlParameterQuery.ts | 2 +- packages/sync-rules/src/events/SqlEventDescriptor.ts | 6 +++--- packages/sync-rules/src/streams/filter.ts | 6 ++---- packages/sync-rules/src/streams/stream.ts | 4 ++-- packages/sync-rules/test/src/util.ts | 4 ++-- 8 files changed, 17 insertions(+), 19 deletions(-) diff --git a/packages/sync-rules/src/BucketSource.ts b/packages/sync-rules/src/BucketSource.ts index c84e0bca1..bdec256d8 100644 --- a/packages/sync-rules/src/BucketSource.ts +++ b/packages/sync-rules/src/BucketSource.ts @@ -104,7 +104,7 @@ export interface BucketDataSource { */ readonly bucketParameters: string[]; - getSourceTables(): Set; + getSourceTables(): TablePattern[]; tableSyncsData(table: SourceTableInterface): boolean; /** @@ -137,7 +137,7 @@ export interface ParameterIndexLookupCreator { */ readonly defaultLookupScope: ParameterLookupScope; - getSourceTables(): Set; + getSourceTables(): TablePattern[]; /** * Given a row in a source table that affects sync parameters, returns a structure to index which buckets rows should diff --git a/packages/sync-rules/src/SqlBucketDescriptor.ts b/packages/sync-rules/src/SqlBucketDescriptor.ts index 04eb7f590..e1522fd8f 100644 --- a/packages/sync-rules/src/SqlBucketDescriptor.ts +++ b/packages/sync-rules/src/SqlBucketDescriptor.ts @@ -190,10 +190,10 @@ export class BucketDefinitionDataSource implements BucketDataSource { return results; } - getSourceTables(): Set { - let result = new Set(); + getSourceTables(): TablePattern[] { + let result: TablePattern[] = []; for (let query of this.descriptor.dataQueries) { - result.add(query.sourceTable); + result.push(query.sourceTable); } return result; } diff --git a/packages/sync-rules/src/SqlParameterQuery.ts b/packages/sync-rules/src/SqlParameterQuery.ts index 08b04e591..024ca5d00 100644 --- a/packages/sync-rules/src/SqlParameterQuery.ts +++ b/packages/sync-rules/src/SqlParameterQuery.ts @@ -346,8 +346,8 @@ export class SqlParameterQuery implements ParameterIndexLookupCreator { return this.sourceTable.matches(table); } - getSourceTables(): Set { - return new Set([this.sourceTable]); + getSourceTables() { + return [this.sourceTable]; } createParameterQuerierSource(params: CreateSourceParams): BucketParameterQuerierSource { diff --git a/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts b/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts index 1fa5a3008..2e0ec97e6 100644 --- a/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts +++ b/packages/sync-rules/src/TableValuedFunctionSqlParameterQuery.ts @@ -224,7 +224,7 @@ export class TableValuedFunctionSqlParameterQuery { } getSourceTables() { - return new Set(); + return []; } tableSyncsParameters(_table: SourceTableInterface): boolean { diff --git a/packages/sync-rules/src/events/SqlEventDescriptor.ts b/packages/sync-rules/src/events/SqlEventDescriptor.ts index 33c34b98e..77c908d83 100644 --- a/packages/sync-rules/src/events/SqlEventDescriptor.ts +++ b/packages/sync-rules/src/events/SqlEventDescriptor.ts @@ -53,10 +53,10 @@ export class SqlEventDescriptor { return matchingQuery.evaluateRowWithErrors(options.sourceTable, options.record); } - getSourceTables(): Set { - let result = new Set(); + getSourceTables(): TablePattern[] { + let result: TablePattern[] = []; for (let query of this.sourceQueries) { - 
result.add(query.sourceTable!); + result.push(query.sourceTable!); } return result; } diff --git a/packages/sync-rules/src/streams/filter.ts b/packages/sync-rules/src/streams/filter.ts index e211f377e..9f6f5259c 100644 --- a/packages/sync-rules/src/streams/filter.ts +++ b/packages/sync-rules/src/streams/filter.ts @@ -548,10 +548,8 @@ export class SubqueryParameterLookupSource implements ParameterIndexLookupCreato }; } - getSourceTables(): Set { - let result = new Set(); - result.add(this.parameterTable); - return result; + getSourceTables() { + return [this.parameterTable]; } /** diff --git a/packages/sync-rules/src/streams/stream.ts b/packages/sync-rules/src/streams/stream.ts index 3394cfa51..6c107dfe7 100644 --- a/packages/sync-rules/src/streams/stream.ts +++ b/packages/sync-rules/src/streams/stream.ts @@ -96,8 +96,8 @@ export class SyncStreamDataSource implements BucketDataSource { return this.variant.defaultBucketPrefix(this.stream.name); } - getSourceTables(): Set { - return new Set([this.data.sourceTable]); + getSourceTables() { + return [this.data.sourceTable]; } tableSyncsData(table: SourceTableInterface): boolean { diff --git a/packages/sync-rules/test/src/util.ts b/packages/sync-rules/test/src/util.ts index 6d2a9efa8..7a4188888 100644 --- a/packages/sync-rules/test/src/util.ts +++ b/packages/sync-rules/test/src/util.ts @@ -95,8 +95,8 @@ export const EMPTY_DATA_SOURCE: BucketDataSource = { uniqueName: 'mybucket', bucketParameters: [], // These are not used in the tests. - getSourceTables: function (): Set { - return new Set(); + getSourceTables: function (): TablePattern[] { + return []; }, evaluateRow(options) { throw new Error('Function not implemented.'); From bb9fb76dee8b2fc04c0bb0f422e03d63381c8605 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 19 Jan 2026 16:30:16 +0200 Subject: [PATCH 042/101] Refactor to pull through TablePattern. --- .../storage/implementation/MergedSyncRules.ts | 9 +-- .../implementation/MongoBucketBatch.ts | 12 ++-- .../src/replication/ChangeStream.ts | 62 ++++++++++++------- .../src/replication/MongoSnapshotter.ts | 3 +- .../src/test-utils/general-utils.ts | 3 +- .../service-core/src/storage/SourceTable.ts | 11 +++- .../src/storage/SyncRulesBucketStorage.ts | 4 +- packages/sync-rules/src/HydratedSyncRules.ts | 11 ++-- 8 files changed, 70 insertions(+), 45 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index 6e5d6b60d..bc7c0fe25 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -102,22 +102,23 @@ export class MergedSyncRules implements RowProcessor { /** * - * @param table The source database table definition, _not_ the individually derived SourceTables. + * @param pattern The source database table definition, _not_ the individually derived SourceTables. 
* @returns */ - getMatchingSources(table: SourceTableInterface): { + getMatchingSources(pattern: TablePattern): { bucketDataSources: BucketDataSource[]; parameterIndexLookupCreators: ParameterIndexLookupCreator[]; } { + // FIXME: Fix performance - don't scan all sources const bucketDataSources = [...this.resolvedDataSources.values()] .map((dataSource) => dataSource.source) - .filter((ds) => ds.tableSyncsData(table)); + .filter((ds) => ds.getSourceTables().some((table) => table.equals(pattern))); const parameterIndexLookupCreators: ParameterIndexLookupCreator[] = [ ...this.resolvedParameterLookupSources.values() ] .map((dataSource) => dataSource.source) - .filter((ds) => ds.tableSyncsParameters(table)); + .filter((ds) => ds.getSourceTables().some((table) => table.equals(pattern))); return { bucketDataSources, parameterIndexLookupCreators diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 35f7739aa..7df0ece1a 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -284,7 +284,8 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { replicaIdColumns: ref.replicaIdColumns, snapshotComplete: doc.snapshot_done ?? true, bucketDataSourceIds: doc.bucket_data_source_ids ?? [], - parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [] + parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [], + pattern: ref.pattern }); sourceTable.snapshotStatus = doc.snapshot_status == null @@ -302,11 +303,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { } async resolveTables(options: storage.ResolveTablesOptions): Promise { - const sources = this.rowProcessor.getMatchingSources({ - connectionTag: options.connection_tag, - name: options.entity_descriptor.name, - schema: options.entity_descriptor.schema - }); + const sources = this.rowProcessor.getMatchingSources(options.pattern); const bucketDataSourceIds = sources.bucketDataSources.map((source) => this.mapping.bucketSourceId(source)); const parameterLookupSourceIds = sources.parameterIndexLookupCreators.map((source) => this.mapping.parameterLookupId(source) @@ -390,7 +387,8 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { replicaIdColumns: replicaIdColumns, snapshotComplete: doc.snapshot_done ?? true, bucketDataSourceIds: doc.bucket_data_source_ids ?? [], - parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [] + parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [], + pattern: options.pattern }); sourceTable.snapshotStatus = doc.snapshot_status == null diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 5c3fd73d2..5d2051c1e 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -474,37 +474,51 @@ export class ChangeStream { // Ignore the postImages check in this case. 
} - const result = await writer.resolveTables({ - connection_id: this.connection_id, - connection_tag: this.connections.connectionTag, - entity_descriptor: descriptor + // FIXME: Optimize - avoid scanning all source tables + const patterns = writer.rowProcessor.getSourceTables().filter((t) => { + return t.matches({ + connectionTag: this.connections.connectionTag, + schema: descriptor.schema, + name: descriptor.name + }); }); - const snapshot = options.snapshot; - this.relationCache.set(getCacheIdentifier(descriptor), result.tables); + let allTables: SourceTable[] = []; + for (let pattern of patterns) { + const result = await writer.resolveTables({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor, + pattern + }); - // Drop conflicting collections. - // This is generally not expected for MongoDB source dbs, so we log an error. - if (result.dropTables.length > 0) { - this.logger.error( - `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` - ); - await writer.drop(result.dropTables); - } + const snapshot = options.snapshot; + this.relationCache.set(getCacheIdentifier(descriptor), result.tables); - // Snapshot if: - // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not already done, AND: - // 3. The table is used in sync rules. - for (let table of result.tables) { - const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; - if (shouldSnapshot) { - this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); - await this.snapshotter.queueSnapshot(writer, table); + // Drop conflicting collections. + // This is generally not expected for MongoDB source dbs, so we log an error. + if (result.dropTables.length > 0) { + this.logger.error( + `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` + ); + await writer.drop(result.dropTables); + } + + // Snapshot if: + // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) + // 2. Snapshot is not already done, AND: + // 3. The table is used in sync rules. + for (let table of result.tables) { + const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; + if (shouldSnapshot) { + this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); + await this.snapshotter.queueSnapshot(writer, table); + } } + allTables.push(...result.tables); } - return result.tables; + return allTables; } private async drop(writer: storage.BucketDataWriter, entity: SourceEntityDescriptor): Promise { diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index c458fb99c..209596db1 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -266,7 +266,8 @@ export class MongoSnapshotter { const sourceTables = await writer.resolveTables({ connection_id: this.connection_id, connection_tag: this.connections.connectionTag, - entity_descriptor: getMongoRelation({ db: schema, coll: collection.name }) + entity_descriptor: getMongoRelation({ db: schema, coll: collection.name }), + pattern: tablePattern }); // TODO: dropTables? 
result.push(...sourceTables.tables); diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index fdecc59c3..1259d580c 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -140,10 +140,9 @@ export function bucketRequest( const source = parsed.sync_rules.bucketDataSources.find((b) => b.uniqueName === definitionName); if (source == null) { - throw new Error('Failed to find global bucket'); + throw new Error(`Failed to find global bucket ${bucket}`); } const bucketName = hydrationState.getBucketSourceScope(source).bucketPrefix + parameters; - console.log('query for bucket', bucketName); return { bucket: bucketName, start: BigInt(start ?? 0n), diff --git a/packages/service-core/src/storage/SourceTable.ts b/packages/service-core/src/storage/SourceTable.ts index 2a5eb3509..fcef47347 100644 --- a/packages/service-core/src/storage/SourceTable.ts +++ b/packages/service-core/src/storage/SourceTable.ts @@ -1,4 +1,4 @@ -import { DEFAULT_TAG } from '@powersync/service-sync-rules'; +import { DEFAULT_TAG, TablePattern } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; import { ColumnDescriptor, SourceEntityDescriptor } from './SourceEntity.js'; import { bson } from '../index.js'; @@ -19,6 +19,8 @@ export interface SourceTableOptions { bucketDataSourceIds?: number[]; parameterLookupSourceIds?: number[]; + // FIXME: Make required once all code is updated + pattern?: TablePattern; } export interface TableSnapshotStatus { @@ -113,6 +115,10 @@ export class SourceTable implements SourceEntityDescriptor { return this.options.parameterLookupSourceIds ?? []; } + get pattern() { + return this.options.pattern; + } + /** * In-memory clone of the table status. 
*/ @@ -124,7 +130,8 @@ export class SourceTable implements SourceEntityDescriptor { schema: this.schema, name: this.name, replicaIdColumns: this.replicaIdColumns, - snapshotComplete: this.snapshotComplete + snapshotComplete: this.snapshotComplete, + pattern: this.pattern }); copy.syncData = this.syncData; copy.syncParameters = this.syncParameters; diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index cfcf4678e..8bb4fe0f0 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -3,7 +3,8 @@ import { BucketDataSource, HydratedSyncRules, ScopedParameterLookup, - SqliteJsonRow + SqliteJsonRow, + TablePattern } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; import { BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; @@ -169,6 +170,7 @@ export interface ResolveTablesOptions { connection_id: number; connection_tag: string; entity_descriptor: SourceEntityDescriptor; + pattern: TablePattern; } export interface ResolveTableOptions { diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index 3e9381cb6..3e7266523 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -32,7 +32,7 @@ export interface RowProcessor { getSourceTables(): TablePattern[]; - getMatchingSources(table: SourceTableInterface): { + getMatchingSources(pattern: TablePattern): { bucketDataSources: BucketDataSource[]; parameterIndexLookupCreators: ParameterIndexLookupCreator[]; }; @@ -94,13 +94,16 @@ export class HydratedSyncRules implements RowProcessor { this.bucketSources = this.definition.bucketSources.map((source) => source.hydrate(params.createParams)); } - getMatchingSources(table: SourceTableInterface): { + getMatchingSources(pattern: TablePattern): { bucketDataSources: BucketDataSource[]; parameterIndexLookupCreators: ParameterIndexLookupCreator[]; } { - const bucketDataSources = this.bucketDataSources.filter((ds) => ds.tableSyncsData(table)); + // FIXME: Fix performance - don't scan all sources + const bucketDataSources = this.bucketDataSources.filter((ds) => + ds.getSourceTables().some((table) => table.equals(pattern)) + ); const parameterIndexLookupCreators: ParameterIndexLookupCreator[] = this.bucketParameterIndexLookupCreators.filter( - (ds) => ds.tableSyncsParameters(table) + (ds) => ds.getSourceTables().some((table) => table.equals(pattern)) ); return { bucketDataSources, From 13d2a31d38e8327aa91256b1a4bbdc161775f6e2 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 19 Jan 2026 17:01:03 +0200 Subject: [PATCH 043/101] Improve lookup performance in some cases. 
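The idea, sketched below with simplified types rather than the actual MergedSyncRules class: index non-wildcard patterns in a Map keyed by (connectionTag, schema, name) and fall back to a linear scan only for wildcard patterns, so the common case avoids scanning every source pattern.

    // Sketch of the two-tier lookup introduced in this commit; TableRef/PatternRef are simplified stand-ins.
    interface TableRef {
      connectionTag: string;
      schema: string;
      name: string;
    }

    interface PatternRef extends TableRef {
      isWildcard: boolean;
      matches(table: TableRef): boolean;
    }

    const patternKey = (t: TableRef): string => JSON.stringify([t.connectionTag, t.schema, t.name]);

    class PatternIndex {
      private indexed = new Map<string, PatternRef[]>();
      private wildcards: PatternRef[] = [];

      constructor(patterns: PatternRef[]) {
        for (const pattern of patterns) {
          if (pattern.isWildcard) {
            this.wildcards.push(pattern);
          } else {
            const key = patternKey(pattern);
            this.indexed.set(key, [...(this.indexed.get(key) ?? []), pattern]);
          }
        }
      }

      getMatching(table: TableRef): PatternRef[] {
        const direct = this.indexed.get(patternKey(table)) ?? [];
        if (this.wildcards.length === 0) {
          // Fast path: no wildcard patterns, so no linear scan is needed.
          return direct;
        }
        return [...direct, ...this.wildcards.filter((p) => p.matches(table))];
      }
    }
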
--- .../storage/implementation/MergedSyncRules.ts | 122 ++++++++++++------ .../src/replication/ChangeStream.ts | 17 +-- .../test/src/change_stream.test.ts | 5 +- packages/sync-rules/src/HydratedSyncRules.ts | 18 ++- 4 files changed, 112 insertions(+), 50 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index bc7c0fe25..340c2557d 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -22,6 +22,7 @@ import { SqliteRow, SqliteValue, SqlSyncRules, + TableDataSources, TablePattern } from '@powersync/service-sync-rules'; import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; @@ -53,9 +54,22 @@ export class MergedSyncRules implements RowProcessor { private resolvedDataSources: Map; private resolvedParameterLookupSources: Map; - private sourcePatterns: TablePattern[]; + + // keyed by TablePattern.key + private tableDataSources: Map = new Map(); + private allSyncRules: SqlSyncRules[]; + // all table patterns + private sourcePatterns: TablePattern[]; + // sourcePatterns, non-wildcard, keyed by patternKey() + private indexedPatterns: Map = new Map(); + // all wildcard patterns + private wildcardPatterns: TablePattern[] = []; + + eventDescriptors: SqlEventDescriptor[] = []; + compatibility: CompatibilityContext = CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY; + constructor(sources: MongoPersistedSyncRules[]) { let resolvedDataSources = new Map(); let resolvedParameterLookupSources = new Map(); @@ -76,12 +90,12 @@ export class MergedSyncRules implements RowProcessor { } const evaluate = hydrateEvaluateRow(hydrationState, source); resolvedDataSources.set(id, { source, evaluate, id }); - } - for (let pattern of syncRules.getSourceTables()) { - const key = pattern.key; - if (!sourcePatternMap.has(key)) { - sourcePatternMap.set(key, pattern); + for (let pattern of source.getSourceTables()) { + if (!this.tableDataSources.has(pattern.key)) { + this.tableDataSources.set(pattern.key, { bucketDataSources: [], parameterIndexLookupCreators: [] }); + } + this.tableDataSources.get(pattern.key)!.bucketDataSources.push(source); } } @@ -93,11 +107,37 @@ export class MergedSyncRules implements RowProcessor { const withScope = hydrateEvaluateParameterRow(hydrationState, source); resolvedParameterLookupSources.set(id, { source, id, evaluate: withScope }); + + for (let pattern of source.getSourceTables()) { + if (!this.tableDataSources.has(pattern.key)) { + this.tableDataSources.set(pattern.key, { bucketDataSources: [], parameterIndexLookupCreators: [] }); + } + this.tableDataSources.get(pattern.key)!.parameterIndexLookupCreators.push(source); + } + } + + for (let pattern of syncRules.getSourceTables()) { + const key = pattern.key; + if (!sourcePatternMap.has(key)) { + sourcePatternMap.set(key, pattern); + } } } this.resolvedDataSources = resolvedDataSources; this.resolvedParameterLookupSources = resolvedParameterLookupSources; this.sourcePatterns = Array.from(sourcePatternMap.values()); + + for (let pattern of this.sourcePatterns) { + if (pattern.isWildcard) { + this.wildcardPatterns.push(pattern); + } else { + const key = patternKey(pattern); + if (!this.indexedPatterns.has(key)) { + this.indexedPatterns.set(key, []); + } + this.indexedPatterns.get(key)!.push(pattern); + } + } } /** @@ -105,33 +145,27 @@ export class MergedSyncRules implements 
RowProcessor { * @param pattern The source database table definition, _not_ the individually derived SourceTables. * @returns */ - getMatchingSources(pattern: TablePattern): { - bucketDataSources: BucketDataSource[]; - parameterIndexLookupCreators: ParameterIndexLookupCreator[]; - } { - // FIXME: Fix performance - don't scan all sources - const bucketDataSources = [...this.resolvedDataSources.values()] - .map((dataSource) => dataSource.source) - .filter((ds) => ds.getSourceTables().some((table) => table.equals(pattern))); - - const parameterIndexLookupCreators: ParameterIndexLookupCreator[] = [ - ...this.resolvedParameterLookupSources.values() - ] - .map((dataSource) => dataSource.source) - .filter((ds) => ds.getSourceTables().some((table) => table.equals(pattern))); - return { - bucketDataSources, - parameterIndexLookupCreators - }; + getMatchingSources(pattern: TablePattern): TableDataSources { + return this.tableDataSources.get(pattern.key) ?? { bucketDataSources: [], parameterIndexLookupCreators: [] }; } - eventDescriptors: SqlEventDescriptor[] = []; - compatibility: CompatibilityContext = CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY; - getSourceTables(): TablePattern[] { return this.sourcePatterns; } + getMatchingTablePatterns(table: SourceTableInterface): TablePattern[] { + // Equivalent to: + // return this.sourcePatterns.filter((pattern) => pattern.matches(table)); + const tables = this.indexedPatterns.get(patternKey(table)) ?? []; + if (this.wildcardPatterns.length === 0) { + // Fast path - no wildcards + return tables; + } else { + const matchedPatterns = this.wildcardPatterns.filter((pattern) => pattern.matches(table)); + return [...tables, ...matchedPatterns]; + } + } + applyRowContext( source: SqliteRow ): SqliteRow { @@ -145,14 +179,18 @@ export class MergedSyncRules implements RowProcessor { // 2. For re-replication: We may take a snapshot when adding a new source, with a new SourceTable. // In that case, we don't want to re-evaluate all existing sources, only the new one. 
- // FIXME: Fix performance - don't scan all sources const table = options.sourceTable; + // FIXME: Fix API to not require this type assertion if (!(table instanceof SourceTable)) { throw new ReplicationAssertionError(`Expected SourceTable instance`); } - const bucketDataSources = [...this.resolvedDataSources.values()].filter((ds) => - table.bucketDataSourceIds.includes(ds.id) - ); + const bucketDataSources: ResolvedDataSource[] = []; + for (let sourceId of table.bucketDataSourceIds) { + const ds = this.resolvedDataSources.get(sourceId); + if (ds) { + bucketDataSources.push(ds); + } + } const rawResults: EvaluationResult[] = bucketDataSources.flatMap((dataSource) => dataSource.evaluate(options)); const results = rawResults.filter(isEvaluatedRow) as EvaluatedRow[]; @@ -165,14 +203,17 @@ export class MergedSyncRules implements RowProcessor { table: SourceTableInterface, row: SqliteRow ): { results: EvaluatedParameters[]; errors: EvaluationError[] } { - // FIXME: Fix performance - don't scan all sources - + // FIXME: Fix API to not require this type assertion if (!(table instanceof SourceTable)) { throw new ReplicationAssertionError(`Expected SourceTable instance`); } - const parameterIndexLookupCreators = [...this.resolvedParameterLookupSources.values()].filter((ds) => - table.parameterLookupSourceIds.includes(ds.id) - ); + let parameterIndexLookupCreators: ResolvedParameterLookupSource[] = []; + for (let sourceId of table.parameterLookupSourceIds) { + const ds = this.resolvedParameterLookupSources.get(sourceId); + if (ds) { + parameterIndexLookupCreators.push(ds); + } + } const rawResults: EvaluatedParametersResult[] = parameterIndexLookupCreators.flatMap((creator) => creator.evaluate(table, row) ); @@ -181,3 +222,12 @@ export class MergedSyncRules implements RowProcessor { return { results, errors }; } } + +/** + * Key for a pattern or source table. + * + * Does not support wildcard patterns. + */ +function patternKey(pattern: TablePattern | SourceTableInterface): string { + return JSON.stringify([pattern.connectionTag, pattern.schema, pattern.name]); +} diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 5d2051c1e..1a1d2d7ef 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -474,13 +474,14 @@ export class ChangeStream { // Ignore the postImages check in this case. } - // FIXME: Optimize - avoid scanning all source tables - const patterns = writer.rowProcessor.getSourceTables().filter((t) => { - return t.matches({ - connectionTag: this.connections.connectionTag, - schema: descriptor.schema, - name: descriptor.name - }); + // In common cases, there would be at most one matching pattern, since patterns + // are de-duplicated. However, there may be multiple if: + // 1. There is overlap with direct name matching and wildcard matching. + // 2. There are multiple patterns with different replication config. + const patterns = writer.rowProcessor.getMatchingTablePatterns({ + connectionTag: this.connections.connectionTag, + schema: descriptor.schema, + name: descriptor.name }); let allTables: SourceTable[] = []; @@ -493,7 +494,6 @@ export class ChangeStream { }); const snapshot = options.snapshot; - this.relationCache.set(getCacheIdentifier(descriptor), result.tables); // Drop conflicting collections. // This is generally not expected for MongoDB source dbs, so we log an error. 
@@ -517,6 +517,7 @@ export class ChangeStream { } allTables.push(...result.tables); } + this.relationCache.set(getCacheIdentifier(descriptor), allTables); return allTables; } diff --git a/modules/module-mongodb/test/src/change_stream.test.ts b/modules/module-mongodb/test/src/change_stream.test.ts index e3d66e3b5..c6b9dd8ee 100644 --- a/modules/module-mongodb/test/src/change_stream.test.ts +++ b/modules/module-mongodb/test/src/change_stream.test.ts @@ -443,10 +443,11 @@ bucket_definitions: const data = await context.getBucketData('global[]'); // Either case is valid here if (data.length == 3) { - expect(data).toMatchObject([ + expect(data.sort((a, b) => a.object_id?.localeCompare(b.object_id!) ?? 0)).toMatchObject([ // An extra op here, since this triggers a snapshot in addition to getting the event. - test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }), + // Can be either test1, test2, test2 or test2, test1, test2 test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test1' }), + test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }), test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test2' }) ]); } else { diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index 3e7266523..14081795a 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -32,10 +32,9 @@ export interface RowProcessor { getSourceTables(): TablePattern[]; - getMatchingSources(pattern: TablePattern): { - bucketDataSources: BucketDataSource[]; - parameterIndexLookupCreators: ParameterIndexLookupCreator[]; - }; + getMatchingTablePatterns(table: SourceTableInterface): TablePattern[]; + + getMatchingSources(pattern: TablePattern): TableDataSources; applyRowContext( source: SqliteRow @@ -49,6 +48,11 @@ export interface RowProcessor { ): { results: EvaluatedParameters[]; errors: EvaluationError[] }; } +export interface TableDataSources { + bucketDataSources: BucketDataSource[]; + parameterIndexLookupCreators: ParameterIndexLookupCreator[]; +} + /** * Hydrated sync rules is sync rule definitions along with persisted state. Currently, the persisted state * specifically affects bucket names. @@ -111,6 +115,12 @@ export class HydratedSyncRules implements RowProcessor { }; } + getMatchingTablePatterns(table: SourceTableInterface): TablePattern[] { + return this.definition.getSourceTables().filter((pattern) => { + return pattern.matches(table); + }); + } + // These methods do not depend on hydration, so we can just forward them to the definition. getSourceTables() { From b9d517d594ef915db37840dd6c0dd4b3cb03b9cf Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 11:59:46 +0200 Subject: [PATCH 044/101] Re-add check for resumeToken order. 
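The restored guard is simple: remember the last LSN derived from a change event and fail fast if a later event maps to a smaller LSN. A stripped-down sketch with illustrative names (the real code derives the LSN from the event's clusterTime and resume token inside ChangeStream):

    // Illustrative sketch of the ordering check, not the actual ChangeStream implementation.
    let lastSeenLsn: string | null = null;

    function assertMonotonicLsn(lsn: string): void {
      if (lastSeenLsn != null && lsn < lastSeenLsn) {
        // Should never happen with MongoDB; throwing stops replication so a restart can recover.
        throw new Error(`LSN ${lsn} is older than last seen LSN ${lastSeenLsn}`);
      }
      lastSeenLsn = lsn;
    }
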
--- .../implementation/MongoBucketBatch.ts | 2 +- .../src/replication/ChangeStream.ts | 28 +++++++++++-------- packages/sync-rules/src/HydratedSyncRules.ts | 1 + 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 7df0ece1a..883833824 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -1182,7 +1182,7 @@ export class MongoBucketBatch * * This is set when creating the batch, but may not be updated afterwards. */ - public resumeFromLsn: string | null = null; + public readonly resumeFromLsn: string | null = null; private needsActivation = true; diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 1a1d2d7ef..0b5275416 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -615,6 +615,11 @@ export class ChangeStream { let lastEmptyResume = performance.now(); + /** + * Used only for checking change stream order. + */ + let lastCheckpointLsn: string | null = null; + while (true) { if (this.abortSignal.aborted) { break; @@ -760,17 +765,17 @@ export class ChangeStream { timestamp: changeDocument.clusterTime!, resume_token: changeDocument._id }); - // FIXME: Implement this check again. We can't rely on batch.lastCheckpointLsn anymore. - // if (batch.lastCheckpointLsn != null && lsn < batch.lastCheckpointLsn) { - // // Checkpoint out of order - should never happen with MongoDB. - // // If it does happen, we throw an error to stop the replication - restarting should recover. - // // Since we use batch.lastCheckpointLsn for the next resumeAfter, this should not result in an infinite loop. - // // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042. - // // This has been fixed in the driver in the meantime, but we still keep this as a safety-check. - // throw new ReplicationAssertionError( - // `Change resumeToken ${(changeDocument._id as any)._data} (${timestampToDate(changeDocument.clusterTime!).toISOString()}) is less than last checkpoint LSN ${batch.lastCheckpointLsn}. Restarting replication.` - // ); - // } + + if (lastCheckpointLsn != null && lsn < lastCheckpointLsn) { + // Checkpoint out of order - should never happen with MongoDB. + // If it does happen, we throw an error to stop the replication - restarting should recover. + // Originally a workaround for https://jira.mongodb.org/browse/NODE-7042. + // This has been fixed in the driver in the meantime, but we still keep this as a safety-check. + throw new ReplicationAssertionError( + `Change resumeToken ${(changeDocument._id as any)._data} (${timestampToDate(changeDocument.clusterTime!).toISOString()}) is less than last seen LSN ${lastCheckpointLsn}. 
Restarting replication.` + ); + } + lastCheckpointLsn = lsn; if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) { waitForCheckpointLsn = null; @@ -778,7 +783,6 @@ export class ChangeStream { const didCommit = await writer.commitAll(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); if (didCommit) { - // TODO: Re-check this logic this.oldestUncommittedChange = null; this.isStartingReplication = false; changesSinceLastCheckpoint = 0; diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index 14081795a..8cb722533 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -103,6 +103,7 @@ export class HydratedSyncRules implements RowProcessor { parameterIndexLookupCreators: ParameterIndexLookupCreator[]; } { // FIXME: Fix performance - don't scan all sources + // Or just merge implementations with MergedSyncRules const bucketDataSources = this.bucketDataSources.filter((ds) => ds.getSourceTables().some((table) => table.equals(pattern)) ); From 048675f3ede2bf960b8eda4452287dfae558b2df Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 12:11:55 +0200 Subject: [PATCH 045/101] Fix batch logic in tests. --- modules/module-mongodb/test/src/change_stream_utils.ts | 2 +- modules/module-mssql/test/src/CDCStreamTestContext.ts | 2 +- modules/module-postgres/test/src/wal_stream_utils.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index cdc430876..e7c6c4607 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -208,7 +208,7 @@ export class ChangeStreamTestContext { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map = [bucketRequest(syncRules, bucket, start)]; + map = [bucketRequest(syncRules, bucket, BigInt(batches[0]!.chunkData.next_after))]; } return data; } diff --git a/modules/module-mssql/test/src/CDCStreamTestContext.ts b/modules/module-mssql/test/src/CDCStreamTestContext.ts index d3a3f855a..8572b1aac 100644 --- a/modules/module-mssql/test/src/CDCStreamTestContext.ts +++ b/modules/module-mssql/test/src/CDCStreamTestContext.ts @@ -193,7 +193,7 @@ export class CDCStreamTestContext implements AsyncDisposable { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map = [bucketRequest(syncRules, bucket, start)]; + map = [bucketRequest(syncRules, bucket, BigInt(batches[0]!.chunkData.next_after))]; } return data; } diff --git a/modules/module-postgres/test/src/wal_stream_utils.ts b/modules/module-postgres/test/src/wal_stream_utils.ts index ba615b7b2..c7ada271a 100644 --- a/modules/module-postgres/test/src/wal_stream_utils.ts +++ b/modules/module-postgres/test/src/wal_stream_utils.ts @@ -195,7 +195,7 @@ export class WalStreamTestContext implements AsyncDisposable { if (batches.length == 0 || !batches[0]!.chunkData.has_more) { break; } - map = [bucketRequest(syncRules, bucket, start)]; + map = [bucketRequest(syncRules, bucket, BigInt(batches[0]!.chunkData.next_after))]; } return data; } From 08089fd728e08832b7d819661425620aed8a4731 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 12:43:04 +0200 Subject: [PATCH 046/101] Fix metadata going missing. 
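The underlying pattern, shown as a sketch with a simplified class (not the real SourceTable): a field-by-field clone() silently drops any option added later, such as the new bucketDataSourceIds and parameterLookupSourceIds, unless the clone is updated in lockstep with the constructor options.

    // Simplified sketch of the bug pattern fixed here; TableSketch is illustrative only.
    class TableSketch {
      constructor(
        readonly name: string,
        readonly bucketDataSourceIds: number[] = []
      ) {}

      clone(): TableSketch {
        // Before the fix, the copy omitted bucketDataSourceIds, resetting it to [] on every clone.
        return new TableSketch(this.name, this.bucketDataSourceIds);
      }
    }
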
--- .../module-mongodb-storage/src/storage/MongoBucketStorage.ts | 3 ++- packages/service-core/src/storage/SourceTable.ts | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index d8f380ecd..5587f1bdb 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -90,7 +90,8 @@ export class MongoBucketStorage rowProcessor: mergedProcessor, skipExistingRows: options.skipExistingRows ?? false, slotName: '', - storeCurrentData: options.storeCurrentData + storeCurrentData: options.storeCurrentData, + logger: options.logger }); for (let storage of mongoStorages) { diff --git a/packages/service-core/src/storage/SourceTable.ts b/packages/service-core/src/storage/SourceTable.ts index fcef47347..bad7fb5ca 100644 --- a/packages/service-core/src/storage/SourceTable.ts +++ b/packages/service-core/src/storage/SourceTable.ts @@ -131,7 +131,9 @@ export class SourceTable implements SourceEntityDescriptor { name: this.name, replicaIdColumns: this.replicaIdColumns, snapshotComplete: this.snapshotComplete, - pattern: this.pattern + pattern: this.pattern, + bucketDataSourceIds: this.bucketDataSourceIds, + parameterLookupSourceIds: this.parameterLookupSourceIds }); copy.syncData = this.syncData; copy.syncParameters = this.syncParameters; From 370bbdd099a6c3ccd31c1417de5177e98693afac Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 14:22:42 +0200 Subject: [PATCH 047/101] Fix detecting new changes. --- .../src/storage/implementation/MongoSyncBucketStorage.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 7f3871903..5670c8e26 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -918,7 +918,10 @@ export class MongoSyncBucketStorage .find( { // We have an index on (_id.g, last_op). - '_id.g': this.group_id, + // We cannot do a plain filter this on _id.g anymore, since that depends on the bucket definition. + // For now we leave out the filter. But we may need to either: + // 1. Add a new index purely on last_op, or + // 2. Use an $in on all relevant _id.g values (from the sync rules mapping). last_op: { $gt: options.lastCheckpoint.checkpoint } }, { From 3d9bbe84d190edb14b4dbd3acce3790be8ca7df1 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 14:49:33 +0200 Subject: [PATCH 048/101] Initial round of storage test fixes. 
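Buckets in the snapshots are now keyed by numeric data-source id (10002[], 10002["user1"], ...) instead of by definition name (mybucket[], by_user["user1"]), and the sync tests switch from per-assertion bucketStorage.startBatch(...) callbacks to a single combined writer plus a resolveTestTable helper that registers the table against its sync-rule pattern (with a test-only idGenerator for stable table ids). A condensed sketch of the new test shape, using the helper and writer methods introduced in the diff below - this assumes the service-core-tests factory harness and is not runnable on its own:

    const syncRules = await f.updateSyncRules({ content: BASIC_SYNC_RULES });
    const bucketStorage = f.getInstance(syncRules);

    // One writer per test, disposed automatically; tables are resolved through it
    // rather than constructed up front with makeTestTable().
    await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS);
    const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config);

    await writer.markAllSnapshotDone('0/1');
    await writer.save({
      sourceTable: testTable,
      tag: storage.SaveOperationTag.INSERT,
      after: { id: 't1', description: 'Test 1' },
      afterReplicaId: 't1'
    });
    await writer.commitAll('0/1');
    // Bucket data is then requested as '10002[]' rather than 'mybucket[]'.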
--- .../implementation/MongoBucketBatch.ts | 3 +- .../__snapshots__/storage_sync.test.ts.snap | 88 +-- .../src/test-utils/general-utils.ts | 38 +- .../src/tests/register-sync-tests.ts | 748 +++++++++--------- .../src/storage/SyncRulesBucketStorage.ts | 5 + 5 files changed, 445 insertions(+), 437 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 883833824..f7c16e742 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -359,8 +359,9 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { (id) => !coveredParameterLookupSourceIds.has(id) ); if (pendingBucketDataSourceIds.length > 0 || pendingParameterLookupSourceIds.length > 0) { + const id = options.idGenerator ? (options.idGenerator() as bson.ObjectId) : new bson.ObjectId(); const doc: SourceTableDocument = { - _id: new bson.ObjectId(), + _id: id, connection_id: connection_id, relation_id: objectId, schema_name: schema, diff --git a/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap b/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap index e3449a7c7..fb2585a0e 100644 --- a/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap +++ b/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap @@ -6,7 +6,7 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -93886621, "count": 2, "priority": 3, @@ -104,13 +104,13 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 2`] = ` ] `; -exports[`sync - mongodb > encodes sync rules id in buckes for streams 1`] = ` +exports[`sync - mongodb > encodes sync rules id in buckets for streams 1`] = ` [ { "checkpoint": { "buckets": [ { - "bucket": "1#test|0[]", + "bucket": "10002[]", "checksum": 920318466, "count": 1, "priority": 3, @@ -135,7 +135,7 @@ exports[`sync - mongodb > encodes sync rules id in buckes for streams 1`] = ` { "data": { "after": "0", - "bucket": "1#test|0[]", + "bucket": "10002[]", "data": [ { "checksum": 920318466, @@ -159,13 +159,13 @@ exports[`sync - mongodb > encodes sync rules id in buckes for streams 1`] = ` ] `; -exports[`sync - mongodb > encodes sync rules id in buckes for streams 2`] = ` +exports[`sync - mongodb > encodes sync rules id in buckets for streams 2`] = ` [ { "checkpoint": { "buckets": [ { - "bucket": "2#test|0[]", + "bucket": "10002[]", "checksum": 920318466, "count": 1, "priority": 3, @@ -228,7 +228,7 @@ exports[`sync - mongodb > expiring token 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": 0, "count": 0, "priority": 3, @@ -272,7 +272,7 @@ exports[`sync - mongodb > sends checkpoint complete line for empty checkpoint 1` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -1221282404, "count": 1, "priority": 3, @@ -297,7 +297,7 @@ exports[`sync - mongodb > sends checkpoint complete line for empty checkpoint 1` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 3073684892, @@ -341,7 +341,7 @@ exports[`sync - mongodb > sync buckets in order 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "b0[]", + "bucket": "10003[]", 
"checksum": 920318466, "count": 1, "priority": 2, @@ -352,7 +352,7 @@ exports[`sync - mongodb > sync buckets in order 1`] = ` ], }, { - "bucket": "b1[]", + "bucket": "10004[]", "checksum": -1382098757, "count": 1, "priority": 1, @@ -382,7 +382,7 @@ exports[`sync - mongodb > sync buckets in order 1`] = ` { "data": { "after": "0", - "bucket": "b1[]", + "bucket": "10004[]", "data": [ { "checksum": 2912868539, @@ -407,7 +407,7 @@ exports[`sync - mongodb > sync buckets in order 1`] = ` { "data": { "after": "0", - "bucket": "b0[]", + "bucket": "10003[]", "data": [ { "checksum": 920318466, @@ -437,7 +437,7 @@ exports[`sync - mongodb > sync global data 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -93886621, "count": 2, "priority": 3, @@ -462,7 +462,7 @@ exports[`sync - mongodb > sync global data 1`] = ` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 920318466, @@ -501,7 +501,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint "checkpoint": { "buckets": [ { - "bucket": "b0a[]", + "bucket": "10004[]", "checksum": -659831575, "count": 2000, "priority": 2, @@ -512,7 +512,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint ], }, { - "bucket": "b0b[]", + "bucket": "10005[]", "checksum": -659831575, "count": 2000, "priority": 2, @@ -523,7 +523,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint ], }, { - "bucket": "b1[]", + "bucket": "10006[]", "checksum": -1096116670, "count": 1, "priority": 1, @@ -558,7 +558,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "0", - "bucket": "b1[]", + "bucket": "10006[]", "data": undefined, "has_more": false, "next_after": "1", @@ -573,7 +573,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "0", - "bucket": "b0a[]", + "bucket": "10004[]", "data": undefined, "has_more": true, "next_after": "2000", @@ -582,7 +582,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "2000", - "bucket": "b0a[]", + "bucket": "10004[]", "data": undefined, "has_more": true, "next_after": "4000", @@ -594,7 +594,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint "removed_buckets": [], "updated_buckets": [ { - "bucket": "b0a[]", + "bucket": "10004[]", "checksum": 883076828, "count": 2001, "priority": 2, @@ -605,7 +605,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint ], }, { - "bucket": "b0b[]", + "bucket": "10005[]", "checksum": 883076828, "count": 2001, "priority": 2, @@ -616,7 +616,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint ], }, { - "bucket": "b1[]", + "bucket": "10006[]", "checksum": 1841937527, "count": 2, "priority": 1, @@ -633,7 +633,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "1", - "bucket": "b1[]", + "bucket": "10006[]", "data": undefined, "has_more": false, "next_after": "4002", @@ -648,7 +648,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "4000", - "bucket": "b0a[]", + "bucket": "10004[]", "data": undefined, "has_more": false, "next_after": "4003", @@ -657,7 +657,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "0", - 
"bucket": "b0b[]", + "bucket": "10005[]", "data": undefined, "has_more": true, "next_after": "1999", @@ -666,7 +666,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "1999", - "bucket": "b0b[]", + "bucket": "10005[]", "data": undefined, "has_more": true, "next_after": "3999", @@ -675,7 +675,7 @@ exports[`sync - mongodb > sync interrupts low-priority buckets on new checkpoint { "data": { "after": "3999", - "bucket": "b0b[]", + "bucket": "10005[]", "data": undefined, "has_more": false, "next_after": "4004", @@ -695,7 +695,7 @@ exports[`sync - mongodb > sync legacy non-raw data 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -852817836, "count": 1, "priority": 3, @@ -720,7 +720,7 @@ exports[`sync - mongodb > sync legacy non-raw data 1`] = ` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 3442149460n, @@ -755,7 +755,7 @@ exports[`sync - mongodb > sync updates to data query only 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "checksum": 0, "count": 0, "priority": 3, @@ -793,7 +793,7 @@ exports[`sync - mongodb > sync updates to data query only 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "checksum": 1418351250, "count": 1, "priority": 3, @@ -810,7 +810,7 @@ exports[`sync - mongodb > sync updates to data query only 2`] = ` { "data": { "after": "0", - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "data": [ { "checksum": 1418351250, @@ -819,7 +819,7 @@ exports[`sync - mongodb > sync updates to data query only 2`] = ` "object_type": "lists", "op": "PUT", "op_id": "2", - "subkey": "0ffb7b58-d14d-5efa-be6c-c8eda74ab7a8", + "subkey": "ae9cbda1-5d8a-5a61-aaa4-366940758339", }, ], "has_more": false, @@ -840,7 +840,7 @@ exports[`sync - mongodb > sync updates to global data 1`] = ` "checkpoint": { "buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": 0, "count": 0, "priority": 3, @@ -878,7 +878,7 @@ exports[`sync - mongodb > sync updates to global data 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": 920318466, "count": 1, "priority": 3, @@ -895,7 +895,7 @@ exports[`sync - mongodb > sync updates to global data 2`] = ` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 920318466, @@ -927,7 +927,7 @@ exports[`sync - mongodb > sync updates to global data 3`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": -93886621, "count": 2, "priority": 3, @@ -944,7 +944,7 @@ exports[`sync - mongodb > sync updates to global data 3`] = ` { "data": { "after": "1", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 3280762209, @@ -1000,7 +1000,7 @@ exports[`sync - mongodb > sync updates to parameter query + data 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "checksum": 1418351250, "count": 1, "priority": 3, @@ -1017,7 +1017,7 @@ exports[`sync - mongodb > sync updates to parameter query + data 2`] = ` { "data": { "after": "0", - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "data": [ { "checksum": 1418351250, @@ -1026,7 +1026,7 @@ exports[`sync - mongodb > sync updates to parameter query + data 2`] = ` "object_type": "lists", "op": 
"PUT", "op_id": "1", - "subkey": "0ffb7b58-d14d-5efa-be6c-c8eda74ab7a8", + "subkey": "ae9cbda1-5d8a-5a61-aaa4-366940758339", }, ], "has_more": false, @@ -1073,7 +1073,7 @@ exports[`sync - mongodb > sync updates to parameter query only 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "by_user["user1"]", + "bucket": "10002["user1"]", "checksum": 0, "count": 0, "priority": 3, diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 1259d580c..eb9e11994 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -1,5 +1,5 @@ import { BucketDataRequest, InternalOpId, storage, sync, utils } from '@powersync/service-core'; -import { GetQuerierOptions, RequestParameters, SqlSyncRules } from '@powersync/service-sync-rules'; +import { GetQuerierOptions, RequestParameters, SqlSyncRules, TablePattern } from '@powersync/service-sync-rules'; import { versionedHydrationState } from '@powersync/service-sync-rules'; import * as bson from 'bson'; @@ -58,6 +58,42 @@ export function makeTestTable( }); } +export async function resolveTestTable( + writer: storage.BucketDataWriter, + name: string, + replicaIdColumns: string[] | undefined, + options: { tableIdStrings: boolean }, + idIndex: number = 1 +) { + const relId = utils.hashData('table', name, (replicaIdColumns ?? ['id']).join(',')); + const idString = '6544e3899293153fa7b383' + (30 + idIndex).toString().padStart(2, '0'); + + const id = options.tableIdStrings == false ? new bson.ObjectId(idString) : idString; + let didGenerateId = false; + const result = await writer.resolveTables({ + connection_id: 1, + connection_tag: storage.SourceTable.DEFAULT_TAG, + + entity_descriptor: { + name: name, + schema: 'public', + objectId: relId, + + replicaIdColumns: (replicaIdColumns ?? 
['id']).map((column) => ({ name: column, type: 'VARCHAR', typeId: 25 })) + }, + pattern: new TablePattern('public', name), + idGenerator: () => { + if (didGenerateId) { + throw new Error('idGenerator called multiple times - not supported in tests'); + } + didGenerateId = true; + console.log('got id', name, id); + return id; + } + }); + return result.tables[0]; +} + export function getBatchData( batch: utils.SyncBucketData[] | storage.SyncBucketDataChunk[] | storage.SyncBucketDataChunk ) { diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index a764ac84e..ef011fc86 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -45,8 +45,6 @@ export function registerSyncTests(config: storage.TestStorageConfig) { maxDataFetchConcurrency: 2 }); - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); - test('sync global data', async () => { await using f = await factory(); @@ -55,33 +53,33 @@ export function registerSyncTests(config: storage.TestStorageConfig) { }); const bucketStorage = f.getInstance(syncRules); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); + await writer.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - description: 'Test 2' - }, - afterReplicaId: 't2' - }); + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' + }); - await batch.commit('0/1'); + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + description: 'Test 2' + }, + afterReplicaId: 't2' }); + await writer.commitAll('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage: bucketStorage, @@ -118,32 +116,32 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'earlier', - description: 'Test 2' - }, - afterReplicaId: 'earlier' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'earlier', + description: 'Test 2' + }, + afterReplicaId: 'earlier' }); + await writer.commitAll('0/1'); 
+ const stream = sync.streamResponse({ syncContext, bucketStorage, @@ -180,33 +178,33 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // Initial data: Add one priority row and 10k low-priority rows. - await batch.save({ - sourceTable: TEST_TABLE, + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + // Initial data: Add one priority row and 10k low-priority rows. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio', + description: 'High priority row' + }, + afterReplicaId: 'highprio' + }); + for (let i = 0; i < 10_000; i++) { + await writer.save({ + sourceTable: testTable, tag: storage.SaveOperationTag.INSERT, after: { - id: 'highprio', - description: 'High priority row' + id: `${i}`, + description: 'low prio' }, - afterReplicaId: 'highprio' + afterReplicaId: `${i}` }); - for (let i = 0; i < 10_000; i++) { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `${i}`, - description: 'low prio' - }, - afterReplicaId: `${i}` - }); - } + } - await batch.commit('0/1'); - }); + await writer.commitAll('0/1'); const stream = sync.streamResponse({ syncContext, @@ -234,20 +232,18 @@ bucket_definitions: if (sentCheckpoints == 1) { // Save new data to interrupt the low-priority sync. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add another high-priority row. This should interrupt the long-running low-priority sync. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio2', - description: 'Another high-priority row' - }, - afterReplicaId: 'highprio2' - }); - - await batch.commit('0/2'); + // Add another high-priority row. This should interrupt the long-running low-priority sync. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio2', + description: 'Another high-priority row' + }, + afterReplicaId: 'highprio2' }); + + await writer.commitAll('0/2'); } else { // Low-priority sync from the first checkpoint was interrupted. This should not happen before // 1000 low-priority items were synchronized. @@ -291,33 +287,33 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // Initial data: Add one priority row and 10k low-priority rows. - await batch.save({ - sourceTable: TEST_TABLE, + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + // Initial data: Add one priority row and 10k low-priority rows. 
+ await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio', + description: 'user_one' + }, + afterReplicaId: 'highprio' + }); + for (let i = 0; i < 10_000; i++) { + await writer.save({ + sourceTable: testTable, tag: storage.SaveOperationTag.INSERT, after: { - id: 'highprio', - description: 'user_one' + id: `${i}`, + description: 'low prio' }, - afterReplicaId: 'highprio' + afterReplicaId: `${i}` }); - for (let i = 0; i < 10_000; i++) { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `${i}`, - description: 'low prio' - }, - afterReplicaId: `${i}` - }); - } + } - await batch.commit('0/1'); - }); + await writer.commitAll('0/1'); const stream = sync.streamResponse({ syncContext, @@ -350,20 +346,18 @@ bucket_definitions: if (typeof next === 'object' && next !== null) { if ('partial_checkpoint_complete' in next) { if (sentCheckpoints == 1) { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add a high-priority row that doesn't affect this sync stream. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio2', - description: 'user_two' - }, - afterReplicaId: 'highprio2' - }); - - await batch.commit('0/2'); + // Add a high-priority row that doesn't affect this sync stream. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio2', + description: 'user_two' + }, + afterReplicaId: 'highprio2' }); + + await writer.commitAll('0/2'); } else { expect(sentCheckpoints).toBe(2); expect(sentRows).toBe(10002); @@ -384,20 +378,18 @@ bucket_definitions: if (completedCheckpoints == 1) { expect(sentRows).toBe(10001); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add a high-priority row that affects this sync stream. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio3', - description: 'user_one' - }, - afterReplicaId: 'highprio3' - }); - - await batch.commit('0/3'); + // Add a high-priority row that affects this sync stream. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio3', + description: 'user_one' + }, + afterReplicaId: 'highprio3' }); + + await writer.commitAll('0/3'); } } } @@ -433,33 +425,33 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // Initial data: Add one priority row and 10k low-priority rows. - await batch.save({ - sourceTable: TEST_TABLE, + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + // Initial data: Add one priority row and 10k low-priority rows. 
+ await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio', + description: 'High priority row' + }, + afterReplicaId: 'highprio' + }); + for (let i = 0; i < 2_000; i++) { + await writer.save({ + sourceTable: testTable, tag: storage.SaveOperationTag.INSERT, after: { - id: 'highprio', - description: 'High priority row' + id: `${i}`, + description: 'low prio' }, - afterReplicaId: 'highprio' + afterReplicaId: `${i}` }); - for (let i = 0; i < 2_000; i++) { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `${i}`, - description: 'low prio' - }, - afterReplicaId: `${i}` - }); - } + } - await batch.commit('0/1'); - }); + await writer.commitAll('0/1'); const stream = sync.streamResponse({ syncContext, @@ -496,31 +488,29 @@ bucket_definitions: if (sentRows == 1001) { // Save new data to interrupt the low-priority sync. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Add another high-priority row. This should interrupt the long-running low-priority sync. - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'highprio2', - description: 'Another high-priority row' - }, - afterReplicaId: 'highprio2' - }); - - // Also add a low-priority row - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: '2001', - description: 'Another low-priority row' - }, - afterReplicaId: '2001' - }); - - await batch.commit('0/2'); + // Add another high-priority row. This should interrupt the long-running low-priority sync. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'highprio2', + description: 'Another high-priority row' + }, + afterReplicaId: 'highprio2' }); + + // Also add a low-priority row + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: '2001', + description: 'Another low-priority row' + }, + afterReplicaId: '2001' + }); + + await writer.commitAll('0/2'); } if (sentRows >= 1000 && sentRows <= 2001) { @@ -563,20 +553,20 @@ bucket_definitions: content: BASIC_SYNC_RULES }); const bucketStorage = f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'sync' - }, - afterReplicaId: 't1' - }); - await batch.commit('0/1'); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'sync' + }, + afterReplicaId: 't1' }); + await writer.commitAll('0/1'); const stream = sync.streamResponse({ syncContext, @@ -607,9 +597,7 @@ bucket_definitions: if (receivedCompletions == 1) { // Trigger an empty bucket update. 
await bucketStorage.createManagedWriteCheckpoint({ user_id: '', heads: { '1': '1/0' } }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.commit('1/0'); - }); + await writer.commitAll('1/0'); } else { break; } @@ -621,30 +609,30 @@ bucket_definitions: }); test('sync legacy non-raw data', async () => { - const f = await factory(); + await using f = await factory(); const syncRules = await f.updateSyncRules({ content: BASIC_SYNC_RULES }); const bucketStorage = await f.getInstance(syncRules); - - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test\n"string"', - large_num: 12345678901234567890n - }, - afterReplicaId: 't1' - }); - - await batch.commit('0/1'); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test\n"string"', + large_num: 12345678901234567890n + }, + afterReplicaId: 't1' }); + await writer.commitAll('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage, @@ -700,11 +688,11 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); // Activate - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/0'); - await batch.keepalive('0/0'); - }); + await writer.markAllSnapshotDone('0/0'); + await writer.keepaliveAll('0/0'); const stream = sync.streamResponse({ syncContext, @@ -726,36 +714,32 @@ bucket_definitions: expect(await getCheckpointLines(iter)).toMatchSnapshot(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' }); - expect(await getCheckpointLines(iter)).toMatchSnapshot(); + await writer.commitAll('0/1'); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - description: 'Test 2' - }, - afterReplicaId: 't2' - }); + expect(await getCheckpointLines(iter)).toMatchSnapshot(); - await batch.commit('0/2'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + description: 'Test 2' + }, + afterReplicaId: 't2' }); + await writer.commitAll('0/2'); + expect(await getCheckpointLines(iter)).toMatchSnapshot(); }); @@ -771,15 +755,13 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id'], config); - const listsTable = test_utils.makeTestTable('lists', ['id'], config); - const bucketStorage = await f.getInstance(syncRules); + await using writer = 
await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); + // Activate - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/0'); - await batch.keepalive('0/0'); - }); + await writer.markAllSnapshotDone('0/0'); + await writer.keepaliveAll('0/0'); const stream = sync.streamResponse({ syncContext, @@ -805,24 +787,22 @@ bucket_definitions: expect(checkpoint1).toMatchSnapshot(); // Add user - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: usersTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'user1' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: usersTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'user1' }); + await writer.commitAll('0/1'); + const checkpoint2 = await getCheckpointLines(iter); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['by_user["user1"]']); + ).toEqual(['10002["user1"]']); expect(checkpoint2).toMatchSnapshot(); }); @@ -838,26 +818,24 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id'], config); - const listsTable = test_utils.makeTestTable('lists', ['id'], config); - const bucketStorage = await f.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: usersTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'user1' - }); - - await batch.commit('0/1'); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); + const listsTable = await test_utils.resolveTestTable(writer, 'lists', ['id'], config, 2); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: usersTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'user1' }); + await writer.commitAll('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage, @@ -878,29 +856,27 @@ bucket_definitions: const checkpoint1 = await getCheckpointLines(iter); expect((checkpoint1[0] as StreamingSyncCheckpoint).checkpoint?.buckets?.map((b) => b.bucket)).toEqual([ - 'by_user["user1"]' + '10002["user1"]' // FIXME: don't hardcode ]); expect(checkpoint1).toMatchSnapshot(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: listsTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'list1', - user_id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'list1' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: listsTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'list1', + user_id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'list1' }); + await writer.commitAll('0/1'); + const checkpoint2 = await getCheckpointLines(iter); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['by_user["user1"]']); + ).toEqual(['10002["user1"]']); 
expect(checkpoint2).toMatchSnapshot(); }); @@ -916,15 +892,13 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id'], config); - const listsTable = test_utils.makeTestTable('lists', ['id'], config); - const bucketStorage = await f.getInstance(syncRules); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); + const listsTable = await test_utils.resolveTestTable(writer, 'lists', ['id'], config, 2); // Activate - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/0'); - await batch.keepalive('0/0'); - }); + await writer.markAllSnapshotDone('0/0'); + await writer.keepaliveAll('0/0'); const stream = sync.streamResponse({ syncContext, @@ -947,36 +921,34 @@ bucket_definitions: // Initial empty checkpoint expect(await getCheckpointLines(iter)).toMatchSnapshot(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: listsTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'list1', - user_id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'list1' - }); - - await batch.save({ - sourceTable: usersTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - name: 'User 1' - }, - afterReplicaId: 'user1' - }); + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: listsTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'list1', + user_id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'list1' + }); - await batch.commit('0/1'); + await writer.save({ + sourceTable: usersTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + name: 'User 1' + }, + afterReplicaId: 'user1' }); + await writer.commitAll('0/1'); + const checkpoint2 = await getCheckpointLines(iter); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['by_user["user1"]']); + ).toEqual(['10002["user1"]']); // TODO: don't hardcode bucket name expect(checkpoint2).toMatchSnapshot(); }); @@ -988,11 +960,10 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); // Activate - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/0'); - await batch.keepalive('0/0'); - }); + await writer.markAllSnapshotDone('0/0'); + await writer.keepaliveAll('0/0'); const exp = Date.now() / 1000 + 0.1; @@ -1034,32 +1005,32 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - 
sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - description: 'Test 2' - }, - afterReplicaId: 't2' - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + description: 'Test 2' + }, + afterReplicaId: 't2' }); + await writer.commitAll('0/1'); + const stream = sync.streamResponse({ syncContext, bucketStorage, @@ -1091,31 +1062,29 @@ bucket_definitions: // Now we save additional data AND compact before continuing. // This invalidates the checkpoint we've received above. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1', - description: 'Test 1b' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2', - description: 'Test 2b' - }, - afterReplicaId: 't2' - }); + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1', + description: 'Test 1b' + }, + afterReplicaId: 't1' + }); - await batch.commit('0/2'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2', + description: 'Test 2b' + }, + afterReplicaId: 't2' }); + await writer.commitAll('0/2'); + await bucketStorage.compact({ minBucketChanges: 1 }); @@ -1178,12 +1147,11 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // <= the managed write checkpoint LSN below - await batch.commit('0/1'); - }); + await writer.markAllSnapshotDone('0/1'); + // <= the managed write checkpoint LSN below + await writer.commitAll('0/1'); const checkpoint = await bucketStorage.createManagedWriteCheckpoint({ user_id: 'test', @@ -1215,11 +1183,9 @@ bucket_definitions: }) }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - // must be >= the managed write checkpoint LSN - await batch.commit('1/0'); - }); + await writer.markAllSnapshotDone('0/1'); + // must be >= the managed write checkpoint LSN + await writer.commitAll('1/0'); // At this point the LSN has advanced, so the write checkpoint should be // included in the next checkpoint message. 
@@ -1233,7 +1199,7 @@ bucket_definitions: }); }); - test('encodes sync rules id in buckes for streams', async () => { + test('encodes sync rules id in buckets for streams', async () => { await using f = await factory(); const rules = ` streams: @@ -1250,20 +1216,20 @@ config: content: rules }); const bucketStorage = f.getInstance(syncRules); + await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('0/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - description: 'Test 1' - }, - afterReplicaId: 't1' - }); - await batch.commit('0/1'); + await writer.markAllSnapshotDone('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + description: 'Test 1' + }, + afterReplicaId: 't1' }); + await writer.commitAll('0/1'); const stream = sync.streamResponse({ syncContext, diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 8bb4fe0f0..0c6088a0d 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -13,6 +13,7 @@ import { ParseSyncRulesOptions, PersistedSyncRules } from './PersistedSyncRulesC import { SourceEntityDescriptor } from './SourceEntity.js'; import { SourceTable } from './SourceTable.js'; import { SyncStorageWriteCheckpointAPI } from './WriteCheckpointAPI.js'; +import { bson } from '../index.js'; /** * Storage for a specific copy of sync rules. @@ -171,6 +172,10 @@ export interface ResolveTablesOptions { connection_tag: string; entity_descriptor: SourceEntityDescriptor; pattern: TablePattern; + /** + * For tests only - custom id generator. + */ + idGenerator?: () => string | bson.ObjectId; } export interface ResolveTableOptions { From 604461e385df979d090f701e88340f8e0be4aa91 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 14:56:05 +0200 Subject: [PATCH 049/101] Round 2 of storage test fixes. --- .../src/test-utils/general-utils.ts | 17 +- .../tests/register-data-storage-data-tests.ts | 1231 ++++++++--------- 2 files changed, 604 insertions(+), 644 deletions(-) diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index eb9e11994..fc641e194 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -70,6 +70,17 @@ export async function resolveTestTable( const id = options.tableIdStrings == false ? new bson.ObjectId(idString) : idString; let didGenerateId = false; + const patterns = writer.rowProcessor.getMatchingTablePatterns({ + schema: 'public', + name: name, + connectionTag: storage.SourceTable.DEFAULT_TAG + }); + if (patterns.length == 0) { + throw new Error(`Table ${name} not found in sync rules`); + } else if (patterns.length > 1) { + throw new Error(`Multiple patterns match table ${name} - not supported in test`); + } + const pattern = patterns[0]; const result = await writer.resolveTables({ connection_id: 1, connection_tag: storage.SourceTable.DEFAULT_TAG, @@ -81,7 +92,7 @@ export async function resolveTestTable( replicaIdColumns: (replicaIdColumns ?? 
['id']).map((column) => ({ name: column, type: 'VARCHAR', typeId: 25 })) }, - pattern: new TablePattern('public', name), + pattern, idGenerator: () => { if (didGenerateId) { throw new Error('idGenerator called multiple times - not supported in tests'); @@ -91,6 +102,10 @@ export async function resolveTestTable( return id; } }); + const table = result.tables[0]; + if (table == null) { + throw new Error(`Failed to resolve test table ${name}`); + } return result.tables[0]; } diff --git a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts index 3b778158f..0b625fc43 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts @@ -1,12 +1,4 @@ -import { - BucketDataBatchOptions, - BucketDataRequest, - BucketRequest, - getUuidReplicaIdentityBson, - InternalOpId, - OplogEntry, - storage -} from '@powersync/service-core'; +import { BucketDataBatchOptions, getUuidReplicaIdentityBson, OplogEntry, storage } from '@powersync/service-core'; import { describe, expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; import { bucketRequest } from '../test-utils/test-utils-index.js'; @@ -33,7 +25,6 @@ const normalizeOplogData = (data: OplogEntry['data']) => { */ export function registerDataStorageDataTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); test('removing row', async () => { await using factory = await generateStorageFactory(); @@ -43,30 +34,29 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - await batch.markAllSnapshotDone('1/1'); + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); + await writer.commitAll('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); @@ -106,37 +96,32 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - const sourceTable = TEST_TABLE; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - 
sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); - - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; + await writer.commitAll('0/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.commitAll('2/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); @@ -173,41 +158,36 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - const sourceTable = TEST_TABLE; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); + await writer.markAllSnapshotDone('1/1'); - await batch.commit('0/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; + await writer.commitAll('0/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test1' - }, - after: { - id: 'test1', - description: 'test1' - }, - beforeReplicaId: test_utils.rid('test1'), - afterReplicaId: test_utils.rid('test1') - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1' + }, + after: { + id: 'test1', + description: 'test1' + }, + beforeReplicaId: test_utils.rid('test1'), + afterReplicaId: test_utils.rid('test1') }); + await writer.commitAll('2/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -243,30 +223,29 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - await batch.markAllSnapshotDone('1/1'); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.commit('1/1'); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: 
test_utils.rid('test1') + }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.commitAll('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -302,47 +281,46 @@ bucket_definitions: global: data: - SELECT client_id as id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + client_id: 'client1a', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') + }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + client_id: 'client1b', + description: 'test1b' + }, + afterReplicaId: test_utils.rid('test1') + }); - const sourceTable = TEST_TABLE; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - client_id: 'client1a', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - client_id: 'client1b', - description: 'test1b' - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test2', - client_id: 'client2', - description: 'test2' - }, - afterReplicaId: test_utils.rid('test2') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test2', + client_id: 'client2', + description: 'test2' + }, + afterReplicaId: test_utils.rid('test2') }); + + await writer.commitAll('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { @@ -368,46 +346,39 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - await batch.markAllSnapshotDone('1/1'); + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.flush(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - - await batch.save({ - sourceTable, - tag: 
storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; + await writer.commitAll('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); + await writer.flush(); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -447,91 +418,85 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.flush(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - description: undefined - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - description: undefined - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + description: undefined + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + description: undefined + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; + await writer.commitAll('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - description: undefined - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test1', - description: undefined - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.save({ 
+ sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + description: undefined + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - beforeReplicaId: test_utils.rid('test1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test1', + description: undefined + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') }); + await writer.commitAll('2/1'); + const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -581,117 +546,114 @@ bucket_definitions: global: data: - SELECT id, description FROM "test" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); // Pre-setup - const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test2', - description: 'test2a' - }, - afterReplicaId: test_utils.rid('test2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test2', + description: 'test2a' + }, + afterReplicaId: test_utils.rid('test2') }); + const result1 = await writer.flush(); const checkpoint1 = result1?.flushed_op ?? 
0n; // Test batch - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - // b - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1b' - }, - afterReplicaId: test_utils.rid('test1') - }); + // b + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1b' + }, + afterReplicaId: test_utils.rid('test1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test1' - }, - beforeReplicaId: test_utils.rid('test1'), - after: { - id: 'test2', - description: 'test2b' - }, - afterReplicaId: test_utils.rid('test2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1' + }, + beforeReplicaId: test_utils.rid('test1'), + after: { + id: 'test2', + description: 'test2b' + }, + afterReplicaId: test_utils.rid('test2') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test2' - }, - beforeReplicaId: test_utils.rid('test2'), - after: { - id: 'test3', - description: 'test3b' - }, + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test2' + }, + beforeReplicaId: test_utils.rid('test2'), + after: { + id: 'test3', + description: 'test3b' + }, - afterReplicaId: test_utils.rid('test3') - }); + afterReplicaId: test_utils.rid('test3') + }); - // c - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'test2', - description: 'test2c' - }, - afterReplicaId: test_utils.rid('test2') - }); + // c + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'test2', + description: 'test2c' + }, + afterReplicaId: test_utils.rid('test2') + }); - // d - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test4', - description: 'test4d' - }, - afterReplicaId: test_utils.rid('test4') - }); + // d + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test4', + description: 'test4d' + }, + afterReplicaId: test_utils.rid('test4') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test4' - }, - beforeReplicaId: test_utils.rid('test4'), - after: { - id: 'test5', - description: 'test5d' - }, - afterReplicaId: test_utils.rid('test5') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test4' + }, + beforeReplicaId: test_utils.rid('test4'), + after: { + id: 'test5', + description: 'test5d' + }, + afterReplicaId: test_utils.rid('test5') }); + const result2 = await writer.flush(); const checkpoint2 = result2!.flushed_op; const request = bucketRequest(syncRules, 'global[]', checkpoint1); @@ -739,59 +701,56 @@ bucket_definitions: global: data: - SELECT id, description FROM "test" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = test_utils.makeTestTable('test', ['id', 'description'], config); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id', 'description'], config); // Pre-setup - const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async 
(batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: rid2('test1', 'test1a') - }); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: rid2('test1', 'test1a') }); + const result1 = await writer.flush(); const checkpoint1 = result1?.flushed_op ?? 0n; - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Unchanged, but has a before id - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test1', - description: 'test1a' - }, - beforeReplicaId: rid2('test1', 'test1a'), - after: { - id: 'test1', - description: 'test1b' - }, - afterReplicaId: rid2('test1', 'test1b') - }); + // Unchanged, but has a before id + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1', + description: 'test1a' + }, + beforeReplicaId: rid2('test1', 'test1a'), + after: { + id: 'test1', + description: 'test1b' + }, + afterReplicaId: rid2('test1', 'test1b') }); + await writer.flush(); - const result3 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Delete - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 'test1', - description: 'test1b' - }, - beforeReplicaId: rid2('test1', 'test1b'), - after: undefined - }); + // Delete + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 'test1', + description: 'test1b' + }, + beforeReplicaId: rid2('test1', 'test1b'), + after: undefined }); + const result3 = await writer.flush(); const checkpoint3 = result3!.flushed_op; const request = bucketRequest(syncRules); @@ -849,59 +808,56 @@ bucket_definitions: global: data: - SELECT id, description FROM "test" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = test_utils.makeTestTable('test', ['id', 'description'], config); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id', 'description'], config); // Pre-setup - const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: rid2('test1', 'test1a') - }); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: rid2('test1', 'test1a') }); + const result1 = await writer.flush(); const checkpoint1 = result1?.flushed_op ?? 
0n; - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Unchanged, but has a before id - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 'test1', - description: 'test1a' - }, - beforeReplicaId: rid2('test1', 'test1a'), - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: rid2('test1', 'test1a') - }); + // Unchanged, but has a before id + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1', + description: 'test1a' + }, + beforeReplicaId: rid2('test1', 'test1a'), + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: rid2('test1', 'test1a') }); + await writer.flush(); - const result3 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Delete - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 'test1', - description: 'test1a' - }, - beforeReplicaId: rid2('test1', 'test1a'), - after: undefined - }); + // Delete + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 'test1', + description: 'test1a' + }, + beforeReplicaId: rid2('test1', 'test1a'), + after: undefined }); + const result3 = await writer.flush(); const checkpoint3 = result3!.flushed_op; const request = bucketRequest(syncRules); @@ -949,60 +905,58 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + const largeDescription = '0123456789'.repeat(12_000_00); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; - - const largeDescription = '0123456789'.repeat(12_000_00); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large1', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large1') - }); - - // Large enough to split the returned batch - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large2', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large1', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large1') + }); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test3', - description: 'test3' - }, - afterReplicaId: test_utils.rid('test3') - }); + // Large enough to split the returned batch + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large2', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large2') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: 
testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test3', + description: 'test3' + }, + afterReplicaId: test_utils.rid('test3') }); + await writer.commitAll('1/1'); + const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -1058,28 +1012,27 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; + await writer.markAllSnapshotDone('1/1'); - for (let i = 1; i <= 6; i++) { - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `test${i}`, - description: `test${i}` - }, - afterReplicaId: `test${i}` - }); - } + for (let i = 1; i <= 6; i++) { + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: `test${i}`, + description: `test${i}` + }, + afterReplicaId: `test${i}` + }); + } - await batch.commit('1/1'); - }); + await writer.commitAll('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -1140,26 +1093,25 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - const sourceTable = TEST_TABLE; - - for (let i = 1; i <= 10; i++) { - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: `test${i}`, - description: `test${i}`, - bucket: i == 1 ? 'global1' : 'global2' - }, - afterReplicaId: `test${i}` - }); - } - - await batch.commit('1/1'); - }); + await writer.markAllSnapshotDone('1/1'); + + for (let i = 1; i <= 10; i++) { + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: `test${i}`, + description: `test${i}`, + bucket: i == 1 ? 
'global1' : 'global2' + }, + afterReplicaId: `test${i}` + }); + } + + await writer.commitAll('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const global1Request = bucketRequest(syncRules, 'global1[]', 0n); @@ -1288,10 +1240,9 @@ bucket_definitions: const r = await f.configureSyncRules({ content: 'bucket_definitions: {}', validate: false }); const storage = f.getInstance(r.persisted_sync_rules!); - await storage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/0'); - await batch.keepalive('1/0'); - }); + await using writer = await f.createCombinedWriter([storage], test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/0'); + await writer.keepaliveAll('1/0'); const metrics2 = await f.getStorageMetrics(); expect(metrics2.operations_size_bytes).toBeLessThanOrEqual(20_000); @@ -1314,36 +1265,34 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = test_utils.makeTestTable('test', ['id'], config); - const sourceTableIgnore = test_utils.makeTestTable('test_ignore', ['id'], config); - - const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - // This saves a record to current_data, but not bucket_data. - // This causes a checkpoint to be created without increasing the op_id sequence. - await batch.save({ - sourceTable: sourceTableIgnore, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config, 1); + const sourceTableIgnore = await test_utils.resolveTestTable(writer, 'test_ignore', ['id'], config, 2); + + await writer.markAllSnapshotDone('1/1'); + // This saves a record to current_data, but not bucket_data. + // This causes a checkpoint to be created without increasing the op_id sequence. + await writer.save({ + sourceTable: sourceTableIgnore, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1' + }, + afterReplicaId: test_utils.rid('test1') }); + const result1 = await writer.flush(); const checkpoint1 = result1!.flushed_op; - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test2' - }, - afterReplicaId: test_utils.rid('test2') - }); + await writer.save({ + sourceTable: sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test2' + }, + afterReplicaId: test_utils.rid('test2') }); + const result2 = await writer.flush(); const checkpoint2 = result2!.flushed_op; // we expect 0n and 1n, or 1n and 2n. 
@@ -1358,24 +1307,23 @@ bucket_definitions: global: data: - SELECT client_id as id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = TEST_TABLE; - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); - await batch.commit('1/1'); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') }); + await writer.commitAll('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -1395,31 +1343,29 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.commit('1/1'); - - const cp1 = await bucketStorage.getCheckpoint(); - expect(cp1.lsn).toEqual('1/1'); - - await batch.commit('2/1', { createEmptyCheckpoints: true }); - const cp2 = await bucketStorage.getCheckpoint(); - expect(cp2.lsn).toEqual('2/1'); - - await batch.keepalive('3/1'); - const cp3 = await bucketStorage.getCheckpoint(); - expect(cp3.lsn).toEqual('3/1'); - - // For the last one, we skip creating empty checkpoints - // This means the LSN stays at 3/1. - await batch.commit('4/1', { createEmptyCheckpoints: false }); - const cp4 = await bucketStorage.getCheckpoint(); - expect(cp4.lsn).toEqual('3/1'); - }); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); + await writer.commitAll('1/1'); + + const cp1 = await bucketStorage.getCheckpoint(); + expect(cp1.lsn).toEqual('1/1'); + + await writer.commitAll('2/1', { createEmptyCheckpoints: true }); + const cp2 = await bucketStorage.getCheckpoint(); + expect(cp2.lsn).toEqual('2/1'); + + await writer.keepaliveAll('3/1'); + const cp3 = await bucketStorage.getCheckpoint(); + expect(cp3.lsn).toEqual('3/1'); + + // For the last one, we skip creating empty checkpoints + // This means the LSN stays at 3/1. + await writer.commitAll('4/1', { createEmptyCheckpoints: false }); + const cp4 = await bucketStorage.getCheckpoint(); + expect(cp4.lsn).toEqual('3/1'); }); test('empty checkpoints (2)', async () => { @@ -1433,40 +1379,38 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer1 = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer2 = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer2, 'test', ['id'], config); + + // We simulate two concurrent batches, but sequential calls are enough for this test. 
+ await writer1.markAllSnapshotDone('1/1'); + await writer1.commitAll('1/1'); + + await writer1.commitAll('2/1', { createEmptyCheckpoints: false }); + const cp2 = await bucketStorage.getCheckpoint(); + expect(cp2.lsn).toEqual('1/1'); // checkpoint 2/1 skipped + + await writer2.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') + }); + // This simulates what happens on a snapshot processor. + // This may later change to a flush() rather than commit(). + await writer2.commitAll(test_utils.BATCH_OPTIONS.zeroLSN); - const sourceTable = TEST_TABLE; - // We simulate two concurrent batches, but nesting is the easiest way to do this. - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch1) => { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch2) => { - await batch1.markAllSnapshotDone('1/1'); - await batch1.commit('1/1'); - - await batch1.commit('2/1', { createEmptyCheckpoints: false }); - const cp2 = await bucketStorage.getCheckpoint(); - expect(cp2.lsn).toEqual('1/1'); // checkpoint 2/1 skipped - - await batch2.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); - // This simulates what happens on a snapshot processor. - // This may later change to a flush() rather than commit(). - await batch2.commit(test_utils.BATCH_OPTIONS.zeroLSN); - - const cp3 = await bucketStorage.getCheckpoint(); - expect(cp3.lsn).toEqual('1/1'); // Still unchanged + const cp3 = await bucketStorage.getCheckpoint(); + expect(cp3.lsn).toEqual('1/1'); // Still unchanged - // This now needs to advance the LSN, despite {createEmptyCheckpoints: false} - await batch1.commit('4/1', { createEmptyCheckpoints: false }); - const cp4 = await bucketStorage.getCheckpoint(); - expect(cp4.lsn).toEqual('4/1'); - }); - }); + // This now needs to advance the LSN, despite {createEmptyCheckpoints: false} + await writer1.commitAll('4/1', { createEmptyCheckpoints: false }); + const cp4 = await bucketStorage.getCheckpoint(); + expect(cp4.lsn).toEqual('4/1'); }); test('deleting while streaming', async () => { @@ -1477,42 +1421,44 @@ bucket_definitions: global: data: - SELECT id, description FROM "%" -` + ` }); const bucketStorage = factory.getInstance(syncRules); + await using snapshotWriter = await factory.createCombinedWriter([bucketStorage], { + ...test_utils.BATCH_OPTIONS, + skipExistingRows: true + }); + await using streamingWriter = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const snapshotTable = await test_utils.resolveTestTable(snapshotWriter, 'test', ['id'], config, 1); + const streamingTable = await test_utils.resolveTestTable(streamingWriter, 'test', ['id'], config, 1); - const sourceTable = TEST_TABLE; - // We simulate two concurrent batches, and nesting is the easiest way to do this. + // We simulate two concurrent batches; separate writers are enough for this test. // For this test, we assume that we start with a row "test1", which is picked up by a snapshot // query, right before the delete is streamed. But the snapshot query is only persisted _after_ // the delete is streamed, and we need to ensure that the streamed delete takes precedence. 
- await bucketStorage.startBatch({ ...test_utils.BATCH_OPTIONS, skipExistingRows: true }, async (snapshotBatch) => { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (streamingBatch) => { - streamingBatch.save({ - sourceTable, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 'test1' - }, - beforeReplicaId: test_utils.rid('test1') - }); - await streamingBatch.commit('2/1'); - - await snapshotBatch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1a' - }, - afterReplicaId: test_utils.rid('test1') - }); - await snapshotBatch.markAllSnapshotDone('3/1'); - await snapshotBatch.commit('1/1'); + await streamingWriter.save({ + sourceTable: streamingTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 'test1' + }, + beforeReplicaId: test_utils.rid('test1') + }); + await streamingWriter.commitAll('2/1'); - await streamingBatch.keepalive('3/1'); - }); + await snapshotWriter.save({ + sourceTable: snapshotTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') }); + await snapshotWriter.markAllSnapshotDone('3/1'); + await snapshotWriter.commitAll('1/1'); + + await streamingWriter.keepaliveAll('3/1'); const cp = await bucketStorage.getCheckpoint(); expect(cp.lsn).toEqual('3/1'); @@ -1542,27 +1488,26 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - const sourceTable = test_utils.makeTestTable('test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - for (let u of ['u1', 'u2', 'u3', 'u4']) { - for (let t of ['t1', 't2', 't3', 't4']) { - const id = `${t}_${u}`; - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id, - description: `${t} description`, - user_id: u - }, - afterReplicaId: test_utils.rid(id) - }); - } + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + for (let u of ['u1', 'u2', 'u3', 'u4']) { + for (let t of ['t1', 't2', 't3', 't4']) { + const id = `${t}_${u}`; + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id, + description: `${t} description`, + user_id: u + }, + afterReplicaId: test_utils.rid(id) + }); } - await batch.commit('1/1'); - }); + } + await writer.commitAll('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); bucketStorage.clearChecksumCache(); From 4f802cc1f73cb345589f57a34a4e6196e91959b9 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 15:12:05 +0200 Subject: [PATCH 050/101] Test fix round 3. 
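For reference, the writer-based pattern that these converted tests follow looks roughly like this (an illustrative sketch only, using the helper names from this patch series such as createCombinedWriter, resolveTestTable, commitAll and flush; sync-rules content and assertions are elided):

    // Sketch of the converted test shape; simplified, not part of the diff below.
    const bucketStorage = factory.getInstance(syncRules);
    await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS);
    const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config);

    await writer.markAllSnapshotDone('1/1');
    await writer.save({
      sourceTable: testTable,
      tag: storage.SaveOperationTag.INSERT,
      after: { id: 'test1', description: 'test1' },
      afterReplicaId: test_utils.rid('test1')
    });
    // commitAll() stands in for batch.commit(); flush() returns the result that was
    // previously obtained from the startBatch() callback.
    await writer.commitAll('1/1');
    const { checkpoint } = await bucketStorage.getCheckpoint();
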
--- .../src/test-utils/general-utils.ts | 32 +- .../register-data-storage-parameter-tests.ts | 518 +++++++++--------- .../test/src/checksum_cache.test.ts | 2 +- .../test/src/sync/BucketChecksumState.test.ts | 26 +- 4 files changed, 290 insertions(+), 288 deletions(-) diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index fc641e194..847060f27 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -1,8 +1,14 @@ -import { BucketDataRequest, InternalOpId, storage, sync, utils } from '@powersync/service-core'; -import { GetQuerierOptions, RequestParameters, SqlSyncRules, TablePattern } from '@powersync/service-sync-rules'; -import { versionedHydrationState } from '@powersync/service-sync-rules'; +import { BucketDataRequest, InternalOpId, storage, utils } from '@powersync/service-core'; +import { + GetQuerierOptions, + RequestParameters, + SqlSyncRules, + versionedHydrationState +} from '@powersync/service-sync-rules'; import * as bson from 'bson'; +import { SOURCE } from '@powersync/service-sync-rules'; + export const ZERO_LSN = '0/0'; export const PARSE_OPTIONS: storage.ParseSyncRulesOptions = { @@ -200,3 +206,23 @@ export function bucketRequest( source: source }; } + +/** + * Removes the source property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. + */ +export function removeSource(obj: T): Omit { + const { source, ...rest } = obj; + return rest; +} + +/** + * Removes the [SOURCE] symbol property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. 
+ */ +export function removeSourceSymbol(obj: T): Omit { + const { [SOURCE]: source, ...rest } = obj; + return rest; +} diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index bebf38e49..ae3944ca8 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -16,8 +16,7 @@ import * as test_utils from '../test-utils/test-utils-index.js'; */ export function registerDataStorageParameterTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); - const MYBUCKET_1: ParameterLookupScope = { lookupName: 'mybucket', queryId: '1', source: null as any }; + const MYBUCKET_1: ParameterLookupScope = { lookupName: '20002', queryId: '', source: null as any }; test('save and load parameters', async () => { await using factory = await generateStorageFactory(); @@ -31,37 +30,44 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + const MYBUCKET_1: ParameterLookupScope = { lookupName: '20002', queryId: '', source: null as any }; + // We could get the scope automatically like this: + // const parsed = syncRules.parsed(test_utils.PARSE_OPTIONS); + // const hydrated = parsed.hydratedSyncRules(); + // const parameterSource = hydrated.definition.bucketParameterLookupSources[0]; + // const parameterLookupScope = parsed.hydrationState.getParameterIndexLookupScope(parameterSource); + + await writer.markAllSnapshotDone('1/1'); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + id1: 'user3', + id2: 'user4', + group_id: 'group2a' + }, + afterReplicaId: test_utils.rid('t2') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - id1: 'user3', - id2: 'user4', - group_id: 'group2a' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - id1: 'user1', - id2: 'user2', - group_id: 'group1a' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + id1: 'user1', + id2: 'user2', + group_id: 'group1a' + }, + afterReplicaId: test_utils.rid('t1') }); + await writer.commitAll('1/1'); + const checkpoint = await bucketStorage.getCheckpoint(); const parameters = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); expect(parameters).toEqual([ @@ -83,33 +89,31 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - group_id: 'group1' - }, - afterReplicaId: test_utils.rid('user1') - }); - await 
batch.commit('1/1'); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + group_id: 'group1' + }, + afterReplicaId: test_utils.rid('user1') }); + await writer.commitAll('1/1'); const checkpoint1 = await bucketStorage.getCheckpoint(); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'user1', - group_id: 'group2' - }, - afterReplicaId: test_utils.rid('user1') - }); - await batch.commit('1/2'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'user1', + group_id: 'group2' + }, + afterReplicaId: test_utils.rid('user1') }); + await writer.commitAll('1/2'); const checkpoint2 = await bucketStorage.getCheckpoint(); const parameters = await checkpoint2.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); @@ -143,48 +147,45 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - const table = test_utils.makeTestTable('todos', ['id', 'list_id'], config); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - // Create two todos which initially belong to different lists - await batch.save({ - sourceTable: table, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'todo1', - list_id: 'list1' - }, - afterReplicaId: test_utils.rid('todo1') - }); - await batch.save({ - sourceTable: table, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'todo2', - list_id: 'list2' - }, - afterReplicaId: test_utils.rid('todo2') - }); - - await batch.commit('1/1'); - }); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Update the second todo item to now belong to list 1 - await batch.save({ - sourceTable: table, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 'todo2', - list_id: 'list1' - }, - afterReplicaId: test_utils.rid('todo2') - }); - - await batch.commit('1/1'); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const table = await test_utils.resolveTestTable(writer, 'todos', ['id', 'list_id'], config); + + await writer.markAllSnapshotDone('1/1'); + // Create two todos which initially belong to different lists + await writer.save({ + sourceTable: table, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'todo1', + list_id: 'list1' + }, + afterReplicaId: test_utils.rid('todo1') + }); + await writer.save({ + sourceTable: table, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'todo2', + list_id: 'list2' + }, + afterReplicaId: test_utils.rid('todo2') + }); + + await writer.commitAll('1/1'); + + // Update the second todo item to now belong to list 1 + await writer.save({ + sourceTable: table, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 'todo2', + list_id: 'list1' + }, + afterReplicaId: test_utils.rid('todo2') }); + await writer.commitAll('1/1'); + // We specifically request the todo_ids for both lists. 
// There removal operation for the association of `list2`::`todo2` should not interfere with the new // association of `list1`::`todo2` @@ -216,25 +217,25 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - group_id: 'group1', - n1: 314n, - f2: 314, - f3: 3.14 - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.commit('1/1'); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + group_id: 'group1', + n1: 314n, + f2: 314, + f3: 3.14 + }, + afterReplicaId: test_utils.rid('t1') }); + await writer.commitAll('1/1'); + const TEST_PARAMS = { group_id: 'group1' }; const checkpoint = await bucketStorage.getCheckpoint(); @@ -267,36 +268,36 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + group_id: 'group1', + n1: 1152921504606846976n // 2^60 + }, + afterReplicaId: test_utils.rid('t1') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - group_id: 'group1', - n1: 1152921504606846976n // 2^60 - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1', - group_id: 'group1', - // Simulate a TOAST value, even though it can't happen for values like this - // in practice. - n1: undefined - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1', + group_id: 'group1', + // Simulate a TOAST value, even though it can't happen for values like this + // in practice. 
+ n1: undefined + }, + afterReplicaId: test_utils.rid('t1') }); + await writer.commitAll('1/1'); + const TEST_PARAMS = { group_id: 'group1' }; const checkpoint = await bucketStorage.getCheckpoint(); @@ -308,8 +309,6 @@ bucket_definitions: }); test('save and load parameters with workspaceId', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', ['id'], config); - await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ content: ` @@ -323,20 +322,20 @@ bucket_definitions: }); const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace1', - userId: 'u1' - }, - afterReplicaId: test_utils.rid('workspace1') - }); - await batch.commit('1/1'); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace1', + userId: 'u1' + }, + afterReplicaId: test_utils.rid('workspace1') }); + await writer.commitAll('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); const parameters = new RequestParameters({ sub: 'u1' }, {}); @@ -345,7 +344,7 @@ bucket_definitions: const lookups = querier.parameterQueryLookups; expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1', source: null as any }, ['u1']) + ScopedParameterLookup.direct({ lookupName: '20002', queryId: '', source: null as any }, ['u1']) ]); const parameter_sets = await checkpoint.getParameterSets(lookups); @@ -356,14 +355,12 @@ bucket_definitions: return checkpoint.getParameterSets(lookups); } }); - expect(buckets).toEqual([ - { bucket: 'by_workspace["workspace1"]', priority: 3, definition: 'by_workspace', inclusion_reasons: ['default'] } + expect(buckets.map(test_utils.removeSourceSymbol)).toEqual([ + { bucket: '10002["workspace1"]', priority: 3, definition: 'by_workspace', inclusion_reasons: ['default'] } ]); }); test('save and load parameters with dynamic global buckets', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', undefined, config); - await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ content: ` @@ -377,42 +374,42 @@ bucket_definitions: }); const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', undefined, config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace1', + visibility: 'public' + }, + afterReplicaId: test_utils.rid('workspace1') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, 
- after: { - id: 'workspace1', - visibility: 'public' - }, - afterReplicaId: test_utils.rid('workspace1') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace2', - visibility: 'private' - }, - afterReplicaId: test_utils.rid('workspace2') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace3', - visibility: 'public' - }, - afterReplicaId: test_utils.rid('workspace3') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace2', + visibility: 'private' + }, + afterReplicaId: test_utils.rid('workspace2') }); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace3', + visibility: 'public' + }, + afterReplicaId: test_utils.rid('workspace3') + }); + + await writer.commitAll('1/1'); + const checkpoint = await bucketStorage.getCheckpoint(); const parameters = new RequestParameters({ sub: 'unknown' }, {}); @@ -421,7 +418,7 @@ bucket_definitions: const lookups = querier.parameterQueryLookups; expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'by_public_workspace', queryId: '1', source: null as any }, []) + ScopedParameterLookup.direct({ lookupName: '20002', queryId: '', source: null as any }, []) ]); const parameter_sets = await checkpoint.getParameterSets(lookups); @@ -434,15 +431,15 @@ bucket_definitions: } }); buckets.sort((a, b) => a.bucket.localeCompare(b.bucket)); - expect(buckets).toEqual([ + expect(buckets.map(test_utils.removeSourceSymbol)).toEqual([ { - bucket: 'by_public_workspace["workspace1"]', + bucket: '10002["workspace1"]', priority: 3, definition: 'by_public_workspace', inclusion_reasons: ['default'] }, { - bucket: 'by_public_workspace["workspace3"]', + bucket: '10002["workspace3"]', priority: 3, definition: 'by_public_workspace', inclusion_reasons: ['default'] @@ -451,8 +448,6 @@ bucket_definitions: }); test('multiple parameter queries', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', undefined, config); - await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ content: ` @@ -468,54 +463,54 @@ bucket_definitions: }); const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', undefined, config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace1', + visibility: 'public' + }, + afterReplicaId: test_utils.rid('workspace1') + }); + + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace2', + visibility: 'private' + }, + afterReplicaId: test_utils.rid('workspace2') + }); + + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace3', + user_id: 'u1', + visibility: 'private' + }, + afterReplicaId: test_utils.rid('workspace3') + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: 
WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace1', - visibility: 'public' - }, - afterReplicaId: test_utils.rid('workspace1') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace2', - visibility: 'private' - }, - afterReplicaId: test_utils.rid('workspace2') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace3', - user_id: 'u1', - visibility: 'private' - }, - afterReplicaId: test_utils.rid('workspace3') - }); - - await batch.save({ - sourceTable: WORKSPACE_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'workspace4', - user_id: 'u2', - visibility: 'private' - }, - afterReplicaId: test_utils.rid('workspace4') - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: workspaceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'workspace4', + user_id: 'u2', + visibility: 'private' + }, + afterReplicaId: test_utils.rid('workspace4') }); + await writer.commitAll('1/1'); + const checkpoint = await bucketStorage.getCheckpoint(); const parameters = new RequestParameters({ sub: 'u1' }, {}); @@ -525,8 +520,8 @@ bucket_definitions: const lookups = querier.parameterQueryLookups; expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '1', source: null as any }, []), - ScopedParameterLookup.direct({ lookupName: 'by_workspace', queryId: '2', source: null as any }, ['u1']) + ScopedParameterLookup.direct({ lookupName: '20003', queryId: '', source: null as any }, []), + ScopedParameterLookup.direct({ lookupName: '20004', queryId: '', source: null as any }, ['u1']) ]); const parameter_sets = await checkpoint.getParameterSets(lookups); @@ -542,7 +537,7 @@ bucket_definitions: }) ).map((e) => e.bucket); buckets.sort(); - expect(buckets).toEqual(['by_workspace["workspace1"]', 'by_workspace["workspace3"]']); + expect(buckets).toEqual(['10003["workspace1"]', '10003["workspace3"]']); }); test('truncate parameters', async () => { @@ -557,24 +552,25 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - id1: 'user3', - id2: 'user4', - group_id: 'group2a' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.truncate([TEST_TABLE]); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + id1: 'user3', + id2: 'user4', + group_id: 'group2a' + }, + afterReplicaId: test_utils.rid('t2') }); + await writer.truncate([testTable]); + await writer.flush(); + const checkpoint = await bucketStorage.getCheckpoint(); const parameters = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); diff --git a/packages/service-core/test/src/checksum_cache.test.ts b/packages/service-core/test/src/checksum_cache.test.ts index a9cc56aaf..1ae7d9ba6 100644 --- a/packages/service-core/test/src/checksum_cache.test.ts +++ b/packages/service-core/test/src/checksum_cache.test.ts @@ -3,7 +3,7 
@@ import { addChecksums, BucketChecksum, InternalOpId, PartialChecksum } from '@/u import { BucketDataSource } from '@powersync/service-sync-rules'; import * as crypto from 'node:crypto'; import { describe, expect, it } from 'vitest'; -import { removeSource } from './sync/BucketChecksumState.test.js'; +import { removeSource } from './utils.js'; /** * Create a deterministic BucketChecksum based on the bucket name and checkpoint for testing purposes. diff --git a/packages/service-core/test/src/sync/BucketChecksumState.test.ts b/packages/service-core/test/src/sync/BucketChecksumState.test.ts index aef48b61d..24b1b2a2f 100644 --- a/packages/service-core/test/src/sync/BucketChecksumState.test.ts +++ b/packages/service-core/test/src/sync/BucketChecksumState.test.ts @@ -17,12 +17,12 @@ import { DEFAULT_HYDRATION_STATE, RequestJwtPayload, ScopedParameterLookup, - SOURCE, SqliteJsonRow, - SqlSyncRules + SqlSyncRules, + versionedHydrationState } from '@powersync/service-sync-rules'; -import { versionedHydrationState } from '@powersync/service-sync-rules'; import { beforeEach, describe, expect, test } from 'vitest'; +import { removeSource, removeSourceSymbol } from '../utils.js'; describe('BucketChecksumState', () => { // Single global[] bucket. @@ -892,23 +892,3 @@ class MockBucketChecksumStateStorage implements BucketChecksumStateStorage { }; } } - -/** - * Removes the source property from an object. - * - * This is for tests where we don't care about this value, and it adds a lot of noise in the output. - */ -export function removeSource(obj: T): Omit { - const { source, ...rest } = obj; - return rest; -} - -/** - * Removes the [SOURCE] symbol property from an object. - * - * This is for tests where we don't care about this value, and it adds a lot of noise in the output. - */ -export function removeSourceSymbol(obj: T): Omit { - const { [SOURCE]: source, ...rest } = obj; - return rest; -} From c58bdd6c635febcf5c702e11dd4f6a78efe21594 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 16:06:25 +0200 Subject: [PATCH 051/101] Initial compacting fixes. 
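The compactor now resolves per-definition bucket ids through the sync rules mapping instead of keying everything on group_id. Condensed, the dirty-bucket loop in MongoCompactor takes roughly this shape (an illustrative sketch based on the code in this diff; logging, batch limits and the cap on the recently-compacted list are omitted):

    // Sketch of compactDirtyBuckets() after this change; simplified, not part of the diff below.
    const sqlSyncRules = this.storage.sync_rules.parsed({ defaultSchema: 'n/a' }).hydratedSyncRules().definition;
    const recentlyCompacted: string[] = [];
    while (!this.signal?.aborted) {
      const buckets = await this.dirtyBucketBatch({
        sqlSyncRules,
        minBucketChanges: this.minBucketChanges,
        exclude: recentlyCompacted
      });
      if (buckets.length == 0) {
        break;
      }
      for (let { bucket, def } of buckets) {
        // def comes from this.storage.sync_rules.mapping.bucketSourceId(source),
        // so bucket_data and bucket_state lookups are keyed per bucket definition.
        await this.compactSingleBucket(def, bucket);
        recentlyCompacted.push(bucket);
      }
    }
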
--- .../implementation/BucketDefinitionMapping.ts | 2 +- .../storage/implementation/MongoCompactor.ts | 73 ++- .../implementation/MongoParameterCompactor.ts | 1 + .../test/src/storage_compacting.test.ts | 65 +-- .../src/tests/register-compacting-tests.ts | 486 +++++++++--------- .../register-parameter-compacting-tests.ts | 172 +++---- 6 files changed, 411 insertions(+), 388 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts index 4af2dd666..fc48cfbbd 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/BucketDefinitionMapping.ts @@ -1,5 +1,5 @@ import { ServiceAssertionError } from '@powersync/lib-services-framework'; -import { BucketDataSource, ParameterIndexLookupCreator } from '@powersync/service-sync-rules'; +import { BucketDataSource, ParameterIndexLookupCreator, SqlSyncRules } from '@powersync/service-sync-rules'; import { SyncRuleDocument } from './models.js'; export class BucketDefinitionMapping { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts index a4225096d..0f61708bf 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoCompactor.ts @@ -14,9 +14,11 @@ import { PowerSyncMongo } from './db.js'; import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js'; import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js'; import { cacheKey } from './OperationBatch.js'; -import { BucketDataSource } from '@powersync/service-sync-rules'; +import { BucketDataSource, SqlSyncRules } from '@powersync/service-sync-rules'; interface CurrentBucketState { + def: number; + /** Bucket name */ bucket: string; @@ -80,14 +82,12 @@ export class MongoCompactor { private maxOpId: bigint; private buckets: string[] | undefined; private signal?: AbortSignal; - private group_id: number; constructor( private storage: MongoSyncBucketStorage, private db: PowerSyncMongo, options?: MongoCompactOptions ) { - this.group_id = storage.group_id; this.idLimitBytes = (options?.memoryLimitMB ?? DEFAULT_MEMORY_LIMIT_MB) * 1024 * 1024; this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT; this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT; @@ -105,18 +105,23 @@ export class MongoCompactor { */ async compact() { if (this.buckets) { - for (let bucket of this.buckets) { - // We can make this more efficient later on by iterating - // through the buckets in a single query. - // That makes batching more tricky, so we leave for later. - await this.compactSingleBucket(bucket); - } + throw new Error('Compacting specific buckets is not implemented currently'); + // for (let bucket of this.buckets) { + // // We can make this more efficient later on by iterating + // // through the buckets in a single query. + // // That makes batching more tricky, so we leave for later. 
+ // await this.compactSingleBucket(bucket); + // } } else { await this.compactDirtyBuckets(); } } private async compactDirtyBuckets() { + const persistedSyncRules = this.storage.sync_rules.parsed({ defaultSchema: 'n/a' }); + const hydrated = persistedSyncRules.hydratedSyncRules(); + const sqlSyncRules = hydrated.definition; + while (!this.signal?.aborted) { // Process all buckets with 10 or more changes since last time. // We exclude the last 100 compacted buckets, to avoid repeatedly re-compacting the same buckets over and over @@ -125,6 +130,7 @@ export class MongoCompactor { let recentlyCompacted: string[] = []; const buckets = await this.dirtyBucketBatch({ + sqlSyncRules, minBucketChanges: this.minBucketChanges, exclude: recentlyCompacted }); @@ -132,8 +138,8 @@ export class MongoCompactor { // All done break; } - for (let { bucket } of buckets) { - await this.compactSingleBucket(bucket); + for (let { bucket, def } of buckets) { + await this.compactSingleBucket(def, bucket); recentlyCompacted.push(bucket); } if (recentlyCompacted.length > TRACK_RECENTLY_COMPACTED_NUMBER) { @@ -142,10 +148,11 @@ export class MongoCompactor { } } - private async compactSingleBucket(bucket: string) { + private async compactSingleBucket(def: number, bucket: string) { const idLimitBytes = this.idLimitBytes; let currentState: CurrentBucketState = { + def, bucket, seen: new Map(), trackingSize: 0, @@ -159,14 +166,14 @@ export class MongoCompactor { // Constant lower bound const lowerBound: BucketDataKey = { - g: this.group_id, + g: def, b: bucket, o: new mongo.MinKey() as any }; // Upper bound is adjusted for each batch let upperBound: BucketDataKey = { - g: this.group_id, + g: def, b: bucket, o: new mongo.MaxKey() as any }; @@ -294,7 +301,7 @@ export class MongoCompactor { currentState.seen.clear(); if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) { logger.info( - `Inserting CLEAR at ${this.group_id}:${bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations` + `Inserting CLEAR at ${currentState.def}:${bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations` ); // Need flush() before clear() await this.flush(); @@ -321,7 +328,7 @@ export class MongoCompactor { updateOne: { filter: { _id: { - g: this.group_id, + g: state.def, b: state.bucket } }, @@ -384,12 +391,12 @@ export class MongoCompactor { const opFilter = { _id: { $gte: { - g: this.group_id, + g: currentState.def, b: bucket, o: new mongo.MinKey() as any }, $lte: { - g: this.group_id, + g: currentState.def, b: bucket, o: clearOp } @@ -451,7 +458,7 @@ export class MongoCompactor { { _id: { $gte: { - g: this.group_id, + g: currentState.def, b: bucket, o: new mongo.MinKey() as any }, @@ -491,9 +498,10 @@ export class MongoCompactor { * Subset of compact, only populating checksums where relevant. */ async populateChecksums(options: { minBucketChanges: number }): Promise { + const sqlSyncRules = this.storage.sync_rules.parsed({ defaultSchema: 'n/a' }).hydratedSyncRules().definition; let count = 0; while (!this.signal?.aborted) { - const buckets = await this.dirtyBucketBatch(options); + const buckets = await this.dirtyBucketBatch({ ...options, sqlSyncRules }); if (buckets.length == 0) { // All done break; @@ -525,17 +533,26 @@ export class MongoCompactor { * set estimate_since_compact.count: 0 when done, before fetching the next batch. 
*/ private async dirtyBucketBatch(options: { + sqlSyncRules: SqlSyncRules; minBucketChanges: number; exclude?: string[]; - }): Promise<{ bucket: string; estimatedCount: number; source: BucketDataSource }[]> { + }): Promise<{ def: number; bucket: string; estimatedCount: number; source: BucketDataSource }[]> { if (options.minBucketChanges <= 0) { throw new ReplicationAssertionError('minBucketChanges must be >= 1'); } + + const mapping = this.storage.sync_rules.mapping; + let definitions = new Map(); + for (let source of options.sqlSyncRules.bucketDataSources) { + const id = mapping.bucketSourceId(source); + definitions.set(id, source); + } + // We make use of an index on {_id.g: 1, 'estimate_since_compact.count': -1} const dirtyBuckets = await this.db.bucket_state .find( { - '_id.g': this.group_id, + '_id.g': { $in: [...definitions.keys()] }, 'estimate_since_compact.count': { $gte: options.minBucketChanges }, '_id.b': { $nin: options.exclude ?? [] } }, @@ -555,13 +572,16 @@ export class MongoCompactor { .toArray(); return dirtyBuckets.map((bucket) => ({ + def: bucket._id.g, bucket: bucket._id.b, estimatedCount: bucket.estimate_since_compact!.count + (bucket.compacted_state?.count ?? 0), - source: null as any // FIXME: Implement this + source: definitions.get(bucket._id.g)! })); } private async updateChecksumsBatch(buckets: BucketChecksumRequest[]) { + const sourceMap = new Map(buckets.map((b) => [b.bucket, b.source])); + const checksums = await this.storage.checksums.computePartialChecksumsDirect( buckets.map((bucket) => { return { @@ -573,6 +593,11 @@ export class MongoCompactor { ); for (let bucketChecksum of checksums.values()) { + const source = sourceMap.get(bucketChecksum.bucket); + if (!source) { + throw new ServiceAssertionError(`Unknown source for bucket ${bucketChecksum.bucket}`); + } + const sourceId = this.storage.sync_rules.mapping.bucketSourceId(source); if (isPartialChecksum(bucketChecksum)) { // Should never happen since we don't specify `start` throw new ServiceAssertionError(`Full checksum expected, got ${JSON.stringify(bucketChecksum)}`); @@ -582,7 +607,7 @@ export class MongoCompactor { updateOne: { filter: { _id: { - g: this.group_id, + g: sourceId, b: bucketChecksum.bucket } }, diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts index 3b7f6add6..6b5deb4d5 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts @@ -4,6 +4,7 @@ import { LRUCache } from 'lru-cache'; import { PowerSyncMongo } from './db.js'; import { mongo } from '@powersync/lib-service-mongodb'; import { BucketParameterDocument } from './models.js'; +import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; /** * Compacts parameter lookup data (the bucket_parameters collection). 
diff --git a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts index 264399477..908826034 100644 --- a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts @@ -6,37 +6,42 @@ import { storage, SyncRulesBucketStorage } from '@powersync/service-core'; describe('Mongo Sync Bucket Storage Compact', () => { register.registerCompactTests(INITIALIZED_MONGO_STORAGE_FACTORY); - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); - describe('with blank bucket_state', () => { // This can happen when migrating from older service versions, that did not populate bucket_state yet. - const populate = async (bucketStorage: SyncRulesBucketStorage) => { - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - owner_id: 'u1' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - owner_id: 'u2' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.commit('1/1'); + const populate = async (bucketStorage: SyncRulesBucketStorage, sourceTableIndex: number) => { + await using writer = await bucketStorage.factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + + const sourceTable = await test_utils.resolveTestTable( + writer, + 'test', + ['id'], + INITIALIZED_MONGO_STORAGE_FACTORY, + sourceTableIndex + ); + await writer.markAllSnapshotDone('1/1'); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + owner_id: 'u1' + }, + afterReplicaId: test_utils.rid('t1') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + owner_id: 'u2' + }, + afterReplicaId: test_utils.rid('t2') }); + await writer.commitAll('1/1'); + return bucketStorage.getCheckpoint(); }; @@ -51,7 +56,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - const { checkpoint } = await populate(bucketStorage); + const { checkpoint } = await populate(bucketStorage, 1); return { bucketStorage, checkpoint, factory, syncRules }; }; @@ -91,7 +96,7 @@ bucket_definitions: // Populate old sync rules version const { factory } = await setup(); - // Not populate another version (bucket definition name changed) + // Now populate another version (bucket definition name changed) const syncRules = await factory.updateSyncRules({ content: ` bucket_definitions: @@ -102,7 +107,7 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - await populate(bucketStorage); + await populate(bucketStorage, 2); const { checkpoint } = await bucketStorage.getCheckpoint(); // Default is to small small numbers - should be a no-op diff --git a/packages/service-core-tests/src/tests/register-compacting-tests.ts b/packages/service-core-tests/src/tests/register-compacting-tests.ts index 4475d4b7e..4b6f7b5e1 100644 --- a/packages/service-core-tests/src/tests/register-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-compacting-tests.ts @@ -5,7 +5,6 @@ import { bucketRequest } from '../test-utils/test-utils-index.js'; export function registerCompactTests(config: storage.TestStorageConfig) { 
const generateStorageFactory = config.factory; - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); test('compacting (1)', async () => { await using factory = await generateStorageFactory(); @@ -17,40 +16,41 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); const request = bucketRequest(syncRules); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: test_utils.rid('t2') - }); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: test_utils.rid('t1') + }); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2' - }, - afterReplicaId: test_utils.rid('t2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: test_utils.rid('t2') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2' + }, + afterReplicaId: test_utils.rid('t2') }); + const result = await writer.flush(); + await writer.commitAll('1/1'); + const checkpoint = result!.flushed_op; const request2 = bucketRequest(syncRules); @@ -122,48 +122,49 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: test_utils.rid('t1') + }); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: test_utils.rid('t1') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: test_utils.rid('t2') + }); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2' - }, - afterReplicaId: test_utils.rid('t2') - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: test_utils.rid('t1') + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: 
storage.SaveOperationTag.UPDATE, + after: { + id: 't2' + }, + afterReplicaId: test_utils.rid('t2') }); + const result = await writer.flush(); + await writer.commitAll('1/1'); + const checkpoint = result!.flushed_op; const request = bucketRequest(syncRules); @@ -236,54 +237,54 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: 't2' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: 't1' - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: 't2' + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: 't1' }); + const result = await writer.flush(); + await writer.commitAll('1/1'); + const checkpoint1 = result!.flushed_op; const request = bucketRequest(syncRules); await bucketStorage.getChecksums(checkpoint1, [request]); - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't2' - }, - beforeReplicaId: 't2' - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't2' + }, + beforeReplicaId: 't2' }); + const result2 = await writer.flush(); + await writer.commitAll('2/1'); const checkpoint2 = result2!.flushed_op; await bucketStorage.compact({ @@ -323,77 +324,77 @@ bucket_definitions: - select * from test where b = bucket.b` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + /** + * Repeatedly create operations which fall into different buckets. + * The bucket operations are purposely interleaved as the op_id increases. + * A large amount of operations are created here. + * The configured window of compacting operations is 100. This means the initial window will + * contain operations from multiple buckets. 
+ */ + for (let count = 0; count < 100; count++) { + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + b: 'b1', + value: 'start' + }, + afterReplicaId: test_utils.rid('t1') + }); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - /** - * Repeatedly create operations which fall into different buckets. - * The bucket operations are purposely interleaved as the op_id increases. - * A large amount of operations are created here. - * The configured window of compacting operations is 100. This means the initial window will - * contain operations from multiple buckets. - */ - for (let count = 0; count < 100; count++) { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - b: 'b1', - value: 'start' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1', - b: 'b1', - value: 'intermediate' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - b: 'b2', - value: 'start' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1', - b: 'b1', - value: 'final' - }, - afterReplicaId: test_utils.rid('t1') - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2', - b: 'b2', - value: 'final' - }, - afterReplicaId: test_utils.rid('t2') - }); - - await batch.commit('1/1'); - } - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1', + b: 'b1', + value: 'intermediate' + }, + afterReplicaId: test_utils.rid('t1') + }); - const checkpoint = result!.flushed_op; + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + b: 'b2', + value: 'start' + }, + afterReplicaId: test_utils.rid('t2') + }); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1', + b: 'b1', + value: 'final' + }, + afterReplicaId: test_utils.rid('t1') + }); + + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2', + b: 'b2', + value: 'final' + }, + afterReplicaId: test_utils.rid('t2') + }); + + await writer.commitAll('1/1'); + } + + const checkpoint = (await bucketStorage.getCheckpoint()).checkpoint; await bucketStorage.compact({ clearBatchLimit: 100, @@ -444,39 +445,39 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: 
storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: 't2' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: 't1' - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: 't2' + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: 't1' }); + await writer.commitAll('1/1'); + await bucketStorage.compact({ clearBatchLimit: 2, moveBatchLimit: 1, @@ -484,17 +485,16 @@ bucket_definitions: minBucketChanges: 1 }); - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't2' - }, - beforeReplicaId: 't2' - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't2' + }, + beforeReplicaId: 't2' }); + const result2 = await writer.flush(); + await writer.commitAll('2/1'); const checkpoint2 = result2!.flushed_op; await bucketStorage.clearChecksumCache(); const request = bucketRequest(syncRules); @@ -515,44 +515,44 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); const request = bucketRequest(syncRules); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't1' + }, + afterReplicaId: 't1' }); + const result = await writer.flush(); + await writer.commitAll('1/1'); + // Get checksums here just to populate the cache await bucketStorage.getChecksums(result!.flushed_op, [request]); - const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: 't1' - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: 't1' }); + const result2 = await writer.flush(); + await writer.commitAll('2/1'); await bucketStorage.compact({ clearBatchLimit: 20, diff --git a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts index 7d21ed8d6..4a7f5a311 100644 --- a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts @@ -6,8 
+6,6 @@ import * as test_utils from '../test-utils/test-utils-index.js'; export function registerParameterCompactTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); - test('compacting parameters', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -19,60 +17,58 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); - - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2' - }, - afterReplicaId: 't2' - }); - - await batch.commit('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2' + }, + afterReplicaId: 't2' }); + await writer.commitAll('1/1'); + const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1', source: null as any }, ['t1']); const checkpoint1 = await bucketStorage.getCheckpoint(); const parameters1 = await checkpoint1.getParameterSets([lookup]); expect(parameters1).toEqual([{ id: 't1' }]); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - before: { - id: 't1' - }, - beforeReplicaId: 't1', - after: { - id: 't1' - }, - afterReplicaId: 't1' - }); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 't1' + }, + beforeReplicaId: 't1', + after: { + id: 't1' + }, + afterReplicaId: 't1' + }); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1' - }, - beforeReplicaId: 't1' - }); - await batch.commit('1/2'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1' + }, + beforeReplicaId: 't1' }); + await writer.commitAll('1/2'); const checkpoint2 = await bucketStorage.getCheckpoint(); const parameters2 = await checkpoint2.getParameterSets([lookup]); expect(parameters2).toEqual([]); @@ -103,57 +99,53 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't1', - uid: 'u1' - }, - afterReplicaId: 't1' - }); - // Interleave with another operation, to evict the other cache entry when compacting. 
- await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 't2', - uid: 'u1' - }, - afterReplicaId: 't2' - }); - - await batch.commit('1/1'); + await writer.markAllSnapshotDone('1/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't1', + uid: 'u1' + }, + afterReplicaId: 't1' }); + // Interleave with another operation, to evict the other cache entry when compacting. + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 't2', + uid: 'u1' + }, + afterReplicaId: 't2' + }); + + await writer.commitAll('1/1'); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.DELETE, - before: { - id: 't1', - uid: 'u1' - }, - beforeReplicaId: 't1' - }); - await batch.commit('2/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 't1', + uid: 'u1' + }, + beforeReplicaId: 't1' }); + await writer.commitAll('2/1'); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.save({ - sourceTable: TEST_TABLE, - tag: storage.SaveOperationTag.UPDATE, - after: { - id: 't2', - uid: 'u2' - }, - afterReplicaId: 't2' - }); - await batch.commit('3/1'); + await writer.save({ + sourceTable: testTable, + tag: storage.SaveOperationTag.UPDATE, + after: { + id: 't2', + uid: 'u2' + }, + afterReplicaId: 't2' }); + await writer.commitAll('3/1'); const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1', source: null as any }, ['u1']); From b5baf5c6800a68a69ae3734ac3d83d1022df4de7 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 16:22:18 +0200 Subject: [PATCH 052/101] Fix parameter compacting. --- .../implementation/MongoParameterCompactor.ts | 13 ++++++++----- .../implementation/MongoSyncBucketStorage.ts | 2 +- .../src/__snapshots__/storage_sync.test.ts.snap | 6 +++--- .../src/test-utils/general-utils.ts | 1 - .../tests/register-parameter-compacting-tests.ts | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts index 6b5deb4d5..c567c1d49 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoParameterCompactor.ts @@ -1,10 +1,10 @@ +import { mongo } from '@powersync/lib-service-mongodb'; import { logger } from '@powersync/lib-services-framework'; import { bson, CompactOptions, InternalOpId } from '@powersync/service-core'; import { LRUCache } from 'lru-cache'; import { PowerSyncMongo } from './db.js'; -import { mongo } from '@powersync/lib-service-mongodb'; import { BucketParameterDocument } from './models.js'; -import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; +import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js'; /** * Compacts parameter lookup data (the bucket_parameters collection). 
@@ -16,13 +16,13 @@ import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; export class MongoParameterCompactor { constructor( private db: PowerSyncMongo, - private group_id: number, + private storage: MongoSyncBucketStorage, private checkpoint: InternalOpId, private options: CompactOptions ) {} async compact() { - logger.info(`Compacting parameters for group ${this.group_id} up to checkpoint ${this.checkpoint}`); + logger.info(`Compacting parameters for group ${this.storage.group_id} up to checkpoint ${this.checkpoint}`); // This is the currently-active checkpoint. // We do not remove any data that may be used by this checkpoint. // snapshot queries ensure that if any clients are still using older checkpoints, they would @@ -33,9 +33,12 @@ export class MongoParameterCompactor { // In theory, we could let MongoDB do more of the work here, by grouping by (key, lookup) // in MongoDB already. However, that risks running into cases where MongoDB needs to process // very large amounts of data before returning results, which could lead to timeouts. + + // Note: This does _not_ currently filter by sync rules version. + // We may need to change the storage structure to group by parameter index lookup creator id in the future. const cursor = this.db.bucket_parameters.find( { - 'key.g': this.group_id + 'key.g': 0 }, { sort: { lookup: 1, _id: 1 }, diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 5670c8e26..41889308c 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -675,7 +675,7 @@ export class MongoSyncBucketStorage await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact(); if (maxOpId != null && options?.compactParameterData) { - await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact(); + await new MongoParameterCompactor(this.db, this, maxOpId, options).compact(); } } diff --git a/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap b/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap index fb2585a0e..579d24666 100644 --- a/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap +++ b/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap @@ -36,7 +36,7 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 2`] = ` { "data": { "after": "0", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": -93886621, @@ -54,7 +54,7 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 2`] = ` "removed_buckets": [], "updated_buckets": [ { - "bucket": "mybucket[]", + "bucket": "10002[]", "checksum": 499012468, "count": 4, "priority": 3, @@ -71,7 +71,7 @@ exports[`sync - mongodb > compacting data - invalidate checkpoint 2`] = ` { "data": { "after": "2", - "bucket": "mybucket[]", + "bucket": "10002[]", "data": [ { "checksum": 1859363232, diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 847060f27..000194eb9 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -104,7 +104,6 @@ export async function resolveTestTable( throw new Error('idGenerator called 
multiple times - not supported in tests'); } didGenerateId = true; - console.log('got id', name, id); return id; } }); diff --git a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts index 4a7f5a311..c8ab8dc22 100644 --- a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts @@ -41,7 +41,7 @@ bucket_definitions: await writer.commitAll('1/1'); - const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1', source: null as any }, ['t1']); + const lookup = ScopedParameterLookup.direct({ lookupName: '20002', queryId: '', source: null as any }, ['t1']); const checkpoint1 = await bucketStorage.getCheckpoint(); const parameters1 = await checkpoint1.getParameterSets([lookup]); From e81181a8c827451fe2e58b869a38f0e78efb7fbd Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 20 Jan 2026 16:32:57 +0200 Subject: [PATCH 053/101] Add missing test file. --- packages/service-core/test/src/utils.ts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 packages/service-core/test/src/utils.ts diff --git a/packages/service-core/test/src/utils.ts b/packages/service-core/test/src/utils.ts new file mode 100644 index 000000000..d37876be2 --- /dev/null +++ b/packages/service-core/test/src/utils.ts @@ -0,0 +1,20 @@ +import { SOURCE } from '@powersync/service-sync-rules'; +/** + * Removes the source property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. + */ +export function removeSource(obj: T): Omit { + const { source, ...rest } = obj; + return rest; +} + +/** + * Removes the [SOURCE] symbol property from an object. + * + * This is for tests where we don't care about this value, and it adds a lot of noise in the output. + */ +export function removeSourceSymbol(obj: T): Omit { + const { [SOURCE]: source, ...rest } = obj; + return rest; +} From c46ce88ea86efd221d456e12760daea135826612 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 10:20:58 +0200 Subject: [PATCH 054/101] Test fix round 4. --- .../test/src/storage_sync.test.ts | 111 +++++++++--------- .../test/src/storage.test.ts | 110 ++++++++--------- .../src/test-utils/general-utils.ts | 45 +------ 3 files changed, 113 insertions(+), 153 deletions(-) diff --git a/modules/module-mongodb-storage/test/src/storage_sync.test.ts b/modules/module-mongodb-storage/test/src/storage_sync.test.ts index 55ab5cdd2..02037c6f4 100644 --- a/modules/module-mongodb-storage/test/src/storage_sync.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_sync.test.ts @@ -5,77 +5,78 @@ import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; describe('sync - mongodb', () => { register.registerSyncTests(INITIALIZED_MONGO_STORAGE_FACTORY); - const TEST_TABLE = test_utils.makeTestTable('test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); // The split of returned results can vary depending on storage drivers test('large batch (2)', async () => { // Test syncing a batch of data that is small in count, // but large enough in size to be split over multiple returned chunks. // Similar to the above test, but splits over 1MB chunks. 
- const sync_rules = test_utils.testRules( - ` + + await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY.factory(); + const syncRules = await factory.updateSyncRules({ + content: ` bucket_definitions: global: data: - SELECT id, description FROM "%" ` - ); - await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY.factory(); - const bucketStorage = factory.getInstance(sync_rules); - - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; - - const largeDescription = '0123456789'.repeat(2_000_00); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large1', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large1') - }); - - // Large enough to split the returned batch - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large2', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large2') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test3', - description: 'test3' - }, - afterReplicaId: test_utils.rid('test3') - }); }); + const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); + + const largeDescription = '0123456789'.repeat(2_000_00); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large1', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large1') + }); + + // Large enough to split the returned batch + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large2', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large2') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test3', + description: 'test3' + }, + afterReplicaId: test_utils.rid('test3') + }); + + const result = await writer.flush(); const checkpoint = result!.flushed_op; const options: storage.BucketDataBatchOptions = {}; const batch1 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(sync_rules, 'global[]', 0n)], options) + bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules, 'global[]', 0n)], options) ); expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 }, @@ -90,7 +91,7 @@ describe('sync - mongodb', () => { const batch2 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - [bucketRequest(sync_rules, 'global[]', batch1[0].chunkData.next_after)], + [bucketRequest(syncRules, 'global[]', batch1[0].chunkData.next_after)], options ) ); @@ -106,7 +107,7 @@ describe('sync - mongodb', () => { const batch3 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - [bucketRequest(sync_rules, 'global[]', batch2[0].chunkData.next_after)], + [bucketRequest(syncRules, 
'global[]', batch2[0].chunkData.next_after)], options ) ); diff --git a/modules/module-postgres-storage/test/src/storage.test.ts b/modules/module-postgres-storage/test/src/storage.test.ts index 672026e55..2d4f43944 100644 --- a/modules/module-postgres-storage/test/src/storage.test.ts +++ b/modules/module-postgres-storage/test/src/storage.test.ts @@ -24,70 +24,70 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { // Test syncing a batch of data that is small in count, // but large enough in size to be split over multiple returned chunks. // Similar to the above test, but splits over 1MB chunks. - const sync_rules = test_utils.testRules( - ` + await using factory = await POSTGRES_STORAGE_FACTORY.factory(); + const syncRules = await factory.updateSyncRules({ + content: ` bucket_definitions: global: data: - SELECT id, description FROM "%" ` - ); - await using factory = await POSTGRES_STORAGE_FACTORY.factory(); - const bucketStorage = factory.getInstance(sync_rules); - - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = test_utils.makeTestTable('test', ['id'], POSTGRES_STORAGE_FACTORY); - - const largeDescription = '0123456789'.repeat(2_000_00); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test1', - description: 'test1' - }, - afterReplicaId: test_utils.rid('test1') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large1', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large1') - }); - - // Large enough to split the returned batch - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'large2', - description: largeDescription - }, - afterReplicaId: test_utils.rid('large2') - }); - - await batch.save({ - sourceTable, - tag: storage.SaveOperationTag.INSERT, - after: { - id: 'test3', - description: 'test3' - }, - afterReplicaId: test_utils.rid('test3') - }); }); + const bucketStorage = factory.getInstance(syncRules); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], POSTGRES_STORAGE_FACTORY); + + const largeDescription = '0123456789'.repeat(2_000_00); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large1', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large1') + }); + + // Large enough to split the returned batch + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'large2', + description: largeDescription + }, + afterReplicaId: test_utils.rid('large2') + }); + + await writer.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test3', + description: 'test3' + }, + afterReplicaId: test_utils.rid('test3') + }); + + const result = await writer.flush(); const checkpoint = result!.flushed_op; const options: storage.BucketDataBatchOptions = {}; const batch1 = await test_utils.fromAsync( - bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(sync_rules, 'global[]', 0n)], options) + bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules, 'global[]', 0n)], options) ); 
expect(test_utils.getBatchData(batch1)).toEqual([ { op_id: '1', op: 'PUT', object_id: 'test1', checksum: 2871785649 } @@ -101,7 +101,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const batch2 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - [bucketRequest(sync_rules, 'global[]', batch1[0].chunkData.next_after)], + [bucketRequest(syncRules, 'global[]', batch1[0].chunkData.next_after)], options ) ); @@ -117,7 +117,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const batch3 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - [bucketRequest(sync_rules, 'global[]', batch2[0].chunkData.next_after)], + [bucketRequest(syncRules, 'global[]', batch2[0].chunkData.next_after)], options ) ); @@ -133,7 +133,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { const batch4 = await test_utils.fromAsync( bucketStorage.getBucketDataBatch( checkpoint, - [bucketRequest(sync_rules, 'global[]', batch3[0].chunkData.next_after)], + [bucketRequest(syncRules, 'global[]', batch3[0].chunkData.next_after)], options ) ); diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 000194eb9..8bb5e42d4 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -21,49 +21,6 @@ export const BATCH_OPTIONS: storage.StartBatchOptions = { storeCurrentData: true }; -export function testRules(content: string): storage.PersistedSyncRulesContent { - return { - id: 1, - sync_rules_content: content, - slot_name: 'test', - active: true, - last_checkpoint_lsn: '', - parsed(options) { - return { - id: 1, - sync_rules: SqlSyncRules.fromYaml(content, options), - slot_name: 'test', - hydratedSyncRules() { - return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(1) }); - }, - hydrationState: versionedHydrationState(1) - }; - }, - lock() { - throw new Error('Not implemented'); - } - }; -} - -export function makeTestTable( - name: string, - replicaIdColumns: string[] | undefined, - options: { tableIdStrings: boolean } -) { - const relId = utils.hashData('table', name, (replicaIdColumns ?? ['id']).join(',')); - const id = - options.tableIdStrings == false ? new bson.ObjectId('6544e3899293153fa7b38331') : '6544e3899293153fa7b38331'; - return new storage.SourceTable({ - id: id, - connectionTag: storage.SourceTable.DEFAULT_TAG, - objectId: relId, - schema: 'public', - name: name, - replicaIdColumns: (replicaIdColumns ?? ['id']).map((column) => ({ name: column, type: 'VARCHAR', typeId: 25 })), - snapshotComplete: true - }); -} - export async function resolveTestTable( writer: storage.BucketDataWriter, name: string, @@ -72,6 +29,8 @@ export async function resolveTestTable( idIndex: number = 1 ) { const relId = utils.hashData('table', name, (replicaIdColumns ?? ['id']).join(',')); + // Semi-hardcoded id for tests, to get consistent output. + // If the same test uses multiple tables, pass idIndex to get different ids. const idString = '6544e3899293153fa7b383' + (30 + idIndex).toString().padStart(2, '0'); const id = options.tableIdStrings == false ? new bson.ObjectId(idString) : idString; From ebdae0e98f1d7ffd4029a8b491884b1cb669ac2a Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 10:36:05 +0200 Subject: [PATCH 055/101] Another test fix. 
--- .../__snapshots__/storage_sync.test.ts.snap | 8 +++---- .../src/tests/register-sync-tests.ts | 22 +++++++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap b/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap index 579d24666..436738361 100644 --- a/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap +++ b/modules/module-mongodb-storage/test/src/__snapshots__/storage_sync.test.ts.snap @@ -165,7 +165,7 @@ exports[`sync - mongodb > encodes sync rules id in buckets for streams 2`] = ` "checkpoint": { "buckets": [ { - "bucket": "10002[]", + "bucket": "20002[]", "checksum": 920318466, "count": 1, "priority": 3, @@ -181,7 +181,7 @@ exports[`sync - mongodb > encodes sync rules id in buckets for streams 2`] = ` { "errors": [], "is_default": true, - "name": "test", + "name": "test2", }, ], "write_checkpoint": undefined, @@ -190,7 +190,7 @@ exports[`sync - mongodb > encodes sync rules id in buckets for streams 2`] = ` { "data": { "after": "0", - "bucket": "2#test|0[]", + "bucket": "20002[]", "data": [ { "checksum": 920318466, @@ -199,7 +199,7 @@ exports[`sync - mongodb > encodes sync rules id in buckets for streams 2`] = ` "object_type": "test", "op": "PUT", "op_id": "2", - "subkey": "e5aa2ddc-1328-58fa-a000-0b5ed31eaf1a", + "subkey": "bfe6a7fc-1a36-5a95-877f-518ff63ecb56", }, ], "has_more": false, diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index ef011fc86..b50ac9844 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -1201,7 +1201,10 @@ bucket_definitions: test('encodes sync rules id in buckets for streams', async () => { await using f = await factory(); - const rules = ` + // This test relies making an actual update to sync rules to test the different bucket names. + // The actual naming scheme may change, as long as the two buckets have different names. + const rules = [ + ` streams: test: auto_subscribe: true @@ -1209,15 +1212,26 @@ streams: config: edition: 2 -`; +`, + ` +streams: + test2: + auto_subscribe: true + query: SELECT * FROM test WHERE 1; + +config: + edition: 2 +` + ]; for (let i = 0; i < 2; i++) { const syncRules = await f.updateSyncRules({ - content: rules + content: rules[i] }); const bucketStorage = f.getInstance(syncRules); await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); - const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); + + const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config, i + 1); await writer.markAllSnapshotDone('0/1'); await writer.save({ From bafd3225cf859fcde91fc7b809dc2d226ff37b79 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 11:13:20 +0200 Subject: [PATCH 056/101] Improve queue responsiveness. 
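The snapshot queue previously woke up on a fixed 500ms poll; with this change it parks on a
deferred promise that is resolved whenever a table is queued or the stream is aborted. Below is
a minimal sketch of that wake-up pattern, using the same p-defer API imported in this change.
The class and method names (SnapshotQueueSketch, add, next) are simplified stand-ins, not the
actual MongoSnapshotter.

    import pDefer, { DeferredPromise } from 'p-defer';

    // Sketch only: illustrates the resolve-on-add / resolve-on-abort wake-up pattern.
    class SnapshotQueueSketch<T> {
      private queue = new Set<T>();
      private nextItemQueued: DeferredPromise<void> | null = null;

      constructor(private abortSignal: AbortSignal) {
        // Wake the consumer immediately on abort instead of waiting out a poll interval.
        abortSignal.addEventListener('abort', () => this.nextItemQueued?.resolve());
      }

      add(item: T) {
        // Adding and waking stay back-to-back so a waiting consumer never misses an item.
        this.queue.add(item);
        this.nextItemQueued?.resolve();
      }

      async next(): Promise<T | null> {
        while (!this.abortSignal.aborted) {
          const item = this.queue.values().next().value;
          if (item != null) {
            this.queue.delete(item);
            return item;
          }
          // No await between the empty check above and creating this deferred promise,
          // otherwise an add() in between would be missed.
          this.nextItemQueued = pDefer<void>();
          await this.nextItemQueued.promise;
          this.nextItemQueued = null;
        }
        return null;
      }
    }

Compared to the previous 500ms sleep, a newly queued table is picked up immediately, and an
abort unblocks the loop right away instead of waiting for the timer to elapse.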
--- .../src/replication/MongoSnapshotter.ts | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index 209596db1..c3ba11cb2 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -11,7 +11,7 @@ import { InternalOpId, MetricsEngine, SaveOperationTag, SourceTable, storage } f import { DatabaseInputRow, RowProcessor, SqliteInputRow, SqliteRow, TablePattern } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; import * as timers from 'node:timers/promises'; -import pDefer from 'p-defer'; +import pDefer, { DeferredPromise } from 'p-defer'; import { MongoLSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; import { escapeRegExp } from '../utils.js'; @@ -60,6 +60,7 @@ export class MongoSnapshotter { private changeStreamTimeout: number; private queue = new Set(); + private nextItemQueued: DeferredPromise | null = null; private initialSnapshotDone = pDefer(); private lastSnapshotOpId: InternalOpId | null = null; @@ -75,6 +76,11 @@ export class MongoSnapshotter { this.logger = options.logger ?? defaultLogger; this.checkpointStreamId = options.checkpointStreamId; this.changeStreamTimeout = Math.ceil(this.client.options.socketTimeoutMS * 0.9); + + this.abortSignal.addEventListener('abort', () => { + // Wake up the queue if is waiting for items + this.nextItemQueued?.resolve(); + }); } private get usePostImages() { @@ -130,6 +136,7 @@ export class MongoSnapshotter { for (let table of tablesWithStatus) { this.queue.add(table); } + this.nextItemQueued?.resolve(); } async waitForInitialSnapshot() { @@ -147,7 +154,12 @@ export class MongoSnapshotter { const table = this.queue.values().next().value; if (table == null) { this.initialSnapshotDone.resolve(); - await timers.setTimeout(500, { signal: this.abortSignal }); + // There must be no await in between checking the queue above and creating this deferred promise, + // otherwise we may miss new items being queued. + this.nextItemQueued = pDefer(); + await this.nextItemQueued.promise; + this.nextItemQueued = null; + // At this point, either we have have a new item in the queue, or we are aborted. continue; } @@ -214,9 +226,15 @@ export class MongoSnapshotter { this.logger.info(`Flushed snapshot at ${this.lastSnapshotOpId}`); } + private queueTable(table: storage.SourceTable) { + // These two operations must be atomic to avoid race conditions + this.queue.add(table); + this.nextItemQueued?.resolve(); + } + async queueSnapshot(writer: storage.BucketDataWriter, table: storage.SourceTable) { await writer.markTableSnapshotRequired(table); - this.queue.add(table); + this.queueTable(table); } async estimatedCount(table: storage.SourceTable): Promise { From ab4a86727be5cc35ee35751e46055cd40fac0e2b Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 12:32:51 +0200 Subject: [PATCH 057/101] Postgres storage fixes part 1. 
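Among other changes, this introduces PostgresWriter as a combined writer over several
sync-rules versions: tables are resolved against each sub-writer's storage, the owning
sub-writer is remembered per resolved SourceTable, and saves, commits and keepalives are
fanned out accordingly. The sketch below shows only that dispatch shape under simplified,
assumed names (SubWriter stands in for PostgresBucketBatch, CombinedWriterSketch for
PostgresWriter); it is not the full implementation from this patch.

    // Sketch only: route row operations to the sub-writer that owns the table,
    // and fan commits out to every sub-writer.
    interface SubWriter {
      save(record: { table: object }): Promise<void>;
      commit(lsn: string): Promise<boolean>;
    }

    class CombinedWriterSketch {
      private subWriters: SubWriter[] = [];
      // Each resolved table is remembered against the sub-writer that produced it,
      // so later operations can be routed without resolving again.
      private tableOwner = new WeakMap<object, SubWriter>();

      addSubWriter(sub: SubWriter) {
        this.subWriters.push(sub);
      }

      registerTable(table: object, owner: SubWriter) {
        this.tableOwner.set(table, owner);
      }

      async save(record: { table: object }) {
        const owner = this.tableOwner.get(record.table);
        if (owner == null) {
          throw new Error('No sub-writer found for table');
        }
        await owner.save(record);
      }

      async commitAll(lsn: string): Promise<boolean> {
        let didCommit = false;
        for (const sub of this.subWriters) {
          // Commit on every version; avoid short-circuiting so each sub-writer still runs.
          const did = await sub.commit(lsn);
          didCommit ||= did;
        }
        return didCommit;
      }
    }

Routing by resolved table lets a single stream of save calls feed more than one storage
version at a time, presumably during sync-rules reprocessing, without each call having to
know which version it targets.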
--- .../implementation/MongoBucketBatch.ts | 9 +- .../src/replication/MongoSnapshotter.ts | 16 -- .../storage/PostgresBucketStorageFactory.ts | 25 ++- .../src/storage/PostgresSyncRulesStorage.ts | 3 +- .../src/storage/batch/PostgresBucketBatch.ts | 7 +- .../src/storage/batch/PostgresWriter.ts | 178 ++++++++++++++++++ .../__snapshots__/storage_sync.test.ts.snap | 110 +++++++++++ .../src/storage/BucketStorageBatch.ts | 12 +- packages/sync-rules/src/HydratedSyncRules.ts | 72 +++++++ 9 files changed, 392 insertions(+), 40 deletions(-) create mode 100644 modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index f7c16e742..6cae94f84 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -1,11 +1,5 @@ import { mongo } from '@powersync/lib-service-mongodb'; -import { - RowProcessor, - SourceTableInterface, - SqlEventDescriptor, - SqliteRow, - SqliteValue -} from '@powersync/service-sync-rules'; +import { RowProcessor, SqlEventDescriptor, SqliteRow, SqliteValue } from '@powersync/service-sync-rules'; import * as bson from 'bson'; import { @@ -26,7 +20,6 @@ import { maxLsn, SaveOperationTag, SourceTable, - SourceTableId, storage, SyncRuleState, utils diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index c3ba11cb2..0b6b75372 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -10,7 +10,6 @@ import { import { InternalOpId, MetricsEngine, SaveOperationTag, SourceTable, storage } from '@powersync/service-core'; import { DatabaseInputRow, RowProcessor, SqliteInputRow, SqliteRow, TablePattern } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; -import * as timers from 'node:timers/promises'; import pDefer, { DeferredPromise } from 'p-defer'; import { MongoLSN } from '../common/MongoLSN.js'; import { PostImagesOption } from '../types/types.js'; @@ -371,21 +370,6 @@ export class MongoSnapshotter { return rowProcessor.applyRowContext(inputRow); } - private async getCollectionInfo(db: string, name: string): Promise { - const collection = ( - await this.client - .db(db) - .listCollections( - { - name: name - }, - { nameOnly: false } - ) - .toArray() - )[0]; - return collection; - } - private async checkPostImages(db: string, collectionInfo: mongo.CollectionInfo) { if (!this.usePostImages) { // Nothing to check diff --git a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts index 7feda539d..8ba87be60 100644 --- a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts +++ b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts @@ -9,10 +9,11 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { models, NormalizedPostgresStorageConfig } from '../types/types.js'; import { NOTIFICATION_CHANNEL, STORAGE_SCHEMA_NAME } from '../utils/db.js'; -import { notifySyncRulesUpdate } from './batch/PostgresBucketBatch.js'; +import { notifySyncRulesUpdate, PostgresBucketBatch } from './batch/PostgresBucketBatch.js'; import { 
PostgresSyncRulesStorage } from './PostgresSyncRulesStorage.js'; import { PostgresPersistedSyncRulesContent } from './sync-rules/PostgresPersistedSyncRulesContent.js'; import { getStorageApplicationName } from '../utils/application-name.js'; +import { PostgresWriter } from './batch/PostgresWriter.js'; export type PostgresBucketStorageOptions = { config: NormalizedPostgresStorageConfig; @@ -43,11 +44,27 @@ export class PostgresBucketStorageFactory }); } - createCombinedWriter( - storage: SyncRulesBucketStorage[], + async createCombinedWriter( + storages: SyncRulesBucketStorage[], options: storage.StartBatchOptions ): Promise { - throw new Error('Not implemented yet'); + const syncRules = storages.map((s) => s.getHydratedSyncRules(options)); + + const rowProcessor = new sync_rules.MultiSyncRules(syncRules); + const writer = new PostgresWriter({ + ...options, + db: this.db, + rowProcessor, + storeCurrentData: options.storeCurrentData ?? true, + skipExistingRows: options.skipExistingRows ?? false + }); + + for (let storage of storages) { + const bucketBatch = (await storage.createWriter(options)) as PostgresBucketBatch; + writer.addSubWriter(bucketBatch); + } + + return writer; } async [Symbol.asyncDispose]() { diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 7c4b2a989..6877a704a 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -359,6 +359,7 @@ export class PostgresSyncRulesStorage const batch = new PostgresBucketBatch({ logger: options.logger ?? framework.logger, db: this.db, + storage: this, sync_rules: this.sync_rules.parsed(options).hydratedSyncRules(), group_id: this.group_id, slot_name: this.slot_name, @@ -378,7 +379,7 @@ export class PostgresSyncRulesStorage options: storage.StartBatchOptions, callback: (batch: storage.BucketStorageBatch) => Promise ): Promise { - const batch = await this.createWriter(options); + await using batch = await this.createWriter(options); await callback(batch); await batch.flush(); diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts index 1a48766c2..f3611070e 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts @@ -21,10 +21,12 @@ import { batchCreateCustomWriteCheckpoints } from '../checkpoints/PostgresWriteC import { cacheKey, encodedCacheKey, OperationBatch, RecordOperation } from './OperationBatch.js'; import { PostgresPersistedBatch, postgresTableId } from './PostgresPersistedBatch.js'; import { bigint } from '../../types/codecs.js'; +import { PostgresSyncRulesStorage } from '../PostgresSyncRulesStorage.js'; export interface PostgresBucketBatchOptions { logger: Logger; db: lib_postgres.DatabaseClient; + storage: PostgresSyncRulesStorage; sync_rules: sync_rules.HydratedSyncRules; group_id: number; slot_name: string; @@ -58,7 +60,6 @@ const CheckpointWithStatus = StatefulCheckpoint.and( created_checkpoint: t.boolean }) ); -type CheckpointWithStatusDecoded = t.Decoded; /** * 15MB. Currently matches MongoDB. 
@@ -79,11 +80,12 @@ export class PostgresBucketBatch protected db: lib_postgres.DatabaseClient; protected group_id: number; protected last_checkpoint_lsn: string | null; + public readonly storage: PostgresSyncRulesStorage; protected persisted_op: InternalOpId | null; protected write_checkpoint_batch: storage.CustomWriteCheckpointOptions[]; - protected readonly sync_rules: sync_rules.HydratedSyncRules; + public readonly sync_rules: sync_rules.HydratedSyncRules; protected batch: OperationBatch | null; private lastWaitingLogThrottled = 0; private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; @@ -94,6 +96,7 @@ export class PostgresBucketBatch super(); this.logger = options.logger; this.db = options.db; + this.storage = options.storage; this.group_id = options.group_id; this.last_checkpoint_lsn = options.last_checkpoint_lsn; this.resumeFromLsn = options.resumeFromLsn; diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts new file mode 100644 index 000000000..e6147cb62 --- /dev/null +++ b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts @@ -0,0 +1,178 @@ +import * as lib_postgres from '@powersync/lib-service-postgres'; +import { Logger, ReplicationAssertionError, logger as defaultLogger } from '@powersync/lib-services-framework'; +import { BucketStorageMarkRecordUnavailable, maxLsn, storage } from '@powersync/service-core'; +import { RowProcessor } from '@powersync/service-sync-rules'; +import { OperationBatch } from './OperationBatch.js'; +import { PostgresBucketBatch } from './PostgresBucketBatch.js'; + +export interface PostgresWriterOptions { + db: lib_postgres.DatabaseClient; + rowProcessor: RowProcessor; + storeCurrentData: boolean; + skipExistingRows: boolean; + logger?: Logger; + markRecordUnavailable?: BucketStorageMarkRecordUnavailable; +} + +export class PostgresWriter implements storage.BucketDataWriter { + private batch: OperationBatch | null = null; + public readonly rowProcessor: RowProcessor; + write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; + + protected db: lib_postgres.DatabaseClient; + private readonly logger: Logger; + private readonly storeCurrentData: boolean; + private readonly skipExistingRows: boolean; + + private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; + public subWriters: PostgresBucketBatch[] = []; + + private sourceTableMap = new WeakMap(); + + constructor(options: PostgresWriterOptions) { + this.db = options.db; + this.rowProcessor = options.rowProcessor; + this.storeCurrentData = options.storeCurrentData; + this.skipExistingRows = options.skipExistingRows; + this.logger = options.logger ?? defaultLogger; + this.markRecordUnavailable = options.markRecordUnavailable; + } + + addSubWriter(subWriter: PostgresBucketBatch) { + this.subWriters.push(subWriter); + } + + get resumeFromLsn(): string | null { + // FIXME: check the logic here when there are multiple batches + let lsn: string | null = null; + for (let sub of this.subWriters) { + // TODO: should this be min instead? 
+ lsn = maxLsn(lsn, sub.resumeFromLsn); + } + return lsn; + } + + async keepaliveAll(lsn: string): Promise { + let didAny = false; + for (let batch of this.subWriters) { + const didBatchKeepalive = await batch.keepalive(lsn); + didAny ||= didBatchKeepalive; + } + return didAny; + } + + async commitAll(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { + let didCommit = false; + for (let batch of this.subWriters) { + const didWriterCommit = await batch.commit(lsn, options); + didCommit ||= didWriterCommit; + } + return didCommit; + } + + async setAllResumeLsn(lsn: string): Promise { + for (let batch of this.subWriters) { + await batch.setResumeLsn(lsn); + } + } + + async resolveTables(options: storage.ResolveTablesOptions): Promise { + let result: storage.ResolveTablesResult = { + tables: [], + dropTables: [] + }; + for (let subWriter of this.subWriters) { + const subResult = await subWriter.storage.resolveTable({ + connection_id: options.connection_id, + connection_tag: options.connection_tag, + entity_descriptor: options.entity_descriptor, + sync_rules: subWriter.sync_rules + }); + result.tables.push(subResult.table); + this.sourceTableMap.set(subResult.table, subWriter); + result.dropTables.push(...subResult.dropTables); + } + return result; + } + + private subWriterForTable(table: storage.SourceTable): PostgresBucketBatch { + // FIXME: store on the SourceTable instead? + const mapped = this.sourceTableMap.get(table); + if (mapped != null) { + return mapped; + } + throw new ReplicationAssertionError(`No sub-writer found for source table ${table.qualifiedName}`); + } + + getTable(ref: storage.SourceTable): Promise { + throw new Error('Method not implemented.'); + } + async save(record: storage.SaveOptions): Promise { + const writer = this.subWriterForTable(record.sourceTable); + return writer.save(record); + } + + async truncate(sourceTables: storage.SourceTable[]): Promise { + for (let table of sourceTables) { + const writer = this.subWriterForTable(table); + await writer.truncate([table]); + } + // FIXME: do we need the result? + return null; + } + + async drop(sourceTables: storage.SourceTable[]): Promise { + for (let table of sourceTables) { + const writer = this.subWriterForTable(table); + await writer.drop([table]); + } + // FIXME: do we need the result? + return null; + } + + async flush(options?: storage.BatchBucketFlushOptions): Promise { + for (let writer of this.subWriters) { + await writer.flush(); + } + // FIXME: do we need the result? 
+ return null; + } + + async markTableSnapshotDone( + tables: storage.SourceTable[], + no_checkpoint_before_lsn?: string + ): Promise { + let result: storage.SourceTable[] = []; + for (let table of tables) { + const writer = this.subWriterForTable(table); + const mapped = await writer.markTableSnapshotDone([table], no_checkpoint_before_lsn); + result.push(...mapped); + } + return result; + } + + async markTableSnapshotRequired(table: storage.SourceTable): Promise { + const writer = this.subWriterForTable(table); + await writer.markTableSnapshotRequired(table); + } + + async markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise { + for (let writer of this.subWriters) { + await writer.markAllSnapshotDone(no_checkpoint_before_lsn); + } + } + + updateTableProgress( + table: storage.SourceTable, + progress: Partial + ): Promise { + const writer = this.subWriterForTable(table); + return writer.updateTableProgress(table, progress); + } + + async [Symbol.asyncDispose]() { + for (let writer of this.subWriters) { + await writer[Symbol.asyncDispose](); + } + } +} diff --git a/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap b/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap index 08a73c8c1..53de900a2 100644 --- a/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap +++ b/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap @@ -214,6 +214,116 @@ exports[`sync - postgres > encodes sync rules id in buckes for streams 2`] = ` ] `; +exports[`sync - postgres > encodes sync rules id in buckets for streams 1`] = ` +[ + { + "checkpoint": { + "buckets": [ + { + "bucket": "1#test|0[]", + "checksum": 920318466, + "count": 1, + "priority": 3, + "subscriptions": [ + { + "default": 0, + }, + ], + }, + ], + "last_op_id": "1", + "streams": [ + { + "errors": [], + "is_default": true, + "name": "test", + }, + ], + "write_checkpoint": undefined, + }, + }, + { + "data": { + "after": "0", + "bucket": "1#test|0[]", + "data": [ + { + "checksum": 920318466, + "data": "{"id":"t1","description":"Test 1"}", + "object_id": "t1", + "object_type": "test", + "op": "PUT", + "op_id": "1", + "subkey": "02d285ac-4f96-5124-8fba-c6d1df992dd1", + }, + ], + "has_more": false, + "next_after": "1", + }, + }, + { + "checkpoint_complete": { + "last_op_id": "1", + }, + }, +] +`; + +exports[`sync - postgres > encodes sync rules id in buckets for streams 2`] = ` +[ + { + "checkpoint": { + "buckets": [ + { + "bucket": "2#test2|0[]", + "checksum": 920318466, + "count": 1, + "priority": 3, + "subscriptions": [ + { + "default": 0, + }, + ], + }, + ], + "last_op_id": "2", + "streams": [ + { + "errors": [], + "is_default": true, + "name": "test2", + }, + ], + "write_checkpoint": undefined, + }, + }, + { + "data": { + "after": "0", + "bucket": "2#test2|0[]", + "data": [ + { + "checksum": 920318466, + "data": "{"id":"t1","description":"Test 1"}", + "object_id": "t1", + "object_type": "test", + "op": "PUT", + "op_id": "2", + "subkey": "8a5f3fdd-3f59-5153-92ae-ac115c458441", + }, + ], + "has_more": false, + "next_after": "2", + }, + }, + { + "checkpoint_complete": { + "last_op_id": "2", + }, + }, +] +`; + exports[`sync - postgres > expired token 1`] = ` [ { diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index 6fda718ea..73528e12f 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ 
b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -7,16 +7,10 @@ import { ToastableSqliteRow } from '@powersync/service-sync-rules'; import { BSON } from 'bson'; -import { ReplicationEventPayload } from './ReplicationEventPayload.js'; -import { SourceTable, SourceTableId, TableSnapshotStatus } from './SourceTable.js'; -import { - BatchedCustomWriteCheckpointOptions, - ResolveTableOptions, - ResolveTableResult, - ResolveTablesOptions, - ResolveTablesResult -} from './storage-index.js'; import { InternalOpId } from '../util/utils.js'; +import { ReplicationEventPayload } from './ReplicationEventPayload.js'; +import { SourceTable, TableSnapshotStatus } from './SourceTable.js'; +import { BatchedCustomWriteCheckpointOptions, ResolveTablesOptions, ResolveTablesResult } from './storage-index.js'; export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptions = { createEmptyCheckpoints: true, diff --git a/packages/sync-rules/src/HydratedSyncRules.ts b/packages/sync-rules/src/HydratedSyncRules.ts index 8cb722533..9bcce43c2 100644 --- a/packages/sync-rules/src/HydratedSyncRules.ts +++ b/packages/sync-rules/src/HydratedSyncRules.ts @@ -204,3 +204,75 @@ export class HydratedSyncRules implements RowProcessor { return { querier, errors }; } } + +/** + * Combines multiple hydrated sync rules into a single row processor. + * + * Does not merge any definitions; simply forwards calls to all contained sync rules. + */ +export class MultiSyncRules implements RowProcessor { + private readonly syncRules: HydratedSyncRules[]; + + constructor(syncRules: HydratedSyncRules[]) { + this.syncRules = syncRules; + } + + get eventDescriptors(): SqlEventDescriptor[] { + return this.syncRules.flatMap((sr) => sr.eventDescriptors); + } + + get compatibility(): CompatibilityContext { + // FIXME + return this.syncRules[0].compatibility; + } + + getSourceTables(): TablePattern[] { + return this.syncRules.flatMap((sr) => sr.getSourceTables()); + } + + getMatchingTablePatterns(table: SourceTableInterface): TablePattern[] { + return this.syncRules.flatMap((sr) => sr.getMatchingTablePatterns(table)); + } + + getMatchingSources(pattern: TablePattern): TableDataSources { + let result: TableDataSources = { bucketDataSources: [], parameterIndexLookupCreators: [] }; + for (let sr of this.syncRules) { + const sources = sr.getMatchingSources(pattern); + result.bucketDataSources.push(...sources.bucketDataSources); + result.parameterIndexLookupCreators.push(...sources.parameterIndexLookupCreators); + } + return result; + } + + applyRowContext( + source: SqliteRow + ): SqliteRow { + // FIXME + return this.syncRules[0].applyRowContext(source); + } + + evaluateRowWithErrors(options: EvaluateRowOptions): { results: EvaluatedRow[]; errors: EvaluationError[] } { + let results: EvaluatedRow[] = []; + let errors: EvaluationError[] = []; + for (let sr of this.syncRules) { + const { results: srResults, errors: srErrors } = sr.evaluateRowWithErrors(options); + results.push(...srResults); + errors.push(...srErrors); + } + return { results, errors }; + } + + evaluateParameterRowWithErrors( + table: SourceTableInterface, + row: SqliteRow + ): { results: EvaluatedParameters[]; errors: EvaluationError[] } { + let results: EvaluatedParameters[] = []; + let errors: EvaluationError[] = []; + for (let sr of this.syncRules) { + const { results: srResults, errors: srErrors } = sr.evaluateParameterRowWithErrors(table, row); + results.push(...srResults); + errors.push(...srErrors); + } + return { results, errors }; + } +} From 
a85771effa8bc4beb35e285dc9d1d314488e5b7d Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 12:48:45 +0200 Subject: [PATCH 058/101] Some postgres storage test fixes. --- .../src/storage/PostgresSyncRulesStorage.ts | 4 +- .../src/storage/batch/PostgresWriter.ts | 3 +- .../__snapshots__/storage_sync.test.ts.snap | 114 +----------------- .../src/test-utils/general-utils.ts | 4 + .../src/tests/register-sync-tests.ts | 16 ++- .../src/storage/SyncRulesBucketStorage.ts | 6 +- 6 files changed, 26 insertions(+), 121 deletions(-) diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 6877a704a..8f39b0c99 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -38,6 +38,7 @@ import { PostgresBucketBatch } from './batch/PostgresBucketBatch.js'; import { PostgresWriteCheckpointAPI } from './checkpoints/PostgresWriteCheckpointAPI.js'; import { PostgresBucketStorageFactory } from './PostgresBucketStorageFactory.js'; import { PostgresCompactor } from './PostgresCompactor.js'; +import { postgresTableId } from './batch/PostgresPersistedBatch.js'; export type PostgresSyncRulesStorageOptions = { factory: PostgresBucketStorageFactory; @@ -224,6 +225,7 @@ export class PostgresSyncRulesStorage } if (sourceTableRow == null) { + const id = options.idGenerator ? postgresTableId(options.idGenerator()) : uuid.v4(); const row = await db.sql` INSERT INTO source_tables ( @@ -237,7 +239,7 @@ export class PostgresSyncRulesStorage ) VALUES ( - ${{ type: 'varchar', value: uuid.v4() }}, + ${{ type: 'varchar', value: id }}, ${{ type: 'int4', value: group_id }}, ${{ type: 'int4', value: connection_id }}, --- The objectId can be string | number | undefined, we store it as jsonb value diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts index e6147cb62..3e3e10ee2 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts @@ -86,7 +86,8 @@ export class PostgresWriter implements storage.BucketDataWriter { connection_id: options.connection_id, connection_tag: options.connection_tag, entity_descriptor: options.entity_descriptor, - sync_rules: subWriter.sync_rules + sync_rules: subWriter.sync_rules, + idGenerator: options.idGenerator }); result.tables.push(subResult.table); this.sourceTableMap.set(subResult.table, subWriter); diff --git a/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap b/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap index 53de900a2..4cab7117e 100644 --- a/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap +++ b/modules/module-postgres-storage/test/src/__snapshots__/storage_sync.test.ts.snap @@ -104,116 +104,6 @@ exports[`sync - postgres > compacting data - invalidate checkpoint 2`] = ` ] `; -exports[`sync - postgres > encodes sync rules id in buckes for streams 1`] = ` -[ - { - "checkpoint": { - "buckets": [ - { - "bucket": "1#test|0[]", - "checksum": 920318466, - "count": 1, - "priority": 3, - "subscriptions": [ - { - "default": 0, - }, - ], - }, - ], - "last_op_id": "1", - "streams": [ - { - "errors": [], - "is_default": true, - "name": "test", - }, - ], - "write_checkpoint": undefined, - }, - }, 
- { - "data": { - "after": "0", - "bucket": "1#test|0[]", - "data": [ - { - "checksum": 920318466, - "data": "{"id":"t1","description":"Test 1"}", - "object_id": "t1", - "object_type": "test", - "op": "PUT", - "op_id": "1", - "subkey": "02d285ac-4f96-5124-8fba-c6d1df992dd1", - }, - ], - "has_more": false, - "next_after": "1", - }, - }, - { - "checkpoint_complete": { - "last_op_id": "1", - }, - }, -] -`; - -exports[`sync - postgres > encodes sync rules id in buckes for streams 2`] = ` -[ - { - "checkpoint": { - "buckets": [ - { - "bucket": "2#test|0[]", - "checksum": 920318466, - "count": 1, - "priority": 3, - "subscriptions": [ - { - "default": 0, - }, - ], - }, - ], - "last_op_id": "2", - "streams": [ - { - "errors": [], - "is_default": true, - "name": "test", - }, - ], - "write_checkpoint": undefined, - }, - }, - { - "data": { - "after": "0", - "bucket": "2#test|0[]", - "data": [ - { - "checksum": 920318466, - "data": "{"id":"t1","description":"Test 1"}", - "object_id": "t1", - "object_type": "test", - "op": "PUT", - "op_id": "2", - "subkey": "02d285ac-4f96-5124-8fba-c6d1df992dd1", - }, - ], - "has_more": false, - "next_after": "2", - }, - }, - { - "checkpoint_complete": { - "last_op_id": "2", - }, - }, -] -`; - exports[`sync - postgres > encodes sync rules id in buckets for streams 1`] = ` [ { @@ -929,7 +819,7 @@ exports[`sync - postgres > sync updates to data query only 2`] = ` "object_type": "lists", "op": "PUT", "op_id": "2", - "subkey": "5ad0aa14-3d5e-5428-ad5b-2c33927d991c", + "subkey": "b9f16d58-e6f5-55b5-9622-7bc360dba34f", }, ], "has_more": false, @@ -1136,7 +1026,7 @@ exports[`sync - postgres > sync updates to parameter query + data 2`] = ` "object_type": "lists", "op": "PUT", "op_id": "1", - "subkey": "5ad0aa14-3d5e-5428-ad5b-2c33927d991c", + "subkey": "b9f16d58-e6f5-55b5-9622-7bc360dba34f", }, ], "has_more": false, diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 8bb5e42d4..2f72a58fc 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -142,6 +142,10 @@ function isParsedSyncRules( return (syncRules as storage.PersistedSyncRules).sync_rules !== undefined; } +/** + * Bucket names no longer purely depend on the sync rules. + * This converts a bucket name like "global[]" into the actual bucket name, for use in tests. 
+ */ export function bucketRequest( syncRules: storage.PersistedSyncRulesContent | storage.PersistedSyncRules, bucket?: string, diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index b50ac9844..9a5ac7f07 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -800,9 +800,11 @@ bucket_definitions: await writer.commitAll('0/1'); const checkpoint2 = await getCheckpointLines(iter); + + const { bucket } = test_utils.bucketRequest(syncRules, 'by_user["user1"]'); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['10002["user1"]']); + ).toEqual([bucket]); expect(checkpoint2).toMatchSnapshot(); }); @@ -855,9 +857,9 @@ bucket_definitions: }); const checkpoint1 = await getCheckpointLines(iter); - expect((checkpoint1[0] as StreamingSyncCheckpoint).checkpoint?.buckets?.map((b) => b.bucket)).toEqual([ - '10002["user1"]' // FIXME: don't hardcode - ]); + + const { bucket } = test_utils.bucketRequest(syncRules, 'by_user["user1"]'); + expect((checkpoint1[0] as StreamingSyncCheckpoint).checkpoint?.buckets?.map((b) => b.bucket)).toEqual([bucket]); expect(checkpoint1).toMatchSnapshot(); await writer.save({ @@ -876,7 +878,7 @@ bucket_definitions: const checkpoint2 = await getCheckpointLines(iter); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['10002["user1"]']); + ).toEqual([bucket]); expect(checkpoint2).toMatchSnapshot(); }); @@ -945,10 +947,12 @@ bucket_definitions: await writer.commitAll('0/1'); + const { bucket } = test_utils.bucketRequest(syncRules, 'by_user["user1"]'); + const checkpoint2 = await getCheckpointLines(iter); expect( (checkpoint2[0] as StreamingSyncCheckpointDiff).checkpoint_diff?.updated_buckets?.map((b) => b.bucket) - ).toEqual(['10002["user1"]']); // TODO: don't hardcode bucket name + ).toEqual([bucket]); expect(checkpoint2).toMatchSnapshot(); }); diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 0c6088a0d..d275f2760 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -173,7 +173,7 @@ export interface ResolveTablesOptions { entity_descriptor: SourceEntityDescriptor; pattern: TablePattern; /** - * For tests only - custom id generator. + * For tests only - custom id generator for stable ids. */ idGenerator?: () => string | bson.ObjectId; } @@ -183,6 +183,10 @@ export interface ResolveTableOptions { connection_tag: string; entity_descriptor: SourceEntityDescriptor; sync_rules: HydratedSyncRules; + /** + * For tests only - custom id generator for stable ids. + */ + idGenerator?: () => string | bson.ObjectId; } export interface ResolveTablesResult { From 18f2d995e89e6c7602da63a6f9bf3218f7cdc059 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 13:01:46 +0200 Subject: [PATCH 059/101] Implement getTable. 
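A minimal sketch of the intended call pattern, assuming the caller holds a
possibly stale SourceTable captured when the table was queued; the
snapshotIfStillNeeded helper and requestTable name are illustrative, not part
of this patch. getTable re-reads the persisted snapshot state, so the caller
can skip tables that were dropped or already snapshotted in the meantime.

    import { storage } from '@powersync/service-core';

    // Sketch only: refresh persisted table state before snapshotting.
    async function snapshotIfStillNeeded(
      writer: storage.BucketDataWriter,
      requestTable: storage.SourceTable
    ): Promise<storage.SourceTable | null> {
      const table = await writer.getTable(requestTable);
      if (table == null) {
        // The table was dropped or re-resolved since it was queued.
        return null;
      }
      if (table.snapshotComplete) {
        // Another pass already finished this table.
        return null;
      }
      // Proceed with the snapshot using the refreshed state.
      return table;
    }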
--- .../src/storage/batch/PostgresWriter.ts | 53 ++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts index 3e3e10ee2..bd5dccab7 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts @@ -4,6 +4,8 @@ import { BucketStorageMarkRecordUnavailable, maxLsn, storage } from '@powersync/ import { RowProcessor } from '@powersync/service-sync-rules'; import { OperationBatch } from './OperationBatch.js'; import { PostgresBucketBatch } from './PostgresBucketBatch.js'; +import { models } from '../../types/types.js'; +import { postgresTableId } from './PostgresPersistedBatch.js'; export interface PostgresWriterOptions { db: lib_postgres.DatabaseClient; @@ -105,9 +107,56 @@ export class PostgresWriter implements storage.BucketDataWriter { throw new ReplicationAssertionError(`No sub-writer found for source table ${table.qualifiedName}`); } - getTable(ref: storage.SourceTable): Promise { - throw new Error('Method not implemented.'); + async getTable(ref: storage.SourceTable): Promise { + const sourceTableRow = await this.db.sql` + SELECT + * + FROM + source_tables + WHERE + id = ${{ type: 'varchar', value: postgresTableId(ref.id) }} + ` + .decoded(models.SourceTable) + .first(); + if (sourceTableRow == null) { + return null; + } + + const subWriter = this.subWriters.find((sw) => sw.storage.group_id === sourceTableRow.group_id); + if (subWriter == null) { + throw new ReplicationAssertionError( + `No sub-writer found for source table ${ref.qualifiedName} with group ID ${sourceTableRow.group_id}` + ); + } + + const sourceTable = new storage.SourceTable({ + // Immutable values + id: sourceTableRow.id, + connectionTag: ref.connectionTag, + objectId: ref.objectId, + schema: ref.schema, + name: ref.name, + replicaIdColumns: ref.replicaIdColumns, + pattern: ref.pattern, + + // Table state + snapshotComplete: sourceTableRow!.snapshot_done ?? true + }); + if (!sourceTable.snapshotComplete) { + sourceTable.snapshotStatus = { + totalEstimatedCount: Number(sourceTableRow!.snapshot_total_estimated_count ?? -1n), + replicatedCount: Number(sourceTableRow!.snapshot_replicated_count ?? 0n), + lastKey: sourceTableRow!.snapshot_last_key + }; + } + // Immutable + sourceTable.syncEvent = ref.syncEvent; + sourceTable.syncData = ref.syncData; + sourceTable.syncParameters = ref.syncParameters; + this.sourceTableMap.set(sourceTable, subWriter); + return sourceTable; } + async save(record: storage.SaveOptions): Promise { const writer = this.subWriterForTable(record.sourceTable); return writer.save(record); From 130041f3ea62de53d3f45fd8e2ec4f8c7a5f85ca Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 13:51:26 +0200 Subject: [PATCH 060/101] Fix updating table progress. 
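The map from SourceTable to sub-writer is a WeakMap keyed by object identity,
and updateTableProgress on the sub-writer returns a new SourceTable instance
rather than mutating the one passed in, so the returned instance has to be
re-registered before it can be used for further lookups. A minimal sketch of
that invariant; the updateProgressAndRemap helper is illustrative, and
Partial<storage.TableSnapshotStatus> is assumed to be the progress type used
by the batch interface.

    import { storage } from '@powersync/service-core';

    async function updateProgressAndRemap(
      sourceTableMap: WeakMap<storage.SourceTable, storage.BucketStorageBatch>,
      subWriter: storage.BucketStorageBatch,
      table: storage.SourceTable,
      progress: Partial<storage.TableSnapshotStatus>
    ): Promise<storage.SourceTable> {
      const updatedTable = await subWriter.updateTableProgress(table, progress);
      // updatedTable !== table: without this line, a later lookup of
      // updatedTable in the map would find no sub-writer and throw.
      sourceTableMap.set(updatedTable, subWriter);
      return updatedTable;
    }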
--- .../src/storage/batch/PostgresWriter.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts index bd5dccab7..e3c2ca618 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts @@ -212,12 +212,14 @@ export class PostgresWriter implements storage.BucketDataWriter { } } - updateTableProgress( + async updateTableProgress( table: storage.SourceTable, progress: Partial ): Promise { const writer = this.subWriterForTable(table); - return writer.updateTableProgress(table, progress); + const updatedTable = await writer.updateTableProgress(table, progress); + this.sourceTableMap.set(updatedTable, writer); + return updatedTable; } async [Symbol.asyncDispose]() { From a93c031811b667ed6a1d0941342d3f9e9a2870e1 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 16:05:06 +0200 Subject: [PATCH 061/101] Remove some startBatch() usage from tests. --- .../register-data-storage-checkpoint-tests.ts | 93 +++++++------------ 1 file changed, 36 insertions(+), 57 deletions(-) diff --git a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts index d597f2cba..548dd435b 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts @@ -33,18 +33,15 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const writeCheckpoint = await bucketStorage.createManagedWriteCheckpoint({ heads: { '1': '5/0' }, user_id: 'user1' }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive('5/0'); - }); + await writer.keepaliveAll('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -71,9 +68,8 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(r.persisted_sync_rules!); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); @@ -81,9 +77,7 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive('5/0'); - }); + await writer.keepaliveAll('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -104,9 +98,7 @@ bucket_definitions: // We have to trigger a new keepalive after the checkpoint, at least to cover postgres storage. // This is what is effetively triggered with RouteAPI.createReplicationHead(). // MongoDB storage doesn't explicitly need this anymore. 
- await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive('6/0'); - }); + await writer.keepaliveAll('6/0'); let result2 = await iter.next(); if (result2.value?.base?.lsn == '5/0') { @@ -138,9 +130,8 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); @@ -148,14 +139,12 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.addCustomWriteCheckpoint({ - checkpoint: 5n, - user_id: 'user1' - }); - await batch.flush(); - await batch.keepalive('5/0'); + writer.addCustomWriteCheckpoint({ + checkpoint: 5n, + user_id: 'user1' }); + await writer.flush(); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -182,9 +171,8 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); @@ -192,17 +180,15 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - // Flush to clear state - await batch.flush(); + // Flush to clear state + await writer.flush(); - await batch.addCustomWriteCheckpoint({ - checkpoint: 5n, - user_id: 'user1' - }); - await batch.flush(); - await batch.keepalive('5/0'); + writer.addCustomWriteCheckpoint({ + checkpoint: 5n, + user_id: 'user1' }); + await writer.flush(); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -229,9 +215,8 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.markAllSnapshotDone('1/1'); - }); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); @@ -239,9 +224,7 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive('5/0'); - }); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -254,14 +237,12 @@ bucket_definitions: } }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - 
batch.addCustomWriteCheckpoint({ - checkpoint: 6n, - user_id: 'user1' - }); - await batch.flush(); - await batch.keepalive('6/0'); + writer.addCustomWriteCheckpoint({ + checkpoint: 6n, + user_id: 'user1' }); + await writer.flush(); + await writer.keepalive('6/0'); let result2 = await iter.next(); expect(result2).toMatchObject({ @@ -275,14 +256,12 @@ bucket_definitions: } }); - await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - batch.addCustomWriteCheckpoint({ - checkpoint: 7n, - user_id: 'user1' - }); - await batch.flush(); - await batch.keepalive('7/0'); + writer.addCustomWriteCheckpoint({ + checkpoint: 7n, + user_id: 'user1' }); + await writer.flush(); + await writer.keepalive('7/0'); let result3 = await iter.next(); expect(result3).toMatchObject({ From b70f3be728acd03f5d9c5f175a1368831c67c9d2 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 16:11:45 +0200 Subject: [PATCH 062/101] Move WalStream to new BucketDataWriter. --- .../src/storage/batch/PostgresWriter.ts | 34 ++- .../src/replication/PostgresSnapshotter.ts | 129 +++++---- .../src/replication/WalStream.ts | 258 +++++++++--------- 3 files changed, 214 insertions(+), 207 deletions(-) diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts index e3c2ca618..3cbe8d153 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts @@ -163,29 +163,32 @@ export class PostgresWriter implements storage.BucketDataWriter { } async truncate(sourceTables: storage.SourceTable[]): Promise { + let flushedResult: storage.FlushedResult | null = null; for (let table of sourceTables) { const writer = this.subWriterForTable(table); - await writer.truncate([table]); + const subResult = await writer.truncate([table]); + flushedResult = maxFlushedResult(flushedResult, subResult); } - // FIXME: do we need the result? - return null; + return flushedResult; } async drop(sourceTables: storage.SourceTable[]): Promise { + let flushedResult: storage.FlushedResult | null = null; for (let table of sourceTables) { const writer = this.subWriterForTable(table); - await writer.drop([table]); + const subResult = await writer.drop([table]); + flushedResult = maxFlushedResult(flushedResult, subResult); } - // FIXME: do we need the result? - return null; + return flushedResult; } async flush(options?: storage.BatchBucketFlushOptions): Promise { + let flushedResult: storage.FlushedResult | null = null; for (let writer of this.subWriters) { - await writer.flush(); + const subResult = await writer.flush(); + flushedResult = maxFlushedResult(flushedResult, subResult); } - // FIXME: do we need the result? - return null; + return flushedResult; } async markTableSnapshotDone( @@ -228,3 +231,16 @@ export class PostgresWriter implements storage.BucketDataWriter { } } } + +function maxFlushedResult( + a: storage.FlushedResult | null, + b: storage.FlushedResult | null +): storage.FlushedResult | null { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + return a.flushed_op > b.flushed_op ? 
a : b; +} diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index cf1cf44e2..ac64fec3d 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -99,7 +99,7 @@ export class PostgresSnapshotter { } async getQualifiedTableNames( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, db: pgwire.PgConnection, tablePattern: TablePattern ): Promise { @@ -176,7 +176,7 @@ export class PostgresSnapshotter { } const table = await this.handleRelation({ - batch, + writer, db, name, schema, @@ -303,34 +303,30 @@ export class PostgresSnapshotter { async replicateTable(requestTable: SourceTable) { const db = await this.connections.snapshotConnection(); - try { - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: ZERO_LSN, - defaultSchema: POSTGRES_DEFAULT_SCHEMA, - storeCurrentData: true, - skipExistingRows: true - }, - async (batch) => { - // Get fresh table info, in case it was updated while queuing - const table = await this.handleRelation({ - batch, - db: db, - name: requestTable.name, - schema: requestTable.schema, - relId: requestTable.objectId as number - }); - await this.snapshotTableInTx(batch, db, table); - // This commit ensures we set keepalive_op. - // It may be better if that is automatically set when flushing. - await batch.commit(ZERO_LSN); - } - ); - this.logger.info(`Flushed snapshot at ${flushResults?.flushed_op}`); - } finally { - await db.end(); - } + await using _ = { [Symbol.asyncDispose]: () => db.end() }; + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }); + + // Get fresh table info, in case it was updated while queuing + const table = await this.handleRelation({ + writer, + db: db, + name: requestTable.name, + schema: requestTable.schema, + relId: requestTable.objectId as number + }); + await this.snapshotTableInTx(writer, db, table); + // This commit ensures we set keepalive_op. + // It may be better if that is automatically set when flushing. 
+ const flushResults = await writer.flush(); + await writer.commitAll(ZERO_LSN); + + this.logger.info(`Flushed snapshot at ${flushResults?.flushed_op}`); } async waitForInitialSnapshot() { @@ -414,34 +410,31 @@ export class PostgresSnapshotter { async queueSnapshotTables(db: pgwire.PgConnection) { const sourceTables = this.sync_rules.getSourceTables(); - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: ZERO_LSN, - defaultSchema: POSTGRES_DEFAULT_SCHEMA, - storeCurrentData: true, - skipExistingRows: true - }, - async (batch) => { - for (let tablePattern of sourceTables) { - const tables = await this.getQualifiedTableNames(batch, db, tablePattern); - // Pre-get counts - for (let table of tables) { - if (table.snapshotComplete) { - this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); - continue; - } - const count = await this.estimatedCountNumber(db, table); - table = await batch.updateTableProgress(table, { totalEstimatedCount: count }); - this.relationCache.update(table); - - this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - - this.queue.add(table); - } + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }); + + for (let tablePattern of sourceTables) { + const tables = await this.getQualifiedTableNames(writer, db, tablePattern); + // Pre-get counts + for (let table of tables) { + if (table.snapshotComplete) { + this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); + continue; } + const count = await this.estimatedCountNumber(db, table); + table = await writer.updateTableProgress(table, { totalEstimatedCount: count }); + this.relationCache.update(table); + + this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + + this.queue.add(table); } - ); + } } static *getQueryData(results: Iterable): Generator { @@ -450,13 +443,13 @@ export class PostgresSnapshotter { } } - public async queueSnapshot(batch: storage.BucketStorageBatch, table: storage.SourceTable) { - await batch.markTableSnapshotRequired(table); + public async queueSnapshot(writer: storage.BucketDataWriter, table: storage.SourceTable) { + await writer.markTableSnapshotRequired(table); this.queue.add(table); } public async snapshotTableInTx( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, db: pgwire.PgConnection, table: storage.SourceTable, limited?: PrimaryKeyValue[] @@ -467,7 +460,7 @@ export class PostgresSnapshotter { await db.query('BEGIN'); try { let tableLsnNotBefore: string; - await this.snapshotTable(batch, db, table, limited); + await this.snapshotTable(writer, db, table, limited); // Get the current LSN. // The data will only be consistent once incremental replication has passed that point. @@ -487,7 +480,7 @@ export class PostgresSnapshotter { // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. 
await db.query('COMMIT'); this.logger.info(`Snapshot complete for table ${table.qualifiedName}, resume at ${tableLsnNotBefore}`); - const [resultTable] = await batch.markTableSnapshotDone([table], tableLsnNotBefore); + const [resultTable] = await writer.markTableSnapshotDone([table], tableLsnNotBefore); this.relationCache.update(resultTable); return resultTable; } catch (e) { @@ -497,7 +490,7 @@ export class PostgresSnapshotter { } private async snapshotTable( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, db: pgwire.PgConnection, table: storage.SourceTable, limited?: PrimaryKeyValue[] @@ -556,7 +549,7 @@ export class PostgresSnapshotter { ); // This auto-flushes when the batch reaches its size limit - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: table, before: undefined, @@ -573,7 +566,7 @@ export class PostgresSnapshotter { } // Important: flush before marking progress - await batch.flush(); + await writer.flush(); if (limited == null) { let lastKey: Uint8Array | undefined; if (q instanceof ChunkedSnapshotQuery) { @@ -587,7 +580,7 @@ export class PostgresSnapshotter { totalEstimatedCount = await this.estimatedCountNumber(db, table); lastCountTime = performance.now(); } - table = await batch.updateTableProgress(table, { + table = await writer.updateTableProgress(table, { lastKey: lastKey, replicatedCount: at, totalEstimatedCount: totalEstimatedCount @@ -607,13 +600,13 @@ export class PostgresSnapshotter { } private async handleRelation(options: { - batch: storage.BucketStorageBatch; + writer: storage.BucketDataWriter; db: pgwire.PgConnection; name: string; schema: string; relId: number; }) { - const { batch, db, name, schema, relId } = options; + const { writer, db, name, schema, relId } = options; const cresult = await getReplicationIdentityColumns(db, relId); const columnTypesResult = await db.query({ @@ -639,7 +632,7 @@ export class PostgresSnapshotter { this.relationCache.update(result.table); // Drop conflicting tables. This includes for example renamed tables. - await batch.drop(result.dropTables); + await writer.drop(result.dropTables); // Ensure we have a description for custom types referenced in the table. await this.connections.types.fetchTypes(columnTypes); diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 7d2345c7d..209a6d911 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -7,6 +7,7 @@ import { ReplicationAssertionError } from '@powersync/lib-services-framework'; import { + BucketDataWriter, BucketStorageBatch, getUuidReplicaIdentityBson, MetricsEngine, @@ -172,12 +173,12 @@ export class WalStream { } async handleRelation(options: { - batch: storage.BucketStorageBatch; + writer: storage.BucketDataWriter; descriptor: SourceEntityDescriptor; snapshot: boolean; referencedTypeIds: number[]; }) { - const { batch, descriptor, snapshot, referencedTypeIds } = options; + const { writer, descriptor, snapshot, referencedTypeIds } = options; if (!descriptor.objectId && typeof descriptor.objectId != 'number') { throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); @@ -193,7 +194,7 @@ export class WalStream { // Drop conflicting tables. This includes for example renamed tables. 
if (result.dropTables.length > 0) { this.logger.info(`Dropping conflicting tables: ${result.dropTables.map((t) => t.qualifiedName).join(', ')}`); - await batch.drop(result.dropTables); + await writer.drop(result.dropTables); } // Ensure we have a description for custom types referenced in the table. @@ -207,7 +208,7 @@ export class WalStream { if (shouldSnapshot) { this.logger.info(`Queuing snapshot for new table ${result.table.qualifiedName}`); - await this.snapshotter.queueSnapshot(batch, result.table); + await this.snapshotter.queueSnapshot(writer, result.table); } return result.table; @@ -221,7 +222,7 @@ export class WalStream { * We handle this similar to an inline table snapshot, but limited to the specific * set of rows. */ - private async resnapshot(batch: BucketStorageBatch, rows: MissingRow[]) { + private async resnapshot(writer: BucketDataWriter, rows: MissingRow[]) { const byTable = new Map(); for (let row of rows) { const relId = row.table.objectId as number; // always a number for postgres @@ -235,7 +236,7 @@ export class WalStream { for (let rows of byTable.values()) { const table = rows[0].table; await this.snapshotter.snapshotTableInTx( - batch, + writer, db, table, rows.map((r) => r.key) @@ -274,7 +275,7 @@ export class WalStream { } async writeChange( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, msg: pgwire.PgoutputMessage ): Promise { if (msg.lsn == null) { @@ -290,7 +291,7 @@ export class WalStream { if (msg.tag == 'insert') { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); const baseRecord = this.syncRulesRecord(this.connections.types.constructAfterRecord(msg)); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: table, before: undefined, @@ -304,7 +305,7 @@ export class WalStream { // It's fine to treat that the same as an insert. const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)); const after = this.toastableSyncRulesRecord(this.connections.types.constructAfterRecord(msg)); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.UPDATE, sourceTable: table, before: before, @@ -316,7 +317,7 @@ export class WalStream { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)!); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.DELETE, sourceTable: table, before: before, @@ -331,7 +332,7 @@ export class WalStream { const table = this.getTable(getRelId(relation)); tables.push(table); } - return await batch.truncate(tables); + return await writer.truncate(tables); } return null; } @@ -487,138 +488,135 @@ export class WalStream { }); }; - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: ZERO_LSN, - defaultSchema: POSTGRES_DEFAULT_SCHEMA, - storeCurrentData: true, - skipExistingRows: false, - markRecordUnavailable - }, - async (batch) => { - // We don't handle any plain keepalive messages while we have transactions. - // While we have transactions, we use that to advance the position. - // Replication never starts in the middle of a transaction, so this starts as false. 
- let skipKeepalive = false; - let count = 0; - - for await (const chunk of replicationStream.pgoutputDecode()) { - this.touch(); - - if (this.abortSignal.aborted) { - break; - } + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: false, + markRecordUnavailable + }); - // chunkLastLsn may come from normal messages in the chunk, - // or from a PrimaryKeepalive message. - const { messages, lastLsn: chunkLastLsn } = chunk; + // We don't handle any plain keepalive messages while we have transactions. + // While we have transactions, we use that to advance the position. + // Replication never starts in the middle of a transaction, so this starts as false. + let skipKeepalive = false; + let count = 0; - /** - * We can check if an explicit keepalive was sent if `exposesLogicalMessages == true`. - * If we can't check the logical messages, we should assume a keepalive if we - * receive an empty array of messages in a replication event. - */ - const assumeKeepAlive = !exposesLogicalMessages; - let keepAliveDetected = false; - const lastCommit = messages.findLast((msg) => msg.tag == 'commit'); - - for (const msg of messages) { - if (msg.tag == 'relation') { - await this.handleRelation({ - batch, - descriptor: getPgOutputRelation(msg), - snapshot: true, - referencedTypeIds: referencedColumnTypeIds(msg) - }); - } else if (msg.tag == 'begin') { - // This may span multiple transactions in the same chunk, or even across chunks. - skipKeepalive = true; - if (this.oldestUncommittedChange == null) { - this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n)); - } - } else if (msg.tag == 'commit') { - this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1); - if (msg == lastCommit) { - // Only commit if this is the last commit in the chunk. - // This effectively lets us batch multiple transactions within the same chunk - // into a single flush, increasing throughput for many small transactions. - skipKeepalive = false; - // flush() must be before the resnapshot check - that is - // typically what reports the resnapshot records. - await batch.flush({ oldestUncommittedChange: this.oldestUncommittedChange }); - // This _must_ be checked after the flush(), and before - // commit() or ack(). We never persist the resnapshot list, - // so we have to process it before marking our progress. - if (resnapshot.length > 0) { - await this.resnapshot(batch, resnapshot); - resnapshot = []; - } - const didCommit = await batch.commit(msg.lsn!, { - createEmptyCheckpoints, - oldestUncommittedChange: this.oldestUncommittedChange - }); - await this.ack(msg.lsn!, replicationStream); - if (didCommit) { - this.oldestUncommittedChange = null; - this.isStartingReplication = false; - } - } - } else { - if (count % 100 == 0) { - this.logger.info(`Replicating op ${count} ${msg.lsn}`); - } - - /** - * If we can see the contents of logical messages, then we can check if a keepalive - * message is present. We only perform a keepalive (below) if we explicitly detect a keepalive message. - * If we can't see the contents of logical messages, then we should assume a keepalive is required - * due to the default value of `assumeKeepalive`. 
- */ - if (exposesLogicalMessages && isKeepAliveMessage(msg)) { - keepAliveDetected = true; - } - - count += 1; - const flushResult = await this.writeChange(batch, msg); - if (flushResult != null && resnapshot.length > 0) { - // If we have large transactions, we also need to flush the resnapshot list - // periodically. - // TODO: make sure this bit is actually triggered - await this.resnapshot(batch, resnapshot); - resnapshot = []; - } + for await (const chunk of replicationStream.pgoutputDecode()) { + this.touch(); + + if (this.abortSignal.aborted) { + break; + } + + // chunkLastLsn may come from normal messages in the chunk, + // or from a PrimaryKeepalive message. + const { messages, lastLsn: chunkLastLsn } = chunk; + + /** + * We can check if an explicit keepalive was sent if `exposesLogicalMessages == true`. + * If we can't check the logical messages, we should assume a keepalive if we + * receive an empty array of messages in a replication event. + */ + const assumeKeepAlive = !exposesLogicalMessages; + let keepAliveDetected = false; + const lastCommit = messages.findLast((msg) => msg.tag == 'commit'); + + for (const msg of messages) { + if (msg.tag == 'relation') { + await this.handleRelation({ + writer, + descriptor: getPgOutputRelation(msg), + snapshot: true, + referencedTypeIds: referencedColumnTypeIds(msg) + }); + } else if (msg.tag == 'begin') { + // This may span multiple transactions in the same chunk, or even across chunks. + skipKeepalive = true; + if (this.oldestUncommittedChange == null) { + this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n)); + } + } else if (msg.tag == 'commit') { + this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1); + if (msg == lastCommit) { + // Only commit if this is the last commit in the chunk. + // This effectively lets us batch multiple transactions within the same chunk + // into a single flush, increasing throughput for many small transactions. + skipKeepalive = false; + // flush() must be before the resnapshot check - that is + // typically what reports the resnapshot records. + await writer.flush({ oldestUncommittedChange: this.oldestUncommittedChange }); + // This _must_ be checked after the flush(), and before + // commit() or ack(). We never persist the resnapshot list, + // so we have to process it before marking our progress. + if (resnapshot.length > 0) { + await this.resnapshot(writer, resnapshot); + resnapshot = []; + } + const didCommit = await writer.commitAll(msg.lsn!, { + createEmptyCheckpoints, + oldestUncommittedChange: this.oldestUncommittedChange + }); + await this.ack(msg.lsn!, replicationStream); + if (didCommit) { + this.oldestUncommittedChange = null; + this.isStartingReplication = false; } } + } else { + if (count % 100 == 0) { + this.logger.info(`Replicating op ${count} ${msg.lsn}`); + } - if (!skipKeepalive) { - if (assumeKeepAlive || keepAliveDetected) { - // Reset the detection flag. - keepAliveDetected = false; + /** + * If we can see the contents of logical messages, then we can check if a keepalive + * message is present. We only perform a keepalive (below) if we explicitly detect a keepalive message. + * If we can't see the contents of logical messages, then we should assume a keepalive is required + * due to the default value of `assumeKeepalive`. + */ + if (exposesLogicalMessages && isKeepAliveMessage(msg)) { + keepAliveDetected = true; + } - // In a transaction, we ack and commit according to the transaction progress. 
- // Outside transactions, we use the PrimaryKeepalive messages to advance progress. - // Big caveat: This _must not_ be used to skip individual messages, since this LSN - // may be in the middle of the next transaction. - // It must only be used to associate checkpoints with LSNs. + count += 1; + const flushResult = await this.writeChange(writer, msg); + if (flushResult != null && resnapshot.length > 0) { + // If we have large transactions, we also need to flush the resnapshot list + // periodically. + // TODO: make sure this bit is actually triggered + await this.resnapshot(writer, resnapshot); + resnapshot = []; + } + } + } - const didCommit = await batch.keepalive(chunkLastLsn); - if (didCommit) { - this.oldestUncommittedChange = null; - } + if (!skipKeepalive) { + if (assumeKeepAlive || keepAliveDetected) { + // Reset the detection flag. + keepAliveDetected = false; - this.isStartingReplication = false; - } + // In a transaction, we ack and commit according to the transaction progress. + // Outside transactions, we use the PrimaryKeepalive messages to advance progress. + // Big caveat: This _must not_ be used to skip individual messages, since this LSN + // may be in the middle of the next transaction. + // It must only be used to associate checkpoints with LSNs. - // We receive chunks with empty messages often (about each second). - // Acknowledging here progresses the slot past these and frees up resources. - await this.ack(chunkLastLsn, replicationStream); + const didCommit = await writer.keepaliveAll(chunkLastLsn); + if (didCommit) { + this.oldestUncommittedChange = null; } - this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED).add(1); + this.isStartingReplication = false; } + + // We receive chunks with empty messages often (about each second). + // Acknowledging here progresses the slot past these and frees up resources. + await this.ack(chunkLastLsn, replicationStream); } - ); + + this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED).add(1); + } throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason); } From 8d9ca0d0511b2c901d1007941b4bd1a704ecff0b Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 16:33:33 +0200 Subject: [PATCH 063/101] Further compatibility fixes for postgres snapshotter. 
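Among other changes, the snapshot queue no longer polls on a fixed 500ms
timer; a waiting consumer is woken by a deferred promise that is resolved
either when a table is queued or when replication is aborted. A standalone
sketch of that pattern, with illustrative names (only p-defer is a real
dependency):

    import pDefer, { DeferredPromise } from 'p-defer';

    class WorkQueue<T extends object> {
      private items = new Set<T>();
      private nextItemQueued: DeferredPromise<void> | null = null;

      constructor(private abortSignal: AbortSignal) {
        // Abort must also wake a waiting consumer, otherwise next() would hang.
        abortSignal.addEventListener('abort', () => this.nextItemQueued?.resolve());
      }

      add(item: T) {
        this.items.add(item);
        // Wake the consumer if it is currently waiting for work.
        this.nextItemQueued?.resolve();
      }

      /** Returns the next queued item, or null once the signal is aborted. */
      async next(): Promise<T | null> {
        while (!this.abortSignal.aborted) {
          const item: T | undefined = this.items.values().next().value;
          if (item != null) {
            this.items.delete(item);
            return item;
          }
          // No await between the emptiness check and creating the deferred
          // promise, so a concurrently queued item cannot be missed.
          this.nextItemQueued = pDefer();
          await this.nextItemQueued.promise;
          this.nextItemQueued = null;
        }
        return null;
      }
    }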
--- .../test/src/change_stream_utils.ts | 6 +- .../src/replication/PostgresSnapshotter.ts | 111 ++++++------ .../src/replication/WalStream.ts | 164 ++++++++++-------- .../test/src/wal_stream.test.ts | 6 +- 4 files changed, 144 insertions(+), 143 deletions(-) diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index e7c6c4607..1920bd993 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -165,10 +165,8 @@ export class ChangeStreamTestContext { */ async markSnapshotConsistent() { const checkpoint = await createCheckpoint(this.client, this.db, STANDALONE_CHECKPOINT_ID); - - await this.storage!.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - await batch.keepalive(checkpoint); - }); + await using writer = await this.storage!.factory.createCombinedWriter([this.storage!], test_utils.BATCH_OPTIONS); + await writer.keepaliveAll(checkpoint); } async getCheckpoint(options?: { timeout?: number }) { diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index ac64fec3d..7639109aa 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -25,7 +25,7 @@ import { import { ReplicationMetric } from '@powersync/service-types'; import * as timers from 'node:timers/promises'; -import pDefer from 'p-defer'; +import pDefer, { DeferredPromise } from 'p-defer'; import { PostgresTypeResolver } from '../types/resolver.js'; import { PgManager } from './PgManager.js'; import { @@ -83,6 +83,7 @@ export class PostgresSnapshotter { }); private queue = new Set(); + private nextItemQueued: DeferredPromise | null = null; private initialSnapshotDone = pDefer(); constructor(options: WalStreamOptions) { @@ -96,6 +97,11 @@ export class PostgresSnapshotter { this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000; this.abortSignal = options.abort_signal; + + this.abortSignal.addEventListener('abort', () => { + // Wake up the queue if is waiting for items + this.nextItemQueued?.resolve(); + }); } async getQualifiedTableNames( @@ -175,15 +181,33 @@ export class PostgresSnapshotter { this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e); } - const table = await this.handleRelation({ - writer, - db, - name, - schema, - relId: relid + const cresult = await getReplicationIdentityColumns(db, relid); + const columnTypesResult = await db.query({ + statement: `SELECT DISTINCT atttypid + FROM pg_attribute + WHERE attnum > 0 AND NOT attisdropped AND attrelid = $1`, + params: [{ type: 'int4', value: relid }] + }); + + const columnTypes = columnTypesResult.rows.map((row) => Number(row.decodeWithoutCustomTypes(0))); + + const resolvedResult = await writer.resolveTables({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: { + schema: schema, + name: name, + objectId: relid, + replicaIdColumns: cresult.replicationColumns + }, + pattern: tablePattern }); - result.push(table); + // Ensure we have a description for custom types referenced in the table. + await this.connections.types.fetchTypes(columnTypes); + + // TODO: dropTables? 
+ result.push(...resolvedResult.tables); } return result; } @@ -313,13 +337,13 @@ export class PostgresSnapshotter { }); // Get fresh table info, in case it was updated while queuing - const table = await this.handleRelation({ - writer, - db: db, - name: requestTable.name, - schema: requestTable.schema, - relId: requestTable.objectId as number - }); + const table = await writer.getTable(requestTable); + if (table == null) { + return; + } + if (table.snapshotComplete) { + return; + } await this.snapshotTableInTx(writer, db, table); // This commit ensures we set keepalive_op. // It may be better if that is automatically set when flushing. @@ -343,7 +367,12 @@ export class PostgresSnapshotter { const table = this.queue.values().next().value; if (table == null) { this.initialSnapshotDone.resolve(); - await timers.setTimeout(500, { signal: this.abortSignal }); + // There must be no await in between checking the queue above and creating this deferred promise, + // otherwise we may miss new items being queued. + this.nextItemQueued = pDefer(); + await this.nextItemQueued.promise; + this.nextItemQueued = null; + // At this point, either we have have a new item in the queue, or we are aborted. continue; } @@ -432,7 +461,7 @@ export class PostgresSnapshotter { this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - this.queue.add(table); + this.queueTable(table); } } } @@ -443,9 +472,14 @@ export class PostgresSnapshotter { } } + private queueTable(table: storage.SourceTable) { + this.queue.add(table); + this.nextItemQueued?.resolve(); + } + public async queueSnapshot(writer: storage.BucketDataWriter, table: storage.SourceTable) { await writer.markTableSnapshotRequired(table); - this.queue.add(table); + this.queueTable(table); } public async snapshotTableInTx( @@ -599,47 +633,6 @@ export class PostgresSnapshotter { } } - private async handleRelation(options: { - writer: storage.BucketDataWriter; - db: pgwire.PgConnection; - name: string; - schema: string; - relId: number; - }) { - const { writer, db, name, schema, relId } = options; - - const cresult = await getReplicationIdentityColumns(db, relId); - const columnTypesResult = await db.query({ - statement: `SELECT DISTINCT atttypid - FROM pg_attribute - WHERE attnum > 0 AND NOT attisdropped AND attrelid = $1`, - params: [{ type: 'int4', value: relId }] - }); - - const columnTypes = columnTypesResult.rows.map((row) => Number(row.decodeWithoutCustomTypes(0))); - - const result = await this.storage.resolveTable({ - connection_id: this.connection_id, - connection_tag: this.connections.connectionTag, - entity_descriptor: { - name, - schema, - objectId: relId, - replicaIdColumns: cresult.replicationColumns - }, - sync_rules: this.sync_rules - }); - this.relationCache.update(result.table); - - // Drop conflicting tables. This includes for example renamed tables. - await writer.drop(result.dropTables); - - // Ensure we have a description for custom types referenced in the table. 
- await this.connections.types.fetchTypes(columnTypes); - - return result.table; - } - private touch() { container.probes.touch().catch((e) => { this.logger.error(`Error touching probe`, e); diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 209a6d911..5741eb3c1 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -109,12 +109,7 @@ export class WalStream { private initPromise: Promise | null = null; private snapshotter: PostgresSnapshotter; - private relationCache = new RelationCache((relation: number | SourceTable) => { - if (typeof relation == 'number') { - return relation; - } - return relation.objectId!; - }); + public readonly relationCache = new Map(); private startedStreaming = false; @@ -183,35 +178,51 @@ export class WalStream { if (!descriptor.objectId && typeof descriptor.objectId != 'number') { throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); } - const result = await this.storage.resolveTable({ - connection_id: this.connection_id, - connection_tag: this.connections.connectionTag, - entity_descriptor: descriptor, - sync_rules: this.sync_rules + // In common cases, there would be at most one matching pattern, since patterns + // are de-duplicated. However, there may be multiple if: + // 1. There is overlap with direct name matching and wildcard matching. + // 2. There are multiple patterns with different replication config. + const patterns = writer.rowProcessor.getMatchingTablePatterns({ + connectionTag: this.connections.connectionTag, + schema: descriptor.schema, + name: descriptor.name }); - this.relationCache.update(result.table); - // Drop conflicting tables. This includes for example renamed tables. - if (result.dropTables.length > 0) { - this.logger.info(`Dropping conflicting tables: ${result.dropTables.map((t) => t.qualifiedName).join(', ')}`); - await writer.drop(result.dropTables); - } + let allTables: SourceTable[] = []; + for (let pattern of patterns) { + const result = await writer.resolveTables({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor, + pattern + }); - // Ensure we have a description for custom types referenced in the table. - await this.connections.types.fetchTypes(referencedTypeIds); + // Drop conflicting tables. This includes for example renamed tables. + if (result.dropTables.length > 0) { + this.logger.info(`Dropping conflicting tables: ${result.dropTables.map((t) => t.qualifiedName).join(', ')}`); + await writer.drop(result.dropTables); + } - // Snapshot if: - // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not already done, AND: - // 3. The table is used in sync rules. - const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; + // Ensure we have a description for custom types referenced in the table. + await this.connections.types.fetchTypes(referencedTypeIds); + + // Snapshot if: + // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) + // 2. Snapshot is not already done, AND: + // 3. The table is used in sync rules. 
+ for (let table of result.tables) { + const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; + if (shouldSnapshot) { + this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); + await this.snapshotter.queueSnapshot(writer, table); + } + } - if (shouldSnapshot) { - this.logger.info(`Queuing snapshot for new table ${result.table.qualifiedName}`); - await this.snapshotter.queueSnapshot(writer, result.table); + allTables.push(...result.tables); } + this.relationCache.set(descriptor.objectId, allTables); - return result.table; + return allTables; } /** @@ -250,14 +261,14 @@ export class WalStream { } } - private getTable(relationId: number): storage.SourceTable { - const table = this.relationCache.get(relationId); - if (table == null) { + private getTable(relationId: number): storage.SourceTable[] { + const tables = this.relationCache.get(relationId); + if (tables == null) { // We should always receive a replication message before the relation is used. // If we can't find it, it's a bug. throw new ReplicationAssertionError(`Missing relation cache for ${relationId}`); } - return table; + return tables; } private syncRulesRecord(row: SqliteInputRow): SqliteRow; @@ -282,55 +293,54 @@ export class WalStream { return null; } if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') { - const table = this.getTable(getRelId(msg.relation)); - if (!table.syncAny) { - this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`); - return null; - } - - if (msg.tag == 'insert') { - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); - const baseRecord = this.syncRulesRecord(this.connections.types.constructAfterRecord(msg)); - return await writer.save({ - tag: storage.SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: baseRecord, - afterReplicaId: getUuidReplicaIdentityBson(baseRecord, table.replicaIdColumns) - }); - } else if (msg.tag == 'update') { - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); - // "before" may be null if the replica id columns are unchanged - // It's fine to treat that the same as an insert. - const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)); - const after = this.toastableSyncRulesRecord(this.connections.types.constructAfterRecord(msg)); - return await writer.save({ - tag: storage.SaveOperationTag.UPDATE, - sourceTable: table, - before: before, - beforeReplicaId: before ? 
getUuidReplicaIdentityBson(before, table.replicaIdColumns) : undefined, - after: after, - afterReplicaId: getUuidReplicaIdentityBson(after, table.replicaIdColumns) - }); - } else if (msg.tag == 'delete') { - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); - const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)!); - - return await writer.save({ - tag: storage.SaveOperationTag.DELETE, - sourceTable: table, - before: before, - beforeReplicaId: getUuidReplicaIdentityBson(before, table.replicaIdColumns), - after: undefined, - afterReplicaId: undefined - }); + const tables = this.getTable(getRelId(msg.relation)); + const filtered = tables.filter((t) => t.syncAny); + + for (let table of filtered) { + if (msg.tag == 'insert') { + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); + const baseRecord = this.syncRulesRecord(this.connections.types.constructAfterRecord(msg)); + return await writer.save({ + tag: storage.SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: baseRecord, + afterReplicaId: getUuidReplicaIdentityBson(baseRecord, table.replicaIdColumns) + }); + } else if (msg.tag == 'update') { + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); + // "before" may be null if the replica id columns are unchanged + // It's fine to treat that the same as an insert. + const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)); + const after = this.toastableSyncRulesRecord(this.connections.types.constructAfterRecord(msg)); + return await writer.save({ + tag: storage.SaveOperationTag.UPDATE, + sourceTable: table, + before: before, + beforeReplicaId: before ? getUuidReplicaIdentityBson(before, table.replicaIdColumns) : undefined, + after: after, + afterReplicaId: getUuidReplicaIdentityBson(after, table.replicaIdColumns) + }); + } else if (msg.tag == 'delete') { + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); + const before = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)!); + + return await writer.save({ + tag: storage.SaveOperationTag.DELETE, + sourceTable: table, + before: before, + beforeReplicaId: getUuidReplicaIdentityBson(before, table.replicaIdColumns), + after: undefined, + afterReplicaId: undefined + }); + } } } else if (msg.tag == 'truncate') { let tables: storage.SourceTable[] = []; for (let relation of msg.relations) { - const table = this.getTable(getRelId(relation)); - tables.push(table); + const relTables = this.getTable(getRelId(relation)); + tables.push(...relTables); } return await writer.truncate(tables); } diff --git a/modules/module-postgres/test/src/wal_stream.test.ts b/modules/module-postgres/test/src/wal_stream.test.ts index 3a225ecec..89cacc72d 100644 --- a/modules/module-postgres/test/src/wal_stream.test.ts +++ b/modules/module-postgres/test/src/wal_stream.test.ts @@ -502,7 +502,7 @@ config: await context.initializeReplication(); await pool.query(`INSERT INTO test_data(id, description) VALUES ('t1', '2025-09-10 15:17:14+02')`); - const data = await context.getBucketData('1#stream|0[]'); + const data = await context.getBucketData('stream|0[]'); expect(data).toMatchObject([putOp('test_data', { id: 't1', description: '2025-09-10T13:17:14.000000Z' })]); }); @@ -534,7 +534,7 @@ config: `INSERT INTO test_data(id, description, ts) VALUES ('t2', ROW(TRUE, 2)::composite, '2025-11-17T09:12:00Z')` ); - const data = await context.getBucketData('1#stream|0[]'); + const data = await 
context.getBucketData('stream|0[]'); expect(data).toMatchObject([ putOp('test_data', { id: 't1', description: '{"foo":1,"bar":1}', ts: '2025-11-17T09:11:00.000000Z' }), putOp('test_data', { id: 't2', description: '{"foo":1,"bar":2}', ts: '2025-11-17T09:12:00.000000Z' }) @@ -561,7 +561,7 @@ config: await context.initializeReplication(); await pool.query(`INSERT INTO test_data(id) VALUES ('t1')`); - const data = await context.getBucketData('1#stream|0[]'); + const data = await context.getBucketData('stream|0[]'); expect(data).toMatchObject([putOp('test_data', { id: 't1' })]); }); From 7aa1754c1ca1fe9578768a7608e38fe6e188808d Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 16:42:56 +0200 Subject: [PATCH 064/101] Partial SQL Server refactor to new APIs. --- .../module-mssql/src/replication/CDCStream.ts | 300 +++++++++--------- .../src/replication/PostgresSnapshotter.ts | 19 +- 2 files changed, 149 insertions(+), 170 deletions(-) diff --git a/modules/module-mssql/src/replication/CDCStream.ts b/modules/module-mssql/src/replication/CDCStream.ts index 98c277918..c0648f2b0 100644 --- a/modules/module-mssql/src/replication/CDCStream.ts +++ b/modules/module-mssql/src/replication/CDCStream.ts @@ -167,26 +167,23 @@ export class CDCStream { async populateTableCache() { const sourceTables = this.syncRules.getSourceTables(); - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: LSN.ZERO, - defaultSchema: this.defaultSchema, - storeCurrentData: true - }, - async (batch) => { - for (let tablePattern of sourceTables) { - const tables = await this.getQualifiedTableNames(batch, tablePattern); - for (const table of tables) { - this.tableCache.set(table); - } - } + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: LSN.ZERO, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }); + + for (let tablePattern of sourceTables) { + const tables = await this.getQualifiedTableNames(writer, tablePattern); + for (const table of tables) { + this.tableCache.set(table); } - ); + } } async getQualifiedTableNames( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, tablePattern: TablePattern ): Promise { if (tablePattern.connectionTag != this.connections.connectionTag) { @@ -216,83 +213,93 @@ export class CDCStream { schema: matchedTable.schema }); - const table = await this.processTable( - batch, + const tables = await this.processTable( + writer, { name: matchedTable.name, schema: matchedTable.schema, objectId: matchedTable.objectId, replicaIdColumns: replicaIdColumns.columns }, - false + false, + tablePattern ); - tables.push(table); + tables.push(...tables); } return tables; } async processTable( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, table: SourceEntityDescriptor, - snapshot: boolean - ): Promise { + snapshot: boolean, + pattern: TablePattern + ): Promise { if (!table.objectId && typeof table.objectId != 'number') { throw new ReplicationAssertionError(`objectId expected, got ${typeof table.objectId}`); } - const resolved = await this.storage.resolveTable({ + + const resolved = await writer.resolveTables({ connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: table, - sync_rules: this.syncRules - }); - const captureInstance = await getCaptureInstance({ - connectionManager: this.connections, - tableName: resolved.table.name, - schema: resolved.table.schema - }); - if (!captureInstance) { - throw new 
ServiceAssertionError( - `Missing capture instance for table ${toQualifiedTableName(resolved.table.schema, resolved.table.name)}` - ); - } - const resolvedTable = new MSSQLSourceTable({ - sourceTable: resolved.table, - captureInstance: captureInstance + pattern }); // Drop conflicting tables. This includes for example renamed tables. - await batch.drop(resolved.dropTables); - - // Snapshot if: - // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not already done, AND: - // 3. The table is used in sync rules. - const shouldSnapshot = snapshot && !resolved.table.snapshotComplete && resolved.table.syncAny; - - if (shouldSnapshot) { - // Truncate this table in case a previous snapshot was interrupted. - await batch.truncate([resolved.table]); - - // Start the snapshot inside a transaction. - try { - await this.snapshotTableInTx(batch, resolvedTable); - } finally { - // TODO Cleanup? + await writer.drop(resolved.dropTables); + + let resultingTables: MSSQLSourceTable[] = []; + + for (let table of resolved.tables) { + const captureInstance = await getCaptureInstance({ + connectionManager: this.connections, + tableName: table.name, + schema: table.schema + }); + if (!captureInstance) { + throw new ServiceAssertionError( + `Missing capture instance for table ${toQualifiedTableName(table.schema, table.name)}` + ); } + const resolvedTable = new MSSQLSourceTable({ + sourceTable: table, + captureInstance: captureInstance + }); + + // Snapshot if: + // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) + // 2. Snapshot is not already done, AND: + // 3. The table is used in sync rules. + const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; + + if (shouldSnapshot) { + // Truncate this table in case a previous snapshot was interrupted. + await writer.truncate([table]); + + // Start the snapshot inside a transaction. + try { + await this.snapshotTableInTx(writer, resolvedTable); + } finally { + // TODO Cleanup? + } + } + + resultingTables.push(resolvedTable); } - return resolvedTable; + return resultingTables; } - private async snapshotTableInTx(batch: storage.BucketStorageBatch, table: MSSQLSourceTable): Promise { + private async snapshotTableInTx(writer: storage.BucketDataWriter, table: MSSQLSourceTable): Promise { // Note: We use the "Read Committed" isolation level here, not snapshot isolation. // The data may change during the transaction, but that is compensated for in the streaming // replication afterward. const transaction = await this.connections.createTransaction(); await transaction.begin(sql.ISOLATION_LEVEL.READ_COMMITTED); try { - await this.snapshotTable(batch, transaction, table); + await this.snapshotTable(writer, transaction, table); // Get the current LSN. // The data will only be consistent once incremental replication has passed that point. @@ -309,7 +316,7 @@ export class CDCStream { const postSnapshotLSN = await getLatestLSN(this.connections); // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. 
await transaction.commit(); - const [updatedSourceTable] = await batch.markTableSnapshotDone([table.sourceTable], postSnapshotLSN.toString()); + const [updatedSourceTable] = await writer.markTableSnapshotDone([table.sourceTable], postSnapshotLSN.toString()); this.tableCache.updateSourceTable(updatedSourceTable); } catch (e) { await transaction.rollback(); @@ -317,11 +324,7 @@ export class CDCStream { } } - private async snapshotTable( - batch: storage.BucketStorageBatch, - transaction: sql.Transaction, - table: MSSQLSourceTable - ) { + private async snapshotTable(writer: storage.BucketDataWriter, transaction: sql.Transaction, table: MSSQLSourceTable) { let totalEstimatedCount = table.sourceTable.snapshotStatus?.totalEstimatedCount; let replicatedCount = table.sourceTable.snapshotStatus?.replicatedCount ?? 0; let lastCountTime = 0; @@ -377,7 +380,7 @@ export class CDCStream { const inputRow: SqliteInputRow = toSqliteInputRow(result, columns); const row = this.syncRules.applyRowContext(inputRow); // This auto-flushes when the batch reaches its size limit - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: table.sourceTable, before: undefined, @@ -395,7 +398,7 @@ export class CDCStream { } // Important: flush before marking progress - await batch.flush(); + await writer.flush(); let lastKey: Uint8Array | undefined; if (query instanceof BatchedSnapshotQuery) { @@ -409,7 +412,7 @@ export class CDCStream { totalEstimatedCount = await this.estimatedCountNumber(table, transaction); lastCountTime = performance.now(); } - const updatedSourceTable = await batch.updateTableProgress(table.sourceTable, { + const updatedSourceTable = await writer.updateTableProgress(table.sourceTable, { lastKey: lastKey, replicatedCount: replicatedCount, totalEstimatedCount: totalEstimatedCount @@ -462,56 +465,52 @@ export class CDCStream { await this.storage.clear({ signal: this.abortSignal }); } - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: LSN.ZERO, - defaultSchema: this.defaultSchema, - storeCurrentData: false, - skipExistingRows: true - }, - async (batch) => { - if (snapshotLSN == null) { - // First replication attempt - set the snapshot LSN to the current LSN before starting - snapshotLSN = (await getLatestReplicatedLSN(this.connections)).toString(); - await batch.setResumeLsn(snapshotLSN); - const latestLSN = (await getLatestLSN(this.connections)).toString(); - this.logger.info(`Marking snapshot at ${snapshotLSN}, Latest DB LSN ${latestLSN}.`); - } else { - this.logger.info(`Resuming snapshot at ${snapshotLSN}.`); - } + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: LSN.ZERO, + defaultSchema: this.defaultSchema, + storeCurrentData: false, + skipExistingRows: true + }); + if (snapshotLSN == null) { + // First replication attempt - set the snapshot LSN to the current LSN before starting + snapshotLSN = (await getLatestReplicatedLSN(this.connections)).toString(); + await writer.setAllResumeLsn(snapshotLSN); + const latestLSN = (await getLatestLSN(this.connections)).toString(); + this.logger.info(`Marking snapshot at ${snapshotLSN}, Latest DB LSN ${latestLSN}.`); + } else { + this.logger.info(`Resuming snapshot at ${snapshotLSN}.`); + } - const tablesToSnapshot: MSSQLSourceTable[] = []; - for (const table of this.tableCache.getAll()) { - if (table.sourceTable.snapshotComplete) { - this.logger.info(`Skipping table [${table.toQualifiedName()}] - snapshot already done.`); - 
continue; - } + const tablesToSnapshot: MSSQLSourceTable[] = []; + for (const table of this.tableCache.getAll()) { + if (table.sourceTable.snapshotComplete) { + this.logger.info(`Skipping table [${table.toQualifiedName()}] - snapshot already done.`); + continue; + } - const count = await this.estimatedCountNumber(table); - const updatedSourceTable = await batch.updateTableProgress(table.sourceTable, { - totalEstimatedCount: count - }); - this.tableCache.updateSourceTable(updatedSourceTable); - tablesToSnapshot.push(table); + const count = await this.estimatedCountNumber(table); + const updatedSourceTable = await writer.updateTableProgress(table.sourceTable, { + totalEstimatedCount: count + }); + this.tableCache.updateSourceTable(updatedSourceTable); + tablesToSnapshot.push(table); - this.logger.info(`To replicate: ${table.toQualifiedName()} ${table.sourceTable.formatSnapshotProgress()}`); - } + this.logger.info(`To replicate: ${table.toQualifiedName()} ${table.sourceTable.formatSnapshotProgress()}`); + } - for (const table of tablesToSnapshot) { - await this.snapshotTableInTx(batch, table); - this.touch(); - } + for (const table of tablesToSnapshot) { + await this.snapshotTableInTx(writer, table); + this.touch(); + } - // This will not create a consistent checkpoint yet, but will persist the op. - // Actual checkpoint will be created when streaming replication caught up. - const postSnapshotLSN = await getLatestLSN(this.connections); - await batch.markAllSnapshotDone(postSnapshotLSN.toString()); - await batch.commit(snapshotLSN); + // This will not create a consistent checkpoint yet, but will persist the op. + // Actual checkpoint will be created when streaming replication caught up. + const postSnapshotLSN = await getLatestLSN(this.connections); + await writer.markAllSnapshotDone(postSnapshotLSN.toString()); + await writer.commitAll(snapshotLSN); - this.logger.info(`Snapshot done. Need to replicate from ${snapshotLSN} to ${postSnapshotLSN} to be consistent`); - } - ); + this.logger.info(`Snapshot done. 
Need to replicate from ${snapshotLSN} to ${postSnapshotLSN} to be consistent`); } async initReplication() { @@ -557,52 +556,49 @@ export class CDCStream { } async streamChanges() { - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: LSN.ZERO, - defaultSchema: this.defaultSchema, - storeCurrentData: false, - skipExistingRows: false - }, - async (batch) => { - if (batch.resumeFromLsn == null) { - throw new ReplicationAssertionError(`No LSN found to resume replication from.`); - } - const startLSN = LSN.fromString(batch.resumeFromLsn); - const sourceTables: MSSQLSourceTable[] = this.tableCache.getAll(); - const eventHandler = this.createEventHandler(batch); - - const poller = new CDCPoller({ - connectionManager: this.connections, - eventHandler, - sourceTables, - startLSN, - logger: this.logger, - additionalConfig: this.options.additionalConfig - }); + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: LSN.ZERO, + defaultSchema: this.defaultSchema, + storeCurrentData: false, + skipExistingRows: false + }); - this.abortSignal.addEventListener( - 'abort', - async () => { - await poller.stop(); - }, - { once: true } - ); + if (writer.resumeFromLsn == null) { + throw new ReplicationAssertionError(`No LSN found to resume replication from.`); + } + const startLSN = LSN.fromString(writer.resumeFromLsn); + const sourceTables: MSSQLSourceTable[] = this.tableCache.getAll(); + const eventHandler = this.createEventHandler(writer); - await createCheckpoint(this.connections); + const poller = new CDCPoller({ + connectionManager: this.connections, + eventHandler, + sourceTables, + startLSN, + logger: this.logger, + additionalConfig: this.options.additionalConfig + }); - this.logger.info(`Streaming changes from: ${startLSN}`); - await poller.replicateUntilStopped(); - } + this.abortSignal.addEventListener( + 'abort', + async () => { + await poller.stop(); + }, + { once: true } ); + + await createCheckpoint(this.connections); + + this.logger.info(`Streaming changes from: ${startLSN}`); + await poller.replicateUntilStopped(); } - private createEventHandler(batch: storage.BucketStorageBatch): CDCEventHandler { + private createEventHandler(writer: storage.BucketDataWriter): CDCEventHandler { return { onInsert: async (row: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => { const afterRow = this.toSqliteRow(row, columns); - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: table.sourceTable, before: undefined, @@ -615,7 +611,7 @@ export class CDCStream { onUpdate: async (rowAfter: any, rowBefore: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => { const beforeRow = this.toSqliteRow(rowBefore, columns); const afterRow = this.toSqliteRow(rowAfter, columns); - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.UPDATE, sourceTable: table.sourceTable, before: beforeRow, @@ -627,7 +623,7 @@ export class CDCStream { }, onDelete: async (row: any, table: MSSQLSourceTable, columns: sql.IColumnMetadata) => { const beforeRow = this.toSqliteRow(row, columns); - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.DELETE, sourceTable: table.sourceTable, before: beforeRow, @@ -638,7 +634,7 @@ export class CDCStream { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); }, onCommit: async (lsn: string, transactionCount: number) => { - await batch.commit(lsn); + await writer.commitAll(lsn); 
this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(transactionCount); this.isStartingReplication = false; }, diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index 7639109aa..9d45552d8 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -5,13 +5,7 @@ import { ReplicationAbortedError, ReplicationAssertionError } from '@powersync/lib-services-framework'; -import { - getUuidReplicaIdentityBson, - MetricsEngine, - RelationCache, - SourceTable, - storage -} from '@powersync/service-core'; +import { getUuidReplicaIdentityBson, MetricsEngine, SourceTable, storage } from '@powersync/service-core'; import * as pgwire from '@powersync/service-jpgwire'; import { DatabaseInputRow, @@ -24,7 +18,6 @@ import { } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; -import * as timers from 'node:timers/promises'; import pDefer, { DeferredPromise } from 'p-defer'; import { PostgresTypeResolver } from '../types/resolver.js'; import { PgManager } from './PgManager.js'; @@ -75,13 +68,6 @@ export class PostgresSnapshotter { private snapshotChunkLength: number; - private relationCache = new RelationCache((relation: number | SourceTable) => { - if (typeof relation == 'number') { - return relation; - } - return relation.objectId!; - }); - private queue = new Set(); private nextItemQueued: DeferredPromise | null = null; private initialSnapshotDone = pDefer(); @@ -457,7 +443,6 @@ export class PostgresSnapshotter { } const count = await this.estimatedCountNumber(db, table); table = await writer.updateTableProgress(table, { totalEstimatedCount: count }); - this.relationCache.update(table); this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); @@ -515,7 +500,6 @@ export class PostgresSnapshotter { await db.query('COMMIT'); this.logger.info(`Snapshot complete for table ${table.qualifiedName}, resume at ${tableLsnNotBefore}`); const [resultTable] = await writer.markTableSnapshotDone([table], tableLsnNotBefore); - this.relationCache.update(resultTable); return resultTable; } catch (e) { await db.query('ROLLBACK'); @@ -619,7 +603,6 @@ export class PostgresSnapshotter { replicatedCount: at, totalEstimatedCount: totalEstimatedCount }); - this.relationCache.update(table); this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); } else { From 7d3a091b9c4f81938ada0914f3fe9033c84302be Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 17:25:12 +0200 Subject: [PATCH 065/101] Refactor BinLogStreams to use the new APIs (untested). --- .../src/replication/BinLogStream.ts | 338 ++++++++++-------- .../src/replication/WalStream.ts | 2 +- 2 files changed, 184 insertions(+), 156 deletions(-) diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index b7c0ce4f8..2490ba1d3 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -72,7 +72,7 @@ export class BinLogStream { private readonly logger: Logger; - private tableCache = new Map(); + private tableCache = new Map(); /** * Time of the oldest uncommitted change, according to the source db. 
@@ -126,58 +126,92 @@ export class BinLogStream { return this.connections.databaseName; } - async handleRelation(batch: storage.BucketStorageBatch, entity: storage.SourceEntityDescriptor, snapshot: boolean) { - const result = await this.storage.resolveTable({ + private async handleRelationSetup( + writer: storage.BucketDataWriter, + entity: storage.SourceEntityDescriptor, + pattern: sync_rules.TablePattern + ) { + const result = await writer.resolveTables({ connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: entity, - sync_rules: this.syncRules + pattern }); - // Since we create the objectId ourselves, this is always defined - this.tableCache.set(entity.objectId!, result.table); // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage. - await batch.drop(result.dropTables); - - // Snapshot if: - // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) - // 2. Snapshot is not done yet, AND: - // 3. The table is used in sync rules. - const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; - - if (shouldSnapshot) { - // Truncate this table in case a previous snapshot was interrupted. - await batch.truncate([result.table]); - - let gtid: common.ReplicatedGTID; - // Start the snapshot inside a transaction. - // We use a dedicated connection for this. - const connection = await this.connections.getStreamingConnection(); - - const promiseConnection = (connection as mysql.Connection).promise(); - try { - await promiseConnection.query(`SET time_zone = '+00:00'`); - await promiseConnection.query('START TRANSACTION'); - try { - gtid = await common.readExecutedGtid(promiseConnection); - await this.snapshotTable(connection as mysql.Connection, batch, result.table); - await promiseConnection.query('COMMIT'); - } catch (e) { - await this.tryRollback(promiseConnection); - throw e; + await writer.drop(result.dropTables); + + return result.tables; + } + + async handleChangeRelation(writer: storage.BucketDataWriter, entity: storage.SourceEntityDescriptor) { + // In common cases, there would be at most one matching pattern, since patterns + // are de-duplicated. However, there may be multiple if: + // 1. There is overlap with direct name matching and wildcard matching. + // 2. There are multiple patterns with different replication config. + const patterns = writer.rowProcessor.getMatchingTablePatterns({ + connectionTag: this.connections.connectionTag, + schema: entity.schema, + name: entity.name + }); + + let allTables: SourceTable[] = []; + for (let pattern of patterns) { + const result = await writer.resolveTables({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: entity, + pattern + }); + + // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage. + await writer.drop(result.dropTables); + + for (let table of result.tables) { + // Snapshot if: + // 1. Snapshot is not done yet, AND: + // 2. The table is used in sync rules. + const shouldSnapshot = !table.snapshotComplete && table.syncAny; + + if (shouldSnapshot) { + // Truncate this table in case a previous snapshot was interrupted. + await writer.truncate([table]); + + let gtid: common.ReplicatedGTID; + // Start the snapshot inside a transaction. + // We use a dedicated connection for this. 
+ const connection = await this.connections.getStreamingConnection(); + + const promiseConnection = (connection as mysql.Connection).promise(); + try { + await promiseConnection.query(`SET time_zone = '+00:00'`); + await promiseConnection.query('START TRANSACTION'); + try { + gtid = await common.readExecutedGtid(promiseConnection); + await this.snapshotTable(connection as mysql.Connection, writer, table); + await promiseConnection.query('COMMIT'); + } catch (e) { + await this.tryRollback(promiseConnection); + throw e; + } + } finally { + connection.release(); + } + const [updatedTable] = await writer.markTableSnapshotDone([table], gtid.comparable); + allTables.push(updatedTable); + } else { + allTables.push(table); } - } finally { - connection.release(); } - const [table] = await batch.markTableSnapshotDone([result.table], gtid.comparable); - return table; } - return result.table; + // Since we create the objectId ourselves, this is always defined + this.tableCache.set(entity.objectId!, allTables); + return allTables; } async getQualifiedTableNames( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, tablePattern: sync_rules.TablePattern ): Promise { if (tablePattern.connectionTag != this.connectionTag) { @@ -188,24 +222,24 @@ export class BinLogStream { const matchedTables: string[] = await common.getTablesFromPattern(connection, tablePattern); connection.release(); - const tables: storage.SourceTable[] = []; + const allTables: storage.SourceTable[] = []; for (const matchedTable of matchedTables) { const replicaIdColumns = await this.getReplicaIdColumns(matchedTable, tablePattern.schema); - const table = await this.handleRelation( - batch, + const resolvedTables = await this.handleRelationSetup( + writer, { name: matchedTable, schema: tablePattern.schema, objectId: createTableId(tablePattern.schema, matchedTable), replicaIdColumns: replicaIdColumns }, - false + tablePattern ); - tables.push(table); + allTables.push(...resolvedTables); } - return tables; + return allTables; } /** @@ -262,27 +296,25 @@ export class BinLogStream { await promiseConnection.query(`SET time_zone = '+00:00'`); const sourceTables = this.syncRules.getSourceTables(); - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: common.ReplicatedGTID.ZERO.comparable, - defaultSchema: this.defaultSchema, - storeCurrentData: true - }, - async (batch) => { - for (let tablePattern of sourceTables) { - const tables = await this.getQualifiedTableNames(batch, tablePattern); - for (let table of tables) { - await this.snapshotTable(connection as mysql.Connection, batch, table); - await batch.markTableSnapshotDone([table], headGTID.comparable); - await framework.container.probes.touch(); - } - } - const snapshotDoneGtid = await common.readExecutedGtid(promiseConnection); - await batch.markAllSnapshotDone(snapshotDoneGtid.comparable); - await batch.commit(headGTID.comparable); + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: common.ReplicatedGTID.ZERO.comparable, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }); + for (let tablePattern of sourceTables) { + const tables = await this.getQualifiedTableNames(writer, tablePattern); + for (let table of tables) { + await this.snapshotTable(connection as mysql.Connection, writer, table); + await writer.markTableSnapshotDone([table], headGTID.comparable); + await framework.container.probes.touch(); } - ); + } + const snapshotDoneGtid = await 
common.readExecutedGtid(promiseConnection); + await writer.markAllSnapshotDone(snapshotDoneGtid.comparable); + const flushResults = await writer.flush(); + await writer.commitAll(headGTID.comparable); + lastOp = flushResults?.flushed_op ?? null; this.logger.info(`Initial replication done`); await promiseConnection.query('COMMIT'); @@ -305,7 +337,7 @@ export class BinLogStream { private async snapshotTable( connection: mysql.Connection, - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, table: storage.SourceTable ) { this.logger.info(`Replicating ${qualifiedMySQLTable(table)}`); @@ -334,7 +366,7 @@ export class BinLogStream { } const record = this.toSQLiteRow(row, columns!); - await batch.save({ + await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: table, before: undefined, @@ -345,7 +377,7 @@ export class BinLogStream { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); } - await batch.flush(); + await writer.flush(); } async replicate() { @@ -377,30 +409,26 @@ export class BinLogStream { // We need to find the existing tables, to populate our table cache. // This is needed for includeSchema to work correctly. const sourceTables = this.syncRules.getSourceTables(); - await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: common.ReplicatedGTID.ZERO.comparable, - defaultSchema: this.defaultSchema, - storeCurrentData: true - }, - async (batch) => { - for (let tablePattern of sourceTables) { - await this.getQualifiedTableNames(batch, tablePattern); - } - } - ); + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: common.ReplicatedGTID.ZERO.comparable, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }); + for (let tablePattern of sourceTables) { + await this.getQualifiedTableNames(writer, tablePattern); + } } } - private getTable(tableId: string): storage.SourceTable { - const table = this.tableCache.get(tableId); - if (table == null) { + private getTables(tableId: string): storage.SourceTable[] { + const tables = this.tableCache.get(tableId); + if (tables == null) { // We should always receive a replication message before the relation is used. // If we can't find it, it's a bug. 
throw new ReplicationAssertionError(`Missing relation cache for ${tableId}`); } - return table; + return tables; } async streamChanges() { @@ -417,38 +445,39 @@ export class BinLogStream { connection.release(); if (!this.stopped) { - await this.storage.startBatch( - { zeroLSN: common.ReplicatedGTID.ZERO.comparable, defaultSchema: this.defaultSchema, storeCurrentData: true }, - async (batch) => { - const binlogEventHandler = this.createBinlogEventHandler(batch); - const binlogListener = new BinLogListener({ - logger: this.logger, - sourceTables: this.syncRules.getSourceTables(), - startGTID: fromGTID, - connectionManager: this.connections, - serverId: serverId, - eventHandler: binlogEventHandler - }); - - this.abortSignal.addEventListener( - 'abort', - async () => { - await binlogListener.stop(); - }, - { once: true } - ); - - await binlogListener.start(); - await binlogListener.replicateUntilStopped(); - } + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + zeroLSN: common.ReplicatedGTID.ZERO.comparable, + defaultSchema: this.defaultSchema, + storeCurrentData: true + }); + + const binlogEventHandler = this.createBinlogEventHandler(writer); + const binlogListener = new BinLogListener({ + logger: this.logger, + sourceTables: this.syncRules.getSourceTables(), + startGTID: fromGTID, + connectionManager: this.connections, + serverId: serverId, + eventHandler: binlogEventHandler + }); + + this.abortSignal.addEventListener( + 'abort', + async () => { + await binlogListener.stop(); + }, + { once: true } ); + + await binlogListener.start(); + await binlogListener.replicateUntilStopped(); } } - private createBinlogEventHandler(batch: storage.BucketStorageBatch): BinLogEventHandler { + private createBinlogEventHandler(writer: storage.BucketDataWriter): BinLogEventHandler { return { onWrite: async (rows: Row[], tableMap: TableMapEntry) => { - await this.writeChanges(batch, { + await this.writeChanges(writer, { type: storage.SaveOperationTag.INSERT, rows: rows, tableEntry: tableMap @@ -456,7 +485,7 @@ export class BinLogStream { }, onUpdate: async (rowsAfter: Row[], rowsBefore: Row[], tableMap: TableMapEntry) => { - await this.writeChanges(batch, { + await this.writeChanges(writer, { type: storage.SaveOperationTag.UPDATE, rows: rowsAfter, rows_before: rowsBefore, @@ -464,21 +493,21 @@ export class BinLogStream { }); }, onDelete: async (rows: Row[], tableMap: TableMapEntry) => { - await this.writeChanges(batch, { + await this.writeChanges(writer, { type: storage.SaveOperationTag.DELETE, rows: rows, tableEntry: tableMap }); }, onKeepAlive: async (lsn: string) => { - const didCommit = await batch.keepalive(lsn); + const didCommit = await writer.keepaliveAll(lsn); if (didCommit) { this.oldestUncommittedChange = null; } }, onCommit: async (lsn: string) => { this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1); - const didCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + const didCommit = await writer.commitAll(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); if (didCommit) { this.oldestUncommittedChange = null; this.isStartingReplication = false; @@ -493,41 +522,42 @@ export class BinLogStream { this.isStartingReplication = false; }, onSchemaChange: async (change: SchemaChange) => { - await this.handleSchemaChange(batch, change); + await this.handleSchemaChange(writer, change); } }; } - private async handleSchemaChange(batch: storage.BucketStorageBatch, change: SchemaChange): Promise { + 
private async handleSchemaChange(writer: storage.BucketDataWriter, change: SchemaChange): Promise { if (change.type === SchemaChangeType.RENAME_TABLE) { const fromTableId = createTableId(change.schema, change.table); - const fromTable = this.tableCache.get(fromTableId); + const fromTables = this.tableCache.get(fromTableId); // Old table needs to be cleaned up - if (fromTable) { - await batch.drop([fromTable]); + if (fromTables != null) { + await writer.drop(fromTables); this.tableCache.delete(fromTableId); } + // The new table matched a table in the sync rules if (change.newTable) { - await this.handleCreateOrUpdateTable(batch, change.newTable!, change.schema); + await this.handleCreateOrUpdateTable(writer, change.newTable!, change.schema); } } else { const tableId = createTableId(change.schema, change.table); - const table = this.getTable(tableId); + const tables = this.getTables(tableId); switch (change.type) { case SchemaChangeType.ALTER_TABLE_COLUMN: case SchemaChangeType.REPLICATION_IDENTITY: // For these changes, we need to update the table if the replication identity columns have changed. - await this.handleCreateOrUpdateTable(batch, change.table, change.schema); + await this.handleCreateOrUpdateTable(writer, change.table, change.schema); break; case SchemaChangeType.TRUNCATE_TABLE: - await batch.truncate([table]); + await writer.truncate(tables); break; case SchemaChangeType.DROP_TABLE: - await batch.drop([table]); + await writer.drop(tables); this.tableCache.delete(tableId); break; default: @@ -550,25 +580,21 @@ export class BinLogStream { } private async handleCreateOrUpdateTable( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, tableName: string, schema: string - ): Promise { + ): Promise { const replicaIdColumns = await this.getReplicaIdColumns(tableName, schema); - return await this.handleRelation( - batch, - { - name: tableName, - schema: schema, - objectId: createTableId(schema, tableName), - replicaIdColumns: replicaIdColumns - }, - true - ); + return await this.handleChangeRelation(writer, { + name: tableName, + schema: schema, + objectId: createTableId(schema, tableName), + replicaIdColumns: replicaIdColumns + }); } private async writeChanges( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, msg: { type: storage.SaveOperationTag; rows: Row[]; @@ -579,23 +605,25 @@ export class BinLogStream { const columns = common.toColumnDescriptors(msg.tableEntry); const tableId = createTableId(msg.tableEntry.parentSchema, msg.tableEntry.tableName); - let table = this.tableCache.get(tableId); - if (table == null) { + let tables = this.tableCache.get(tableId); + if (tables == null) { // This is an insert for a new table that matches a table in the sync rules // We need to create the table in the storage and cache it. 
- table = await this.handleCreateOrUpdateTable(batch, msg.tableEntry.tableName, msg.tableEntry.parentSchema); + tables = await this.handleCreateOrUpdateTable(writer, msg.tableEntry.tableName, msg.tableEntry.parentSchema); } for (const [index, row] of msg.rows.entries()) { - await this.writeChange(batch, { - type: msg.type, - database: msg.tableEntry.parentSchema, - sourceTable: table!, - table: msg.tableEntry.tableName, - columns: columns, - row: row, - previous_row: msg.rows_before?.[index] - }); + for (let table of tables) { + await this.writeChange(writer, { + type: msg.type, + database: msg.tableEntry.parentSchema, + sourceTable: table!, + table: msg.tableEntry.tableName, + columns: columns, + row: row, + previous_row: msg.rows_before?.[index] + }); + } } return null; } @@ -606,14 +634,14 @@ export class BinLogStream { } private async writeChange( - batch: storage.BucketStorageBatch, + writer: storage.BucketDataWriter, payload: WriteChangePayload ): Promise { switch (payload.type) { case storage.SaveOperationTag.INSERT: this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); const record = this.toSQLiteRow(payload.row, payload.columns); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.INSERT, sourceTable: payload.sourceTable, before: undefined, @@ -630,7 +658,7 @@ export class BinLogStream { : undefined; const after = this.toSQLiteRow(payload.row, payload.columns); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.UPDATE, sourceTable: payload.sourceTable, before: beforeUpdated, @@ -645,7 +673,7 @@ export class BinLogStream { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); const beforeDeleted = this.toSQLiteRow(payload.row, payload.columns); - return await batch.save({ + return await writer.save({ tag: storage.SaveOperationTag.DELETE, sourceTable: payload.sourceTable, before: beforeDeleted, diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 5741eb3c1..9998d06bb 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -124,7 +124,7 @@ export class WalStream { */ private isStartingReplication = true; - constructor(private options: WalStreamOptions) { + constructor(options: WalStreamOptions) { this.logger = options.logger ?? defaultLogger; this.storage = options.storage; this.metrics = options.metrics; From a209643265e421b86a420a36cf6be3181f8ce295 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 17:27:56 +0200 Subject: [PATCH 066/101] Remove startBatch. 
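With startBatch removed, callers create a writer explicitly and manage flushing, committing and disposal themselves. Roughly, existing call sites migrate along these lines (a sketch only, under the new writer API; writeAndCommit is an illustrative name and the save calls are elided):

    import { storage } from '@powersync/service-core';

    async function writeAndCommit(
      syncStorage: storage.SyncRulesBucketStorage,
      options: storage.StartBatchOptions,
      lsn: string
    ) {
      // Old pattern (removed in this commit):
      //   const result = await syncStorage.startBatch(options, async (batch) => {
      //     await batch.save(...);
      //     await batch.commit(lsn);
      //   });

      // New pattern: an explicitly-created writer, disposed via `await using`.
      await using writer = await syncStorage.factory.createCombinedWriter([syncStorage], options);
      // ... writer.save(...) calls go here ...
      const flushResult = await writer.flush();
      await writer.commitAll(lsn);
      // Where the old startBatch return value was used, read flushed_op from flush() instead.
      return flushResult?.flushed_op ?? null;
    }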
--- .../implementation/MongoSyncBucketStorage.ts | 15 ------ .../src/storage/PostgresSyncRulesStorage.ts | 15 ------ .../src/replication/PostgresSnapshotter.ts | 46 +++++++++---------- .../src/storage/SyncRulesBucketStorage.ts | 12 +---- 4 files changed, 23 insertions(+), 65 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 41889308c..be9df13c0 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -174,21 +174,6 @@ export class MongoSyncBucketStorage return writer.subWriters[0]; } - async startBatch( - options: storage.StartBatchOptions, - callback: (batch: storage.BucketStorageBatch) => Promise - ): Promise { - await using batch = await this.createWriter(options); - - await callback(batch); - await batch.flush(); - if (batch.last_flushed_op != null) { - return { flushed_op: batch.last_flushed_op }; - } else { - return null; - } - } - async resolveTable(options: storage.ResolveTableOptions): Promise { throw new Error('Method deprecated and not implemented.'); } diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 8f39b0c99..b965a421b 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -377,21 +377,6 @@ export class PostgresSyncRulesStorage return batch; } - async startBatch( - options: storage.StartBatchOptions, - callback: (batch: storage.BucketStorageBatch) => Promise - ): Promise { - await using batch = await this.createWriter(options); - - await callback(batch); - await batch.flush(); - if (batch.last_flushed_op != null) { - return { flushed_op: batch.last_flushed_op }; - } else { - return null; - } - } - async getParameterSets( checkpoint: ReplicationCheckpoint, lookups: sync_rules.ScopedParameterLookup[] diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index 9d45552d8..c8b6bc7b3 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -380,20 +380,17 @@ export class PostgresSnapshotter { const db = await this.connections.snapshotConnection(); await using _ = { [Symbol.asyncDispose]: () => db.end() }; - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: ZERO_LSN, - defaultSchema: POSTGRES_DEFAULT_SCHEMA, - storeCurrentData: true, - skipExistingRows: true - }, - async (batch) => { - const rs = await db.query(`select pg_current_wal_lsn() as lsn`); - const globalLsnNotBefore = rs.rows[0].decodeWithoutCustomTypes(0); - await batch.markAllSnapshotDone(globalLsnNotBefore); - } - ); + await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true + }); + + const rs = await db.query(`select pg_current_wal_lsn() as lsn`); + const globalLsnNotBefore = rs.rows[0].decodeWithoutCustomTypes(0); + await writer.markAllSnapshotDone(globalLsnNotBefore); + /** * Send a keepalive message after initial replication. 
* In some edge cases we wait for a keepalive after the initial snapshot. @@ -404,16 +401,17 @@ export class PostgresSnapshotter { */ await sendKeepAlive(db); - const lastOp = flushResults?.flushed_op; - if (lastOp != null) { - // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. - // TODO: only run this after initial replication, not after each table. - await this.storage.populatePersistentChecksumCache({ - // No checkpoint yet, but we do have the opId. - maxOpId: lastOp, - signal: this.abortSignal - }); - } + // FIXME: Implement this again + // const lastOp = flushResults?.flushed_op; + // if (lastOp != null) { + // // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. + // // TODO: only run this after initial replication, not after each table. + // await this.storage.populatePersistentChecksumCache({ + // // No checkpoint yet, but we do have the opId. + // maxOpId: lastOp, + // signal: this.abortSignal + // }); + // } } /** diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index d275f2760..a7e764f68 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -26,23 +26,13 @@ export interface SyncRulesBucketStorage readonly factory: BucketStorageFactory; - /** - * Use this to get access to update storage data. - * - * @deprecated Use `createWriter` instead. - */ - startBatch( - options: StartBatchOptions, - callback: (batch: BucketStorageBatch) => Promise - ): Promise; - /** * @deprecated use `createWriter()` instead, with its `resolveTables` method. */ resolveTable(options: ResolveTableOptions): Promise; /** - * Create a new writer - an alternative to `startBatch`. + * Create a new writer. * * The writer is stateful. It is not safe to use the same writer concurrently from multiple places, * but different writers can be used concurrently. From a30c8256fb81d59ba5fec76af119ee3d66638a7d Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 21 Jan 2026 17:29:15 +0200 Subject: [PATCH 067/101] Remove resolveTable from the public API. 
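Callers that previously used SyncRulesBucketStorage.resolveTable now go through the writer's resolveTables, which takes the matched table pattern and returns both the resolved tables and any conflicting tables to drop. The call shape, sketched for reference (resolveForPattern is an illustrative helper, not part of this change):

    import { storage } from '@powersync/service-core';
    import { TablePattern } from '@powersync/service-sync-rules';

    async function resolveForPattern(
      writer: storage.BucketDataWriter,
      descriptor: storage.SourceEntityDescriptor,
      pattern: TablePattern,
      connectionId: number,
      connectionTag: string
    ): Promise<storage.SourceTable[]> {
      const result = await writer.resolveTables({
        connection_id: connectionId,
        connection_tag: connectionTag,
        entity_descriptor: descriptor,
        pattern
      });

      // Conflicting tables (for example renamed tables) are reported separately
      // and should be dropped by the caller.
      if (result.dropTables.length > 0) {
        await writer.drop(result.dropTables);
      }
      return result.tables;
    }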
--- .../src/storage/implementation/MongoSyncBucketStorage.ts | 4 ---- packages/service-core/src/storage/SyncRulesBucketStorage.ts | 5 ----- 2 files changed, 9 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index be9df13c0..a225f1115 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -174,10 +174,6 @@ export class MongoSyncBucketStorage return writer.subWriters[0]; } - async resolveTable(options: storage.ResolveTableOptions): Promise { - throw new Error('Method deprecated and not implemented.'); - } - async getParameterSets( checkpoint: MongoReplicationCheckpoint, lookups: ScopedParameterLookup[] diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index a7e764f68..fc9423218 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -26,11 +26,6 @@ export interface SyncRulesBucketStorage readonly factory: BucketStorageFactory; - /** - * @deprecated use `createWriter()` instead, with its `resolveTables` method. - */ - resolveTable(options: ResolveTableOptions): Promise; - /** * Create a new writer. * From 93f6103b6c2e5a5d09a3f67384e95e03355c9c65 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 10:40:01 +0200 Subject: [PATCH 068/101] Remove some BucketStorageBatch usages. --- .../implementation/MongoBucketBatch.ts | 10 +++++++ .../implementation/MongoSyncBucketStorage.ts | 7 ++--- .../storage/PostgresBucketStorageFactory.ts | 2 +- .../src/storage/PostgresSyncRulesStorage.ts | 7 ++++- .../src/storage/batch/PostgresWriter.ts | 30 +++++++++++-------- .../src/replication/WalStream.ts | 2 -- .../register-data-storage-checkpoint-tests.ts | 10 +++---- .../src/storage/BucketStorageBatch.ts | 5 ++++ .../src/storage/SyncRulesBucketStorage.ts | 4 +-- 9 files changed, 49 insertions(+), 28 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 6cae94f84..1cf80cc88 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -13,6 +13,7 @@ import { ServiceError } from '@powersync/lib-services-framework'; import { + BatchedCustomWriteCheckpointOptions, BucketStorageMarkRecordUnavailable, deserializeBson, InternalOpId, @@ -440,6 +441,15 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { return result!; } + /** + * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. + */ + addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void { + for (let writer of this.subWriters) { + writer.addCustomWriteCheckpoint(checkpoint); + } + } + async flush(options?: storage.BatchBucketFlushOptions): Promise { let result: storage.FlushedResult | null = null; // One flush may be split over multiple transactions. 
diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index a225f1115..69fe38e2c 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -35,8 +35,7 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js'; import { MongoPersistedSyncRules } from '../storage-index.js'; import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { PowerSyncMongo } from './db.js'; -import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js'; -import { MongoBucketBatch } from './MongoBucketBatch.js'; +import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey } from './models.js'; import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js'; import { MongoCompactor } from './MongoCompactor.js'; import { MongoParameterCompactor } from './MongoParameterCompactor.js'; @@ -169,9 +168,9 @@ export class MongoSyncBucketStorage }); } - async createWriter(options: storage.StartBatchOptions): Promise { + async createWriter(options: storage.StartBatchOptions): Promise { const writer = await this.factory.createCombinedWriter([this], options); - return writer.subWriters[0]; + return writer; } async getParameterSets( diff --git a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts index 8ba87be60..e49826944 100644 --- a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts +++ b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts @@ -60,7 +60,7 @@ export class PostgresBucketStorageFactory }); for (let storage of storages) { - const bucketBatch = (await storage.createWriter(options)) as PostgresBucketBatch; + const bucketBatch = await (storage as PostgresSyncRulesStorage).createBucketBatch(options); writer.addSubWriter(bucketBatch); } diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index b965a421b..84d77663c 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -4,6 +4,7 @@ import { BucketChecksum, BucketChecksumRequest, BucketDataRequest, + BucketDataWriter, CHECKPOINT_INVALIDATE_ALL, CheckpointChanges, GetCheckpointChangesOptions, @@ -341,7 +342,11 @@ export class PostgresSyncRulesStorage }); } - async createWriter(options: storage.StartBatchOptions): Promise { + async createWriter(options: storage.StartBatchOptions): Promise { + return await this.factory.createCombinedWriter([this], options); + } + + async createBucketBatch(options: storage.StartBatchOptions): Promise { const syncRules = await this.db.sql` SELECT last_checkpoint_lsn, diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts index 3cbe8d153..41137a29f 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts @@ -1,10 +1,14 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; -import { Logger, ReplicationAssertionError, 
logger as defaultLogger } from '@powersync/lib-services-framework'; -import { BucketStorageMarkRecordUnavailable, maxLsn, storage } from '@powersync/service-core'; +import { Logger, ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { + BatchedCustomWriteCheckpointOptions, + BucketStorageMarkRecordUnavailable, + maxLsn, + storage +} from '@powersync/service-core'; import { RowProcessor } from '@powersync/service-sync-rules'; -import { OperationBatch } from './OperationBatch.js'; -import { PostgresBucketBatch } from './PostgresBucketBatch.js'; import { models } from '../../types/types.js'; +import { PostgresBucketBatch } from './PostgresBucketBatch.js'; import { postgresTableId } from './PostgresPersistedBatch.js'; export interface PostgresWriterOptions { @@ -17,16 +21,11 @@ export interface PostgresWriterOptions { } export class PostgresWriter implements storage.BucketDataWriter { - private batch: OperationBatch | null = null; public readonly rowProcessor: RowProcessor; write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; protected db: lib_postgres.DatabaseClient; - private readonly logger: Logger; - private readonly storeCurrentData: boolean; - private readonly skipExistingRows: boolean; - private markRecordUnavailable: BucketStorageMarkRecordUnavailable | undefined; public subWriters: PostgresBucketBatch[] = []; private sourceTableMap = new WeakMap(); @@ -34,10 +33,6 @@ export class PostgresWriter implements storage.BucketDataWriter { constructor(options: PostgresWriterOptions) { this.db = options.db; this.rowProcessor = options.rowProcessor; - this.storeCurrentData = options.storeCurrentData; - this.skipExistingRows = options.skipExistingRows; - this.logger = options.logger ?? defaultLogger; - this.markRecordUnavailable = options.markRecordUnavailable; } addSubWriter(subWriter: PostgresBucketBatch) { @@ -225,6 +220,15 @@ export class PostgresWriter implements storage.BucketDataWriter { return updatedTable; } + /** + * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. 
+ */ + addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void { + for (let writer of this.subWriters) { + writer.addCustomWriteCheckpoint(checkpoint); + } + } + async [Symbol.asyncDispose]() { for (let writer of this.subWriters) { await writer[Symbol.asyncDispose](); diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 9998d06bb..be7ee49e7 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -8,10 +8,8 @@ import { } from '@powersync/lib-services-framework'; import { BucketDataWriter, - BucketStorageBatch, getUuidReplicaIdentityBson, MetricsEngine, - RelationCache, SaveUpdate, SourceEntityDescriptor, SourceTable, diff --git a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts index 548dd435b..7251a4a0d 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts @@ -144,7 +144,7 @@ bucket_definitions: user_id: 'user1' }); await writer.flush(); - await writer.keepalive('5/0'); + await writer.keepaliveAll('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -188,7 +188,7 @@ bucket_definitions: user_id: 'user1' }); await writer.flush(); - await writer.keepalive('5/0'); + await writer.keepaliveAll('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -224,7 +224,7 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await writer.keepalive('5/0'); + await writer.keepaliveAll('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -242,7 +242,7 @@ bucket_definitions: user_id: 'user1' }); await writer.flush(); - await writer.keepalive('6/0'); + await writer.keepaliveAll('6/0'); let result2 = await iter.next(); expect(result2).toMatchObject({ @@ -261,7 +261,7 @@ bucket_definitions: user_id: 'user1' }); await writer.flush(); - await writer.keepalive('7/0'); + await writer.keepaliveAll('7/0'); let result3 = await iter.next(); expect(result3).toMatchObject({ diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index 73528e12f..c13b814af 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -68,6 +68,11 @@ export interface BucketDataWriterBase { markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise; updateTableProgress(table: SourceTable, progress: Partial): Promise; + + /** + * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. 
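+   *
+   * A usage sketch - the shape of `checkpointOptions` is whatever
+   * {@link BatchedCustomWriteCheckpointOptions} defines, not something
+   * specified by this comment:
+   *
+   *     writer.addCustomWriteCheckpoint(checkpointOptions);
+   *     await writer.flush(); // the queued checkpoint is persisted here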
+ */ + addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void; } export interface BucketStorageBatch diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index fc9423218..cffbf5b44 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -7,7 +7,7 @@ import { TablePattern } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; -import { BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; +import { BucketDataWriter, BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; import { BucketStorageFactory } from './BucketStorageFactory.js'; import { ParseSyncRulesOptions, PersistedSyncRules } from './PersistedSyncRulesContent.js'; import { SourceEntityDescriptor } from './SourceEntity.js'; @@ -34,7 +34,7 @@ export interface SyncRulesBucketStorage * * The writer must be flushed and disposed when done. */ - createWriter(options: StartBatchOptions): Promise; + createWriter(options: StartBatchOptions): Promise; getHydratedSyncRules(options: ParseSyncRulesOptions): HydratedSyncRules; From 50b4b5b2299273460c1274201e72bb20b63ba6ae Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 10:48:22 +0200 Subject: [PATCH 069/101] Simplify createWriter usage. --- .../src/storage/MongoBucketStorage.ts | 4 +- .../implementation/MongoSyncBucketStorage.ts | 5 +-- .../test/src/storage_compacting.test.ts | 2 +- .../test/src/storage_sync.test.ts | 2 +- .../module-mssql/src/replication/CDCStream.ts | 6 +-- .../src/replication/BinLogStream.ts | 6 +-- .../storage/PostgresBucketStorageFactory.ts | 2 +- .../src/storage/PostgresSyncRulesStorage.ts | 4 +- .../test/src/storage.test.ts | 2 +- .../src/replication/PostgresSnapshotter.ts | 6 +-- .../src/replication/WalStream.ts | 2 +- .../src/test-utils/general-utils.ts | 2 +- .../src/tests/register-compacting-tests.ts | 12 ++--- .../register-data-storage-checkpoint-tests.ts | 4 +- .../tests/register-data-storage-data-tests.ts | 44 +++++++++---------- .../register-data-storage-parameter-tests.ts | 18 ++++---- .../register-parameter-compacting-tests.ts | 4 +- .../src/tests/register-sync-tests.ts | 30 ++++++------- .../src/storage/BucketStorageFactory.ts | 40 +++++++++++++++-- .../src/storage/SyncRulesBucketStorage.ts | 36 +-------------- 20 files changed, 115 insertions(+), 116 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index 5587f1bdb..187b2e7d1 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -1,6 +1,6 @@ import { SqlSyncRules } from '@powersync/service-sync-rules'; -import { GetIntanceOptions, maxLsn, StartBatchOptions, storage } from '@powersync/service-core'; +import { GetIntanceOptions, maxLsn, CreateWriterOptions, storage } from '@powersync/service-core'; import { BaseObserver, ErrorCode, logger, ServiceError } from '@powersync/lib-services-framework'; import { v4 as uuid } from 'uuid'; @@ -76,7 +76,7 @@ export class MongoBucketStorage async createCombinedWriter( storages: storage.SyncRulesBucketStorage[], - options: StartBatchOptions + options: CreateWriterOptions ): Promise { const mongoStorages = storages as MongoSyncBucketStorage[]; const mappings = mongoStorages.map((s) 
=> s.sync_rules.mapping); diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 69fe38e2c..4501e0e2d 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -168,9 +168,8 @@ export class MongoSyncBucketStorage }); } - async createWriter(options: storage.StartBatchOptions): Promise { - const writer = await this.factory.createCombinedWriter([this], options); - return writer; + async createWriter(options: storage.CreateWriterOptions): Promise { + return await this.factory.createCombinedWriter([this], options); } async getParameterSets( diff --git a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts index 908826034..b4cbad28f 100644 --- a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts @@ -9,7 +9,7 @@ describe('Mongo Sync Bucket Storage Compact', () => { describe('with blank bucket_state', () => { // This can happen when migrating from older service versions, that did not populate bucket_state yet. const populate = async (bucketStorage: SyncRulesBucketStorage, sourceTableIndex: number) => { - await using writer = await bucketStorage.factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const sourceTable = await test_utils.resolveTestTable( writer, diff --git a/modules/module-mongodb-storage/test/src/storage_sync.test.ts b/modules/module-mongodb-storage/test/src/storage_sync.test.ts index 02037c6f4..178aacb3e 100644 --- a/modules/module-mongodb-storage/test/src/storage_sync.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_sync.test.ts @@ -22,7 +22,7 @@ describe('sync - mongodb', () => { ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); diff --git a/modules/module-mssql/src/replication/CDCStream.ts b/modules/module-mssql/src/replication/CDCStream.ts index c0648f2b0..91034be81 100644 --- a/modules/module-mssql/src/replication/CDCStream.ts +++ b/modules/module-mssql/src/replication/CDCStream.ts @@ -167,7 +167,7 @@ export class CDCStream { async populateTableCache() { const sourceTables = this.syncRules.getSourceTables(); - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await this.storage.createWriter({ logger: this.logger, zeroLSN: LSN.ZERO, defaultSchema: this.defaultSchema, @@ -465,7 +465,7 @@ export class CDCStream { await this.storage.clear({ signal: this.abortSignal }); } - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await await this.storage.createWriter({ logger: this.logger, zeroLSN: LSN.ZERO, defaultSchema: this.defaultSchema, @@ -556,7 +556,7 @@ export class CDCStream { } async streamChanges() { - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = 
await this.storage.createWriter({ logger: this.logger, zeroLSN: LSN.ZERO, defaultSchema: this.defaultSchema, diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index 2490ba1d3..2e7e9d05b 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -296,7 +296,7 @@ export class BinLogStream { await promiseConnection.query(`SET time_zone = '+00:00'`); const sourceTables = this.syncRules.getSourceTables(); - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await this.storage.createWriter({ logger: this.logger, zeroLSN: common.ReplicatedGTID.ZERO.comparable, defaultSchema: this.defaultSchema, @@ -409,7 +409,7 @@ export class BinLogStream { // We need to find the existing tables, to populate our table cache. // This is needed for includeSchema to work correctly. const sourceTables = this.syncRules.getSourceTables(); - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await this.storage.createWriter({ logger: this.logger, zeroLSN: common.ReplicatedGTID.ZERO.comparable, defaultSchema: this.defaultSchema, @@ -445,7 +445,7 @@ export class BinLogStream { connection.release(); if (!this.stopped) { - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await this.storage.createWriter({ zeroLSN: common.ReplicatedGTID.ZERO.comparable, defaultSchema: this.defaultSchema, storeCurrentData: true diff --git a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts index e49826944..2440b1f65 100644 --- a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts +++ b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts @@ -46,7 +46,7 @@ export class PostgresBucketStorageFactory async createCombinedWriter( storages: SyncRulesBucketStorage[], - options: storage.StartBatchOptions + options: storage.CreateWriterOptions ): Promise { const syncRules = storages.map((s) => s.getHydratedSyncRules(options)); diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 84d77663c..86aef4c86 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -342,11 +342,11 @@ export class PostgresSyncRulesStorage }); } - async createWriter(options: storage.StartBatchOptions): Promise { + async createWriter(options: storage.CreateWriterOptions): Promise { return await this.factory.createCombinedWriter([this], options); } - async createBucketBatch(options: storage.StartBatchOptions): Promise { + async createBucketBatch(options: storage.CreateWriterOptions): Promise { const syncRules = await this.db.sql` SELECT last_checkpoint_lsn, diff --git a/modules/module-postgres-storage/test/src/storage.test.ts b/modules/module-postgres-storage/test/src/storage.test.ts index 2d4f43944..ca7cf7e2d 100644 --- a/modules/module-postgres-storage/test/src/storage.test.ts +++ b/modules/module-postgres-storage/test/src/storage.test.ts @@ -34,7 +34,7 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = 
await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], POSTGRES_STORAGE_FACTORY); const largeDescription = '0123456789'.repeat(2_000_00); diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index c8b6bc7b3..2c583dea3 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -314,7 +314,7 @@ export class PostgresSnapshotter { async replicateTable(requestTable: SourceTable) { const db = await this.connections.snapshotConnection(); await using _ = { [Symbol.asyncDispose]: () => db.end() }; - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await this.storage.createWriter({ logger: this.logger, zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, @@ -380,7 +380,7 @@ export class PostgresSnapshotter { const db = await this.connections.snapshotConnection(); await using _ = { [Symbol.asyncDispose]: () => db.end() }; - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await this.storage.createWriter({ logger: this.logger, zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, @@ -423,7 +423,7 @@ export class PostgresSnapshotter { async queueSnapshotTables(db: pgwire.PgConnection) { const sourceTables = this.sync_rules.getSourceTables(); - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await this.storage.createWriter({ logger: this.logger, zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index be7ee49e7..2eacbb99c 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -496,7 +496,7 @@ export class WalStream { }); }; - await using writer = await this.storage.factory.createCombinedWriter([this.storage], { + await using writer = await this.storage.createWriter({ logger: this.logger, zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 2f72a58fc..26ee99da9 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -15,7 +15,7 @@ export const PARSE_OPTIONS: storage.ParseSyncRulesOptions = { defaultSchema: 'public' }; -export const BATCH_OPTIONS: storage.StartBatchOptions = { +export const BATCH_OPTIONS: storage.CreateWriterOptions = { ...PARSE_OPTIONS, zeroLSN: ZERO_LSN, storeCurrentData: true diff --git a/packages/service-core-tests/src/tests/register-compacting-tests.ts b/packages/service-core-tests/src/tests/register-compacting-tests.ts index 4b6f7b5e1..5e6638437 100644 --- a/packages/service-core-tests/src/tests/register-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-compacting-tests.ts @@ -16,7 +16,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using 
writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); const request = bucketRequest(syncRules); @@ -122,7 +122,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -237,7 +237,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -324,7 +324,7 @@ bucket_definitions: - select * from test where b = bucket.b` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -445,7 +445,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -515,7 +515,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); const request = bucketRequest(syncRules); diff --git a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts index 7251a4a0d..4d3da1d6a 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts @@ -33,7 +33,7 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); await writer.markAllSnapshotDone('1/1'); const writeCheckpoint = await bucketStorage.createManagedWriteCheckpoint({ @@ -68,7 +68,7 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(r.persisted_sync_rules!); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); await writer.markAllSnapshotDone('1/1'); const abortController = new AbortController(); diff --git 
a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts index 0b625fc43..d5c646da4 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts @@ -37,7 +37,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -99,7 +99,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -161,7 +161,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -226,7 +226,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -284,7 +284,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -349,7 +349,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -421,7 +421,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -549,7 +549,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await 
test_utils.resolveTestTable(writer, 'test', ['id'], config); // Pre-setup @@ -704,7 +704,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id', 'description'], config); // Pre-setup @@ -811,7 +811,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id', 'description'], config); // Pre-setup @@ -908,7 +908,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -1015,7 +1015,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -1093,7 +1093,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -1240,7 +1240,7 @@ bucket_definitions: const r = await f.configureSyncRules({ content: 'bucket_definitions: {}', validate: false }); const storage = f.getInstance(r.persisted_sync_rules!); - await using writer = await f.createCombinedWriter([storage], test_utils.BATCH_OPTIONS); + await using writer = await storage.createWriter(test_utils.BATCH_OPTIONS); await writer.markAllSnapshotDone('1/0'); await writer.keepaliveAll('1/0'); @@ -1265,7 +1265,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config, 1); const sourceTableIgnore = await test_utils.resolveTestTable(writer, 'test_ignore', ['id'], config, 2); @@ -1310,7 +1310,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -1346,7 +1346,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using 
writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); await writer.markAllSnapshotDone('1/1'); await writer.commitAll('1/1'); @@ -1379,8 +1379,8 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer1 = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); - await using writer2 = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer1 = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + await using writer2 = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer2, 'test', ['id'], config); // We simulate two concurrent batches, but sequential calls are enough for this test. @@ -1424,11 +1424,11 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using snapshotWriter = await factory.createCombinedWriter([bucketStorage], { + await using snapshotWriter = await bucketStorage.createWriter({ ...test_utils.BATCH_OPTIONS, skipExistingRows: true }); - await using streamingWriter = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using streamingWriter = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const snapshotTable = await test_utils.resolveTestTable(snapshotWriter, 'test', ['id'], config, 1); const streamingTable = await test_utils.resolveTestTable(streamingWriter, 'test', ['id'], config, 1); @@ -1488,7 +1488,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index ae3944ca8..3acd35b86 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -30,7 +30,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); const MYBUCKET_1: ParameterLookupScope = { lookupName: '20002', queryId: '', source: null as any }; @@ -89,7 +89,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -147,7 +147,7 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await 
bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const table = await test_utils.resolveTestTable(writer, 'todos', ['id', 'list_id'], config); await writer.markAllSnapshotDone('1/1'); @@ -217,7 +217,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -268,7 +268,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -322,7 +322,7 @@ bucket_definitions: }); const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -374,7 +374,7 @@ bucket_definitions: }); const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', undefined, config); await writer.markAllSnapshotDone('1/1'); @@ -463,7 +463,7 @@ bucket_definitions: }); const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', undefined, config); await writer.markAllSnapshotDone('1/1'); @@ -552,7 +552,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); diff --git a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts index c8ab8dc22..bf76d2e67 100644 --- a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts @@ -17,7 +17,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await 
bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -99,7 +99,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); - await using writer = await factory.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index 9a5ac7f07..eaca6aa48 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -53,7 +53,7 @@ export function registerSyncTests(config: storage.TestStorageConfig) { }); const bucketStorage = f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const sourceTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('0/1'); @@ -116,7 +116,7 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('0/1'); @@ -178,7 +178,7 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('0/1'); @@ -287,7 +287,7 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('0/1'); @@ -425,7 +425,7 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('0/1'); @@ -553,7 +553,7 @@ bucket_definitions: content: BASIC_SYNC_RULES }); const bucketStorage = f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('0/1'); @@ -616,7 +616,7 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await 
using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('0/1'); @@ -688,7 +688,7 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); // Activate await writer.markAllSnapshotDone('0/0'); @@ -756,7 +756,7 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); // Activate @@ -821,7 +821,7 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); const listsTable = await test_utils.resolveTestTable(writer, 'lists', ['id'], config, 2); @@ -895,7 +895,7 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const usersTable = await test_utils.resolveTestTable(writer, 'users', ['id'], config, 1); const listsTable = await test_utils.resolveTestTable(writer, 'lists', ['id'], config, 2); // Activate @@ -964,7 +964,7 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); // Activate await writer.markAllSnapshotDone('0/0'); await writer.keepaliveAll('0/0'); @@ -1009,7 +1009,7 @@ bucket_definitions: }); const bucketStorage = await f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('0/1'); @@ -1151,7 +1151,7 @@ bucket_definitions: }); const bucketStorage = f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); await writer.markAllSnapshotDone('0/1'); // <= the managed write checkpoint LSN below @@ -1233,7 +1233,7 @@ config: content: rules[i] }); const bucketStorage = f.getInstance(syncRules); - await using writer = await f.createCombinedWriter([bucketStorage], test_utils.BATCH_OPTIONS); + await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config, i + 1); diff --git a/packages/service-core/src/storage/BucketStorageFactory.ts b/packages/service-core/src/storage/BucketStorageFactory.ts 
index 500466d33..c3e860086 100644 --- a/packages/service-core/src/storage/BucketStorageFactory.ts +++ b/packages/service-core/src/storage/BucketStorageFactory.ts @@ -1,10 +1,10 @@ -import { ObserverClient } from '@powersync/lib-services-framework'; +import { Logger, ObserverClient } from '@powersync/lib-services-framework'; +import { BucketDataWriter, SaveUpdate } from './BucketStorageBatch.js'; import { ParseSyncRulesOptions, PersistedSyncRules, PersistedSyncRulesContent } from './PersistedSyncRulesContent.js'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { ReplicationLock } from './ReplicationLock.js'; -import { StartBatchOptions, SyncRulesBucketStorage } from './SyncRulesBucketStorage.js'; import { ReportStorage } from './ReportStorage.js'; -import { BucketDataWriter } from './BucketStorageBatch.js'; +import { SyncRulesBucketStorage } from './SyncRulesBucketStorage.js'; /** * Represents a configured storage provider. @@ -27,7 +27,7 @@ export interface BucketStorageFactory extends ObserverClient; + createCombinedWriter(storage: SyncRulesBucketStorage[], options: CreateWriterOptions): Promise; /** * Deploy new sync rules. @@ -174,3 +174,35 @@ export interface TestStorageConfig { factory: TestStorageFactory; tableIdStrings: boolean; } + +export interface CreateWriterOptions extends ParseSyncRulesOptions { + zeroLSN: string; + /** + * Whether or not to store a copy of the current data. + * + * This is needed if we need to apply partial updates, for example + * when we get TOAST values from Postgres. + * + * This is not needed when we get the full document from the source + * database, for example from MongoDB. + */ + storeCurrentData: boolean; + + /** + * Set to true for initial replication. + * + * This will avoid creating new operations for rows previously replicated. + */ + skipExistingRows?: boolean; + + /** + * Callback called if we streamed an update to a record that we don't have yet. + * + * This is expected to happen in some initial replication edge cases, only if storeCurrentData = true. + */ + markRecordUnavailable?: BucketStorageMarkRecordUnavailable; + + logger?: Logger; +} + +export type BucketStorageMarkRecordUnavailable = (record: SaveUpdate) => void; diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index cffbf5b44..fa6ea5d17 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -8,7 +8,7 @@ import { } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; import { BucketDataWriter, BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; -import { BucketStorageFactory } from './BucketStorageFactory.js'; +import { BucketStorageFactory, CreateWriterOptions } from './BucketStorageFactory.js'; import { ParseSyncRulesOptions, PersistedSyncRules } from './PersistedSyncRulesContent.js'; import { SourceEntityDescriptor } from './SourceEntity.js'; import { SourceTable } from './SourceTable.js'; @@ -34,7 +34,7 @@ export interface SyncRulesBucketStorage * * The writer must be flushed and disposed when done. 
*/ - createWriter(options: StartBatchOptions): Promise; + createWriter(options: CreateWriterOptions): Promise; getHydratedSyncRules(options: ParseSyncRulesOptions): HydratedSyncRules; @@ -184,36 +184,6 @@ export interface ResolveTableResult { dropTables: SourceTable[]; } -export interface StartBatchOptions extends ParseSyncRulesOptions { - zeroLSN: string; - /** - * Whether or not to store a copy of the current data. - * - * This is needed if we need to apply partial updates, for example - * when we get TOAST values from Postgres. - * - * This is not needed when we get the full document from the source - * database, for example from MongoDB. - */ - storeCurrentData: boolean; - - /** - * Set to true for initial replication. - * - * This will avoid creating new operations for rows previously replicated. - */ - skipExistingRows?: boolean; - - /** - * Callback called if we streamed an update to a record that we don't have yet. - * - * This is expected to happen in some initial replication edge cases, only if storeCurrentData = true. - */ - markRecordUnavailable?: BucketStorageMarkRecordUnavailable; - - logger?: Logger; -} - export interface CompactOptions { /** * Heap memory limit for the compact process. @@ -365,5 +335,3 @@ export const CHECKPOINT_INVALIDATE_ALL: CheckpointChanges = { updatedParameterLookups: new Set(), invalidateParameterBuckets: true }; - -export type BucketStorageMarkRecordUnavailable = (record: SaveUpdate) => void; From fdd7668f60fc1d6ea6de8311ff132889e57246f6 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 10:50:01 +0200 Subject: [PATCH 070/101] Rename methods. --- .../implementation/MongoBucketBatch.ts | 6 +-- .../test/src/storage_compacting.test.ts | 2 +- .../src/replication/ChangeStream.ts | 6 +-- .../src/replication/MongoSnapshotter.ts | 4 +- .../test/src/change_stream_utils.ts | 4 +- .../module-mssql/src/replication/CDCStream.ts | 6 +-- .../src/replication/BinLogStream.ts | 6 +-- .../src/storage/batch/PostgresWriter.ts | 6 +-- .../src/replication/PostgresSnapshotter.ts | 2 +- .../src/replication/WalStream.ts | 4 +- .../src/tests/register-compacting-tests.ts | 18 +++---- .../register-data-storage-checkpoint-tests.ts | 16 +++--- .../tests/register-data-storage-data-tests.ts | 54 +++++++++---------- .../register-data-storage-parameter-tests.ts | 20 +++---- .../register-parameter-compacting-tests.ts | 10 ++-- .../src/tests/register-sync-tests.ts | 54 +++++++++---------- .../src/storage/BucketStorageBatch.ts | 17 ++++-- 17 files changed, 123 insertions(+), 112 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 1cf80cc88..81458898a 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -148,7 +148,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { return lsn; } - async keepaliveAll(lsn: string): Promise { + async keepalive(lsn: string): Promise { let didAny = false; for (let batch of this.subWriters) { const didBatchKeepalive = await batch.keepalive(lsn); @@ -157,7 +157,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { return didAny; } - async commitAll(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { + async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { let didCommit = false; for (let batch of 
this.subWriters) { const didWriterCommit = await batch.commit(lsn, options); @@ -166,7 +166,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { return didCommit; } - async setAllResumeLsn(lsn: string): Promise { + async setResumeLsn(lsn: string): Promise { for (let batch of this.subWriters) { await batch.setResumeLsn(lsn); } diff --git a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts index b4cbad28f..9af36079e 100644 --- a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts @@ -40,7 +40,7 @@ describe('Mongo Sync Bucket Storage Compact', () => { afterReplicaId: test_utils.rid('t2') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); return bucketStorage.getCheckpoint(); }; diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 0b5275416..3a01e7f4e 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -650,7 +650,7 @@ export class ChangeStream { // doing a keepalive in the middle of a transaction. if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) { const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken); - await writer.keepaliveAll(lsn); + await writer.keepalive(lsn); this.touch(); lastEmptyResume = performance.now(); // Log the token update. This helps as a general "replication is still active" message in the logs. @@ -780,7 +780,7 @@ export class ChangeStream { if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) { waitForCheckpointLsn = null; } - const didCommit = await writer.commitAll(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + const didCommit = await writer.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); if (didCommit) { this.oldestUncommittedChange = null; @@ -826,7 +826,7 @@ export class ChangeStream { resume_token: changeDocument._id }); this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`); - await writer.setAllResumeLsn(lsn); + await writer.setResumeLsn(lsn); changesSinceLastCheckpoint = 0; } } diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index 0b6b75372..8ae7fb89b 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -98,7 +98,7 @@ export class MongoSnapshotter { // First replication attempt - get a snapshot and store the timestamp snapshotLsn = await this.getSnapshotLsn(writer); // FIXME: check the logic for resumeLSN. - await writer.setAllResumeLsn(snapshotLsn); + await writer.setResumeLsn(snapshotLsn); this.logger.info(`Marking snapshot at ${snapshotLsn}`); } else { this.logger.info(`Resuming snapshot at ${snapshotLsn}`); @@ -216,7 +216,7 @@ export class MongoSnapshotter { const resumeLsn = writer.resumeFromLsn ?? MongoLSN.ZERO.comparable; // FIXME: Only commit on relevant syncRules? 
- await writer.commitAll(resumeLsn); + await writer.commit(resumeLsn); // FIXME: check this // if (flushResults?.flushed_op != null) { diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index 1920bd993..20a04636e 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -165,8 +165,8 @@ export class ChangeStreamTestContext { */ async markSnapshotConsistent() { const checkpoint = await createCheckpoint(this.client, this.db, STANDALONE_CHECKPOINT_ID); - await using writer = await this.storage!.factory.createCombinedWriter([this.storage!], test_utils.BATCH_OPTIONS); - await writer.keepaliveAll(checkpoint); + await using writer = await this.storage!.createWriter(test_utils.BATCH_OPTIONS); + await writer.keepalive(checkpoint); } async getCheckpoint(options?: { timeout?: number }) { diff --git a/modules/module-mssql/src/replication/CDCStream.ts b/modules/module-mssql/src/replication/CDCStream.ts index 91034be81..27a44f8fd 100644 --- a/modules/module-mssql/src/replication/CDCStream.ts +++ b/modules/module-mssql/src/replication/CDCStream.ts @@ -475,7 +475,7 @@ export class CDCStream { if (snapshotLSN == null) { // First replication attempt - set the snapshot LSN to the current LSN before starting snapshotLSN = (await getLatestReplicatedLSN(this.connections)).toString(); - await writer.setAllResumeLsn(snapshotLSN); + await writer.setResumeLsn(snapshotLSN); const latestLSN = (await getLatestLSN(this.connections)).toString(); this.logger.info(`Marking snapshot at ${snapshotLSN}, Latest DB LSN ${latestLSN}.`); } else { @@ -508,7 +508,7 @@ export class CDCStream { // Actual checkpoint will be created when streaming replication caught up. const postSnapshotLSN = await getLatestLSN(this.connections); await writer.markAllSnapshotDone(postSnapshotLSN.toString()); - await writer.commitAll(snapshotLSN); + await writer.commit(snapshotLSN); this.logger.info(`Snapshot done. Need to replicate from ${snapshotLSN} to ${postSnapshotLSN} to be consistent`); } @@ -634,7 +634,7 @@ export class CDCStream { this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1); }, onCommit: async (lsn: string, transactionCount: number) => { - await writer.commitAll(lsn); + await writer.commit(lsn); this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(transactionCount); this.isStartingReplication = false; }, diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index 2e7e9d05b..76d17ccfa 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -313,7 +313,7 @@ export class BinLogStream { const snapshotDoneGtid = await common.readExecutedGtid(promiseConnection); await writer.markAllSnapshotDone(snapshotDoneGtid.comparable); const flushResults = await writer.flush(); - await writer.commitAll(headGTID.comparable); + await writer.commit(headGTID.comparable); lastOp = flushResults?.flushed_op ?? 
null; this.logger.info(`Initial replication done`); @@ -500,14 +500,14 @@ export class BinLogStream { }); }, onKeepAlive: async (lsn: string) => { - const didCommit = await writer.keepaliveAll(lsn); + const didCommit = await writer.keepalive(lsn); if (didCommit) { this.oldestUncommittedChange = null; } }, onCommit: async (lsn: string) => { this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1); - const didCommit = await writer.commitAll(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); + const didCommit = await writer.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange }); if (didCommit) { this.oldestUncommittedChange = null; this.isStartingReplication = false; diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts index 41137a29f..c8f393baf 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts @@ -49,7 +49,7 @@ export class PostgresWriter implements storage.BucketDataWriter { return lsn; } - async keepaliveAll(lsn: string): Promise { + async keepalive(lsn: string): Promise { let didAny = false; for (let batch of this.subWriters) { const didBatchKeepalive = await batch.keepalive(lsn); @@ -58,7 +58,7 @@ export class PostgresWriter implements storage.BucketDataWriter { return didAny; } - async commitAll(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { + async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { let didCommit = false; for (let batch of this.subWriters) { const didWriterCommit = await batch.commit(lsn, options); @@ -67,7 +67,7 @@ export class PostgresWriter implements storage.BucketDataWriter { return didCommit; } - async setAllResumeLsn(lsn: string): Promise { + async setResumeLsn(lsn: string): Promise { for (let batch of this.subWriters) { await batch.setResumeLsn(lsn); } diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index 2c583dea3..509f979a6 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -334,7 +334,7 @@ export class PostgresSnapshotter { // This commit ensures we set keepalive_op. // It may be better if that is automatically set when flushing. const flushResults = await writer.flush(); - await writer.commitAll(ZERO_LSN); + await writer.commit(ZERO_LSN); this.logger.info(`Flushed snapshot at ${flushResults?.flushed_op}`); } diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 2eacbb99c..61565cefb 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -562,7 +562,7 @@ export class WalStream { await this.resnapshot(writer, resnapshot); resnapshot = []; } - const didCommit = await writer.commitAll(msg.lsn!, { + const didCommit = await writer.commit(msg.lsn!, { createEmptyCheckpoints, oldestUncommittedChange: this.oldestUncommittedChange }); @@ -610,7 +610,7 @@ export class WalStream { // may be in the middle of the next transaction. // It must only be used to associate checkpoints with LSNs. 
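The streaming call sites reduce to the same small surface. A minimal sketch, assuming a hypothetical ReplicationMessage shape and nextMessage() source rather than the real WalStream/ChangeStream plumbing:

// Sketch only — the message shape and `nextMessage` are hypothetical; the writer calls
// mirror the WalStream/ChangeStream hunks in this patch.
import { storage } from '@powersync/service-core';

type SaveArgs = Parameters<storage.BucketDataWriter['save']>[0];
type ReplicationMessage =
  | { type: 'change'; save: SaveArgs }
  | { type: 'commit'; lsn: string }
  | { type: 'keepalive'; lsn: string };

async function streamChanges(
  writer: storage.BucketDataWriter,
  nextMessage: () => Promise<ReplicationMessage>
) {
  let oldestUncommittedChange: Date | null = null;

  // Loop until the replication stream is aborted (abort handling omitted).
  while (true) {
    const msg = await nextMessage();
    if (msg.type === 'change') {
      oldestUncommittedChange ??= new Date();
      await writer.save(msg.save);
    } else if (msg.type === 'commit') {
      // End of a source transaction: this may create a new checkpoint.
      const didCommit = await writer.commit(msg.lsn, { oldestUncommittedChange });
      if (didCommit) {
        oldestUncommittedChange = null;
      }
    } else {
      // Idle keepalive: only associates a checkpoint with an LSN, never mid-transaction.
      const didCommit = await writer.keepalive(msg.lsn);
      if (didCommit) {
        oldestUncommittedChange = null;
      }
    }
  }
}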
- const didCommit = await writer.keepaliveAll(chunkLastLsn); + const didCommit = await writer.keepalive(chunkLastLsn); if (didCommit) { this.oldestUncommittedChange = null; } diff --git a/packages/service-core-tests/src/tests/register-compacting-tests.ts b/packages/service-core-tests/src/tests/register-compacting-tests.ts index 5e6638437..6c3410c7f 100644 --- a/packages/service-core-tests/src/tests/register-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-compacting-tests.ts @@ -49,7 +49,7 @@ bucket_definitions: }); const result = await writer.flush(); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const checkpoint = result!.flushed_op; const request2 = bucketRequest(syncRules); @@ -163,7 +163,7 @@ bucket_definitions: }); const result = await writer.flush(); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const checkpoint = result!.flushed_op; const request = bucketRequest(syncRules); @@ -269,7 +269,7 @@ bucket_definitions: }); const result = await writer.flush(); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const checkpoint1 = result!.flushed_op; const request = bucketRequest(syncRules); @@ -284,7 +284,7 @@ bucket_definitions: beforeReplicaId: 't2' }); const result2 = await writer.flush(); - await writer.commitAll('2/1'); + await writer.commit('2/1'); const checkpoint2 = result2!.flushed_op; await bucketStorage.compact({ @@ -391,7 +391,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('t2') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); } const checkpoint = (await bucketStorage.getCheckpoint()).checkpoint; @@ -476,7 +476,7 @@ bucket_definitions: beforeReplicaId: 't1' }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); await bucketStorage.compact({ clearBatchLimit: 2, @@ -494,7 +494,7 @@ bucket_definitions: beforeReplicaId: 't2' }); const result2 = await writer.flush(); - await writer.commitAll('2/1'); + await writer.commit('2/1'); const checkpoint2 = result2!.flushed_op; await bucketStorage.clearChecksumCache(); const request = bucketRequest(syncRules); @@ -539,7 +539,7 @@ bucket_definitions: }); const result = await writer.flush(); - await writer.commitAll('1/1'); + await writer.commit('1/1'); // Get checksums here just to populate the cache await bucketStorage.getChecksums(result!.flushed_op, [request]); @@ -552,7 +552,7 @@ bucket_definitions: beforeReplicaId: 't1' }); const result2 = await writer.flush(); - await writer.commitAll('2/1'); + await writer.commit('2/1'); await bucketStorage.compact({ clearBatchLimit: 20, diff --git a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts index 4d3da1d6a..c991b99f2 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts @@ -41,7 +41,7 @@ bucket_definitions: user_id: 'user1' }); - await writer.keepaliveAll('5/0'); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -77,7 +77,7 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await writer.keepaliveAll('5/0'); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -98,7 +98,7 @@ bucket_definitions: // We have to trigger a new keepalive after the checkpoint, at least to cover postgres 
storage. // This is what is effetively triggered with RouteAPI.createReplicationHead(). // MongoDB storage doesn't explicitly need this anymore. - await writer.keepaliveAll('6/0'); + await writer.keepalive('6/0'); let result2 = await iter.next(); if (result2.value?.base?.lsn == '5/0') { @@ -144,7 +144,7 @@ bucket_definitions: user_id: 'user1' }); await writer.flush(); - await writer.keepaliveAll('5/0'); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -188,7 +188,7 @@ bucket_definitions: user_id: 'user1' }); await writer.flush(); - await writer.keepaliveAll('5/0'); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -224,7 +224,7 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); - await writer.keepaliveAll('5/0'); + await writer.keepalive('5/0'); const result = await iter.next(); expect(result).toMatchObject({ @@ -242,7 +242,7 @@ bucket_definitions: user_id: 'user1' }); await writer.flush(); - await writer.keepaliveAll('6/0'); + await writer.keepalive('6/0'); let result2 = await iter.next(); expect(result2).toMatchObject({ @@ -261,7 +261,7 @@ bucket_definitions: user_id: 'user1' }); await writer.flush(); - await writer.keepaliveAll('7/0'); + await writer.keepalive('7/0'); let result3 = await iter.next(); expect(result3).toMatchObject({ diff --git a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts index d5c646da4..247f67339 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts @@ -56,7 +56,7 @@ bucket_definitions: tag: storage.SaveOperationTag.DELETE, beforeReplicaId: test_utils.rid('test1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); @@ -110,7 +110,7 @@ bucket_definitions: beforeReplicaId: test_utils.rid('test1') }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); await writer.save({ sourceTable: testTable, @@ -121,7 +121,7 @@ bucket_definitions: }, afterReplicaId: test_utils.rid('test1') }); - await writer.commitAll('2/1'); + await writer.commit('2/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); @@ -172,7 +172,7 @@ bucket_definitions: beforeReplicaId: test_utils.rid('test1') }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); await writer.save({ sourceTable: testTable, @@ -187,7 +187,7 @@ bucket_definitions: beforeReplicaId: test_utils.rid('test1'), afterReplicaId: test_utils.rid('test1') }); - await writer.commitAll('2/1'); + await writer.commit('2/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -245,7 +245,7 @@ bucket_definitions: }, afterReplicaId: test_utils.rid('test1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -320,7 +320,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('test2') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, [bucketRequest(syncRules)])); const data = batch[0].chunkData.data.map((d) => { @@ -371,7 
+371,7 @@ bucket_definitions: beforeReplicaId: test_utils.rid('test1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); await writer.save({ sourceTable: testTable, @@ -465,7 +465,7 @@ bucket_definitions: beforeReplicaId: test_utils.rid('test1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); await writer.markAllSnapshotDone('1/1'); @@ -495,7 +495,7 @@ bucket_definitions: beforeReplicaId: test_utils.rid('test1') }); - await writer.commitAll('2/1'); + await writer.commit('2/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -955,7 +955,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('test3') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -1032,7 +1032,7 @@ bucket_definitions: }); } - await writer.commitAll('1/1'); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -1111,7 +1111,7 @@ bucket_definitions: }); } - await writer.commitAll('1/1'); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const global1Request = bucketRequest(syncRules, 'global1[]', 0n); @@ -1242,7 +1242,7 @@ bucket_definitions: const storage = f.getInstance(r.persisted_sync_rules!); await using writer = await storage.createWriter(test_utils.BATCH_OPTIONS); await writer.markAllSnapshotDone('1/0'); - await writer.keepaliveAll('1/0'); + await writer.keepalive('1/0'); const metrics2 = await f.getStorageMetrics(); expect(metrics2.operations_size_bytes).toBeLessThanOrEqual(20_000); @@ -1323,7 +1323,7 @@ bucket_definitions: }, afterReplicaId: test_utils.rid('test1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); const request = bucketRequest(syncRules); @@ -1348,22 +1348,22 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); await writer.markAllSnapshotDone('1/1'); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const cp1 = await bucketStorage.getCheckpoint(); expect(cp1.lsn).toEqual('1/1'); - await writer.commitAll('2/1', { createEmptyCheckpoints: true }); + await writer.commit('2/1', { createEmptyCheckpoints: true }); const cp2 = await bucketStorage.getCheckpoint(); expect(cp2.lsn).toEqual('2/1'); - await writer.keepaliveAll('3/1'); + await writer.keepalive('3/1'); const cp3 = await bucketStorage.getCheckpoint(); expect(cp3.lsn).toEqual('3/1'); // For the last one, we skip creating empty checkpoints // This means the LSN stays at 3/1. - await writer.commitAll('4/1', { createEmptyCheckpoints: false }); + await writer.commit('4/1', { createEmptyCheckpoints: false }); const cp4 = await bucketStorage.getCheckpoint(); expect(cp4.lsn).toEqual('3/1'); }); @@ -1385,9 +1385,9 @@ bucket_definitions: // We simulate two concurrent batches, but sequential calls are enough for this test. 
await writer1.markAllSnapshotDone('1/1'); - await writer1.commitAll('1/1'); + await writer1.commit('1/1'); - await writer1.commitAll('2/1', { createEmptyCheckpoints: false }); + await writer1.commit('2/1', { createEmptyCheckpoints: false }); const cp2 = await bucketStorage.getCheckpoint(); expect(cp2.lsn).toEqual('1/1'); // checkpoint 2/1 skipped @@ -1402,13 +1402,13 @@ bucket_definitions: }); // This simulates what happens on a snapshot processor. // This may later change to a flush() rather than commit(). - await writer2.commitAll(test_utils.BATCH_OPTIONS.zeroLSN); + await writer2.commit(test_utils.BATCH_OPTIONS.zeroLSN); const cp3 = await bucketStorage.getCheckpoint(); expect(cp3.lsn).toEqual('1/1'); // Still unchanged // This now needs to advance the LSN, despite {createEmptyCheckpoints: false} - await writer1.commitAll('4/1', { createEmptyCheckpoints: false }); + await writer1.commit('4/1', { createEmptyCheckpoints: false }); const cp4 = await bucketStorage.getCheckpoint(); expect(cp4.lsn).toEqual('4/1'); }); @@ -1444,7 +1444,7 @@ bucket_definitions: }, beforeReplicaId: test_utils.rid('test1') }); - await streamingWriter.commitAll('2/1'); + await streamingWriter.commit('2/1'); await snapshotWriter.save({ sourceTable: snapshotTable, @@ -1456,9 +1456,9 @@ bucket_definitions: afterReplicaId: test_utils.rid('test1') }); await snapshotWriter.markAllSnapshotDone('3/1'); - await snapshotWriter.commitAll('1/1'); + await snapshotWriter.commit('1/1'); - await streamingWriter.keepaliveAll('3/1'); + await streamingWriter.keepalive('3/1'); const cp = await bucketStorage.getCheckpoint(); expect(cp.lsn).toEqual('3/1'); @@ -1507,7 +1507,7 @@ bucket_definitions: }); } } - await writer.commitAll('1/1'); + await writer.commit('1/1'); const { checkpoint } = await bucketStorage.getCheckpoint(); bucketStorage.clearChecksumCache(); diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index 3acd35b86..50528a676 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -66,7 +66,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('t1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); const parameters = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); @@ -102,7 +102,7 @@ bucket_definitions: }, afterReplicaId: test_utils.rid('user1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const checkpoint1 = await bucketStorage.getCheckpoint(); await writer.save({ sourceTable: testTable, @@ -113,7 +113,7 @@ bucket_definitions: }, afterReplicaId: test_utils.rid('user1') }); - await writer.commitAll('1/2'); + await writer.commit('1/2'); const checkpoint2 = await bucketStorage.getCheckpoint(); const parameters = await checkpoint2.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); @@ -171,7 +171,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('todo2') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); // Update the second todo item to now belong to list 1 await writer.save({ @@ -184,7 +184,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('todo2') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); // We specifically request the todo_ids for both lists. 
// There removal operation for the association of `list2`::`todo2` should not interfere with the new @@ -234,7 +234,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('t1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const TEST_PARAMS = { group_id: 'group1' }; @@ -296,7 +296,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('t1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const TEST_PARAMS = { group_id: 'group1' }; @@ -335,7 +335,7 @@ bucket_definitions: }, afterReplicaId: test_utils.rid('workspace1') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); const parameters = new RequestParameters({ sub: 'u1' }, {}); @@ -408,7 +408,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('workspace3') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); @@ -509,7 +509,7 @@ bucket_definitions: afterReplicaId: test_utils.rid('workspace4') }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); diff --git a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts index bf76d2e67..cde9e710a 100644 --- a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts @@ -39,7 +39,7 @@ bucket_definitions: afterReplicaId: 't2' }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); const lookup = ScopedParameterLookup.direct({ lookupName: '20002', queryId: '', source: null as any }, ['t1']); @@ -68,7 +68,7 @@ bucket_definitions: }, beforeReplicaId: 't1' }); - await writer.commitAll('1/2'); + await writer.commit('1/2'); const checkpoint2 = await bucketStorage.getCheckpoint(); const parameters2 = await checkpoint2.getParameterSets([lookup]); expect(parameters2).toEqual([]); @@ -123,7 +123,7 @@ bucket_definitions: afterReplicaId: 't2' }); - await writer.commitAll('1/1'); + await writer.commit('1/1'); await writer.save({ sourceTable: testTable, @@ -134,7 +134,7 @@ bucket_definitions: }, beforeReplicaId: 't1' }); - await writer.commitAll('2/1'); + await writer.commit('2/1'); await writer.save({ sourceTable: testTable, @@ -145,7 +145,7 @@ bucket_definitions: }, afterReplicaId: 't2' }); - await writer.commitAll('3/1'); + await writer.commit('3/1'); const lookup = ScopedParameterLookup.direct({ lookupName: 'test', queryId: '1', source: null as any }, ['u1']); diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index eaca6aa48..995b455bd 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -78,7 +78,7 @@ export function registerSyncTests(config: storage.TestStorageConfig) { afterReplicaId: 't2' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -140,7 +140,7 @@ bucket_definitions: afterReplicaId: 'earlier' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -204,7 +204,7 @@ bucket_definitions: }); } - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -243,7 +243,7 @@ 
bucket_definitions: afterReplicaId: 'highprio2' }); - await writer.commitAll('0/2'); + await writer.commit('0/2'); } else { // Low-priority sync from the first checkpoint was interrupted. This should not happen before // 1000 low-priority items were synchronized. @@ -313,7 +313,7 @@ bucket_definitions: }); } - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -357,7 +357,7 @@ bucket_definitions: afterReplicaId: 'highprio2' }); - await writer.commitAll('0/2'); + await writer.commit('0/2'); } else { expect(sentCheckpoints).toBe(2); expect(sentRows).toBe(10002); @@ -389,7 +389,7 @@ bucket_definitions: afterReplicaId: 'highprio3' }); - await writer.commitAll('0/3'); + await writer.commit('0/3'); } } } @@ -451,7 +451,7 @@ bucket_definitions: }); } - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -510,7 +510,7 @@ bucket_definitions: afterReplicaId: '2001' }); - await writer.commitAll('0/2'); + await writer.commit('0/2'); } if (sentRows >= 1000 && sentRows <= 2001) { @@ -566,7 +566,7 @@ bucket_definitions: }, afterReplicaId: 't1' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -597,7 +597,7 @@ bucket_definitions: if (receivedCompletions == 1) { // Trigger an empty bucket update. await bucketStorage.createManagedWriteCheckpoint({ user_id: '', heads: { '1': '1/0' } }); - await writer.commitAll('1/0'); + await writer.commit('1/0'); } else { break; } @@ -631,7 +631,7 @@ bucket_definitions: afterReplicaId: 't1' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -692,7 +692,7 @@ bucket_definitions: const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); // Activate await writer.markAllSnapshotDone('0/0'); - await writer.keepaliveAll('0/0'); + await writer.keepalive('0/0'); const stream = sync.streamResponse({ syncContext, @@ -724,7 +724,7 @@ bucket_definitions: afterReplicaId: 't1' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); expect(await getCheckpointLines(iter)).toMatchSnapshot(); @@ -738,7 +738,7 @@ bucket_definitions: afterReplicaId: 't2' }); - await writer.commitAll('0/2'); + await writer.commit('0/2'); expect(await getCheckpointLines(iter)).toMatchSnapshot(); }); @@ -761,7 +761,7 @@ bucket_definitions: // Activate await writer.markAllSnapshotDone('0/0'); - await writer.keepaliveAll('0/0'); + await writer.keepalive('0/0'); const stream = sync.streamResponse({ syncContext, @@ -797,7 +797,7 @@ bucket_definitions: afterReplicaId: 'user1' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const checkpoint2 = await getCheckpointLines(iter); @@ -836,7 +836,7 @@ bucket_definitions: afterReplicaId: 'user1' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -873,7 +873,7 @@ bucket_definitions: afterReplicaId: 'list1' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const checkpoint2 = await getCheckpointLines(iter); expect( @@ -900,7 +900,7 @@ bucket_definitions: const listsTable = await test_utils.resolveTestTable(writer, 'lists', ['id'], config, 2); // Activate await writer.markAllSnapshotDone('0/0'); - await writer.keepaliveAll('0/0'); + await writer.keepalive('0/0'); const stream = sync.streamResponse({ syncContext, @@ -945,7 +945,7 @@ bucket_definitions: afterReplicaId: 'user1' }); - 
await writer.commitAll('0/1'); + await writer.commit('0/1'); const { bucket } = test_utils.bucketRequest(syncRules, 'by_user["user1"]'); @@ -967,7 +967,7 @@ bucket_definitions: await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); // Activate await writer.markAllSnapshotDone('0/0'); - await writer.keepaliveAll('0/0'); + await writer.keepalive('0/0'); const exp = Date.now() / 1000 + 0.1; @@ -1033,7 +1033,7 @@ bucket_definitions: afterReplicaId: 't2' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, @@ -1087,7 +1087,7 @@ bucket_definitions: afterReplicaId: 't2' }); - await writer.commitAll('0/2'); + await writer.commit('0/2'); await bucketStorage.compact({ minBucketChanges: 1 @@ -1155,7 +1155,7 @@ bucket_definitions: await writer.markAllSnapshotDone('0/1'); // <= the managed write checkpoint LSN below - await writer.commitAll('0/1'); + await writer.commit('0/1'); const checkpoint = await bucketStorage.createManagedWriteCheckpoint({ user_id: 'test', @@ -1189,7 +1189,7 @@ bucket_definitions: await writer.markAllSnapshotDone('0/1'); // must be >= the managed write checkpoint LSN - await writer.commitAll('1/0'); + await writer.commit('1/0'); // At this point the LSN has advanced, so the write checkpoint should be // included in the next checkpoint message. @@ -1247,7 +1247,7 @@ config: }, afterReplicaId: 't1' }); - await writer.commitAll('0/1'); + await writer.commit('0/1'); const stream = sync.streamResponse({ syncContext, diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index c13b814af..0a57ffcbf 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -20,9 +20,20 @@ export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptio export interface BucketDataWriter extends BucketDataWriterBase, AsyncDisposable { readonly rowProcessor: RowProcessor; - keepaliveAll(lsn: string): Promise; - commitAll(lsn: string, options?: BucketBatchCommitOptions): Promise; - setAllResumeLsn(lsn: string): Promise; + /** + * Perform a keepalive on every replication stream. + */ + keepalive(lsn: string): Promise; + + /** + * Performs a commit on every replication stream. + */ + commit(lsn: string, options?: BucketBatchCommitOptions): Promise; + + /** + * Set resume LSN on every replication stream. + */ + setResumeLsn(lsn: string): Promise; /** * Resolve a table, keeping track of it internally. From 99a75902cfab4d4db78363155817dc4cdbfac651 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 10:51:49 +0200 Subject: [PATCH 071/101] Rename BucketDataWriters. 
--- .../{MongoBucketBatch.ts => MongoBucketDataWriter.ts} | 0 .../src/storage/implementation/PersistedBatch.ts | 2 +- modules/module-mongodb-storage/src/storage/storage-index.ts | 2 +- .../src/storage/PostgresBucketStorageFactory.ts | 4 ++-- .../batch/{PostgresWriter.ts => PostgresBucketDataWriter.ts} | 2 +- .../storage/{BucketStorageBatch.ts => BucketDataWriter.ts} | 3 +++ packages/service-core/src/storage/BucketStorageFactory.ts | 2 +- packages/service-core/src/storage/ReplicationEventPayload.ts | 2 +- packages/service-core/src/storage/SyncRulesBucketStorage.ts | 2 +- packages/service-core/src/storage/bson.ts | 2 +- packages/service-core/src/storage/storage-index.ts | 2 +- 11 files changed, 13 insertions(+), 10 deletions(-) rename modules/module-mongodb-storage/src/storage/implementation/{MongoBucketBatch.ts => MongoBucketDataWriter.ts} (100%) rename modules/module-postgres-storage/src/storage/batch/{PostgresWriter.ts => PostgresBucketDataWriter.ts} (99%) rename packages/service-core/src/storage/{BucketStorageBatch.ts => BucketDataWriter.ts} (99%) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts similarity index 100% rename from modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts rename to modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts diff --git a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts index c2e660f96..08f64ca28 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts @@ -6,7 +6,7 @@ import * as bson from 'bson'; import { Logger, ReplicationAssertionError } from '@powersync/lib-services-framework'; import { InternalOpId, storage, utils } from '@powersync/service-core'; import { mongoTableId, replicaIdToSubkey } from '../../utils/util.js'; -import { currentBucketKey, EMPTY_DATA, MAX_ROW_SIZE } from './MongoBucketBatch.js'; +import { currentBucketKey, EMPTY_DATA, MAX_ROW_SIZE } from './MongoBucketDataWriter.js'; import { MongoIdSequence } from './MongoIdSequence.js'; import { PowerSyncMongo } from './db.js'; import { diff --git a/modules/module-mongodb-storage/src/storage/storage-index.ts b/modules/module-mongodb-storage/src/storage/storage-index.ts index cfb1d4ad0..fbd83d295 100644 --- a/modules/module-mongodb-storage/src/storage/storage-index.ts +++ b/modules/module-mongodb-storage/src/storage/storage-index.ts @@ -1,6 +1,6 @@ export * from './implementation/db.js'; export * from './implementation/models.js'; -export * from './implementation/MongoBucketBatch.js'; +export * from './implementation/MongoBucketDataWriter.js'; export * from './implementation/MongoIdSequence.js'; export * from './implementation/MongoPersistedSyncRules.js'; export * from './implementation/MongoPersistedSyncRulesContent.js'; diff --git a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts index 2440b1f65..e49f49b2d 100644 --- a/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts +++ b/modules/module-postgres-storage/src/storage/PostgresBucketStorageFactory.ts @@ -13,7 +13,7 @@ import { notifySyncRulesUpdate, PostgresBucketBatch } from './batch/PostgresBuck import { 
PostgresSyncRulesStorage } from './PostgresSyncRulesStorage.js'; import { PostgresPersistedSyncRulesContent } from './sync-rules/PostgresPersistedSyncRulesContent.js'; import { getStorageApplicationName } from '../utils/application-name.js'; -import { PostgresWriter } from './batch/PostgresWriter.js'; +import { PostgresBucketDataWriter } from './batch/PostgresBucketDataWriter.js'; export type PostgresBucketStorageOptions = { config: NormalizedPostgresStorageConfig; @@ -51,7 +51,7 @@ export class PostgresBucketStorageFactory const syncRules = storages.map((s) => s.getHydratedSyncRules(options)); const rowProcessor = new sync_rules.MultiSyncRules(syncRules); - const writer = new PostgresWriter({ + const writer = new PostgresBucketDataWriter({ ...options, db: this.db, rowProcessor, diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts similarity index 99% rename from modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts rename to modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts index c8f393baf..74127abf8 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts @@ -20,7 +20,7 @@ export interface PostgresWriterOptions { markRecordUnavailable?: BucketStorageMarkRecordUnavailable; } -export class PostgresWriter implements storage.BucketDataWriter { +export class PostgresBucketDataWriter implements storage.BucketDataWriter { public readonly rowProcessor: RowProcessor; write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = []; diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketDataWriter.ts similarity index 99% rename from packages/service-core/src/storage/BucketStorageBatch.ts rename to packages/service-core/src/storage/BucketDataWriter.ts index 0a57ffcbf..a970b047a 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketDataWriter.ts @@ -86,6 +86,9 @@ export interface BucketDataWriterBase { addCustomWriteCheckpoint(checkpoint: BatchedCustomWriteCheckpointOptions): void; } +/** + * @deprecated Use BucketDataWriter instead. 
+ */ export interface BucketStorageBatch extends ObserverClient, AsyncDisposable, diff --git a/packages/service-core/src/storage/BucketStorageFactory.ts b/packages/service-core/src/storage/BucketStorageFactory.ts index c3e860086..98b03bf3f 100644 --- a/packages/service-core/src/storage/BucketStorageFactory.ts +++ b/packages/service-core/src/storage/BucketStorageFactory.ts @@ -1,5 +1,5 @@ import { Logger, ObserverClient } from '@powersync/lib-services-framework'; -import { BucketDataWriter, SaveUpdate } from './BucketStorageBatch.js'; +import { BucketDataWriter, SaveUpdate } from './BucketDataWriter.js'; import { ParseSyncRulesOptions, PersistedSyncRules, PersistedSyncRulesContent } from './PersistedSyncRulesContent.js'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { ReplicationLock } from './ReplicationLock.js'; diff --git a/packages/service-core/src/storage/ReplicationEventPayload.ts b/packages/service-core/src/storage/ReplicationEventPayload.ts index d86ea50ef..ccffc3dbd 100644 --- a/packages/service-core/src/storage/ReplicationEventPayload.ts +++ b/packages/service-core/src/storage/ReplicationEventPayload.ts @@ -1,6 +1,6 @@ import * as sync_rules from '@powersync/service-sync-rules'; import { SourceTable } from './SourceTable.js'; -import { BucketStorageBatch, SaveOp } from './BucketStorageBatch.js'; +import { BucketStorageBatch, SaveOp } from './BucketDataWriter.js'; export type EventData = { op: SaveOp; diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index fa6ea5d17..616f47827 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -7,7 +7,7 @@ import { TablePattern } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; -import { BucketDataWriter, BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketStorageBatch.js'; +import { BucketDataWriter, BucketStorageBatch, FlushedResult, SaveUpdate } from './BucketDataWriter.js'; import { BucketStorageFactory, CreateWriterOptions } from './BucketStorageFactory.js'; import { ParseSyncRulesOptions, PersistedSyncRules } from './PersistedSyncRulesContent.js'; import { SourceEntityDescriptor } from './SourceEntity.js'; diff --git a/packages/service-core/src/storage/bson.ts b/packages/service-core/src/storage/bson.ts index ad7ee3e16..69c5fffec 100644 --- a/packages/service-core/src/storage/bson.ts +++ b/packages/service-core/src/storage/bson.ts @@ -1,7 +1,7 @@ import * as bson from 'bson'; import { ScopedParameterLookup, SqliteJsonValue } from '@powersync/service-sync-rules'; -import { ReplicaId } from './BucketStorageBatch.js'; +import { ReplicaId } from './BucketDataWriter.js'; type NodeBuffer = Buffer; diff --git a/packages/service-core/src/storage/storage-index.ts b/packages/service-core/src/storage/storage-index.ts index b83a2fb2f..9348a7e8f 100644 --- a/packages/service-core/src/storage/storage-index.ts +++ b/packages/service-core/src/storage/storage-index.ts @@ -9,7 +9,7 @@ export * from './StorageProvider.js'; export * from './storage-metrics.js'; export * from './WriteCheckpointAPI.js'; export * from './BucketStorageFactory.js'; -export * from './BucketStorageBatch.js'; +export * from './BucketDataWriter.js'; export * from './SyncRulesBucketStorage.js'; export * from './PersistedSyncRulesContent.js'; export * from './ReplicationLock.js'; From 422d90f5f65e7f58c4c94f71eb6bf9b6c3d529f4 Mon Sep 17 
00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 11:05:38 +0200 Subject: [PATCH 072/101] Map by table id instead of reference. --- .../src/storage/batch/PostgresBucketDataWriter.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts index 74127abf8..e5a551e08 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts @@ -28,7 +28,10 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { public subWriters: PostgresBucketBatch[] = []; - private sourceTableMap = new WeakMap(); + /** + * Map table id => relevant PostgresBucketBatch + */ + private sourceTableMap = new Map(); constructor(options: PostgresWriterOptions) { this.db = options.db; @@ -87,7 +90,7 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { idGenerator: options.idGenerator }); result.tables.push(subResult.table); - this.sourceTableMap.set(subResult.table, subWriter); + this.sourceTableMap.set(postgresTableId(subResult.table.id), subWriter); result.dropTables.push(...subResult.dropTables); } return result; @@ -95,7 +98,7 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { private subWriterForTable(table: storage.SourceTable): PostgresBucketBatch { // FIXME: store on the SourceTable instead? - const mapped = this.sourceTableMap.get(table); + const mapped = this.sourceTableMap.get(postgresTableId(table.id)); if (mapped != null) { return mapped; } @@ -148,7 +151,7 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { sourceTable.syncEvent = ref.syncEvent; sourceTable.syncData = ref.syncData; sourceTable.syncParameters = ref.syncParameters; - this.sourceTableMap.set(sourceTable, subWriter); + this.sourceTableMap.set(postgresTableId(sourceTable.id), subWriter); return sourceTable; } @@ -215,9 +218,7 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { progress: Partial ): Promise { const writer = this.subWriterForTable(table); - const updatedTable = await writer.updateTableProgress(table, progress); - this.sourceTableMap.set(updatedTable, writer); - return updatedTable; + return await writer.updateTableProgress(table, progress); } /** From fb1bb7fe00785d0d07eed99e6b75e1916783bb1c Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 11:32:54 +0200 Subject: [PATCH 073/101] Remove mapping for source tables - it is not reliable enough. --- .../src/replication/BinLogStream.ts | 9 ++- .../test/src/schema-changes.test.ts | 59 +++++++++---------- .../src/storage/PostgresSourceTable.ts | 41 +++++++++++++ .../src/storage/PostgresSyncRulesStorage.ts | 53 +++++++++-------- .../src/storage/batch/PostgresBucketBatch.ts | 5 ++ .../storage/batch/PostgresBucketDataWriter.ts | 51 ++++++++-------- 6 files changed, 139 insertions(+), 79 deletions(-) create mode 100644 modules/module-postgres-storage/src/storage/PostgresSourceTable.ts diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index 76d17ccfa..78b5302ea 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -138,9 +138,12 @@ export class BinLogStream { pattern }); - // Drop conflicting tables. 
In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage. + // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, + // so this should be a no-op. await writer.drop(result.dropTables); + this.tableCache.set(entity.objectId!, result.tables); + return result.tables; } @@ -164,7 +167,8 @@ export class BinLogStream { pattern }); - // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage. + // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, + // so this should be a no-op. await writer.drop(result.dropTables); for (let table of result.tables) { @@ -531,6 +535,7 @@ export class BinLogStream { if (change.type === SchemaChangeType.RENAME_TABLE) { const fromTableId = createTableId(change.schema, change.table); + // FIXME: we should use tables from the storage, not from the cache. const fromTables = this.tableCache.get(fromTableId); // Old table needs to be cleaned up if (fromTables != null) { diff --git a/modules/module-mysql/test/src/schema-changes.test.ts b/modules/module-mysql/test/src/schema-changes.test.ts index 32d840e30..95efeb52c 100644 --- a/modules/module-mysql/test/src/schema-changes.test.ts +++ b/modules/module-mysql/test/src/schema-changes.test.ts @@ -78,7 +78,7 @@ function defineTests(config: storage.TestStorageConfig) { ]); }); - test('Create table: New table in is in the sync rules', async () => { + test('Create table: New table is in the sync rules', async () => { await using context = await BinlogStreamTestContext.open(factory); const { connectionManager } = context; await context.updateSyncRules(BASIC_SYNC_RULES); @@ -96,45 +96,44 @@ function defineTests(config: storage.TestStorageConfig) { expect(data).toMatchObject([PUT_T1, PUT_T1]); }); - test('Create table: New table is created from existing data', async () => { + test.skipIf(isMySQL57)('Create table: New table is created from existing data', async () => { // Create table with select from is not allowed in MySQL 5.7 when enforce_gtid_consistency=ON - if (!isMySQL57) { - await using context = await BinlogStreamTestContext.open(factory); - const { connectionManager } = context; - await context.updateSyncRules(BASIC_SYNC_RULES); - await connectionManager.query(`CREATE TABLE test_data_from + await using context = await BinlogStreamTestContext.open(factory); + const { connectionManager } = context; + await context.updateSyncRules(BASIC_SYNC_RULES); + + await connectionManager.query(`CREATE TABLE test_data_from ( id CHAR(36) PRIMARY KEY, description TEXT )`); - await connectionManager.query(`INSERT INTO test_data_from(id, description) + await connectionManager.query(`INSERT INTO test_data_from(id, description) VALUES ('t1', 'test1')`); - await connectionManager.query(`INSERT INTO test_data_from(id, description) + await connectionManager.query(`INSERT INTO test_data_from(id, description) VALUES ('t2', 'test2')`); - await connectionManager.query(`INSERT INTO test_data_from(id, description) + await connectionManager.query(`INSERT INTO test_data_from(id, description) VALUES ('t3', 'test3')`); - await context.replicateSnapshot(); - await context.startStreaming(); - - // Add table after initial replication - await connectionManager.query(`CREATE TABLE test_data SELECT * FROM test_data_from`); - - const data = await context.getBucketData('global[]'); - - // 
Interestingly, the create with select triggers binlog row write events - expect(data).toMatchObject([ - // From snapshot - PUT_T1, - PUT_T2, - PUT_T3, - // From replication stream - PUT_T1, - PUT_T2, - PUT_T3 - ]); - } + await context.replicateSnapshot(); + await context.startStreaming(); + + // Add table after initial replication + await connectionManager.query(`CREATE TABLE test_data SELECT * FROM test_data_from`); + + const data = await context.getBucketData('global[]'); + + // Interestingly, the create with select triggers binlog row write events + expect(data).toMatchObject([ + // From snapshot + PUT_T1, + PUT_T2, + PUT_T3, + // From replication stream + PUT_T1, + PUT_T2, + PUT_T3 + ]); }); test('Create table: New table is not in the sync rules', async () => { diff --git a/modules/module-postgres-storage/src/storage/PostgresSourceTable.ts b/modules/module-postgres-storage/src/storage/PostgresSourceTable.ts new file mode 100644 index 000000000..3eafe949c --- /dev/null +++ b/modules/module-postgres-storage/src/storage/PostgresSourceTable.ts @@ -0,0 +1,41 @@ +import { ReplicationAssertionError } from '@powersync/lib-services-framework'; +import { SourceTable, SourceTableOptions } from '@powersync/service-core'; + +export class PostgresSourceTable extends SourceTable { + public readonly groupId: number; + + constructor(options: SourceTableOptions, postgresOptions: { groupId: number }) { + super(options); + this.groupId = postgresOptions.groupId; + + if (typeof options.id != 'string') { + throw new ReplicationAssertionError('PostgresSourceTable id must be a string'); + } + } + + get id() { + return this.options.id as string; + } + + clone(): PostgresSourceTable { + const copy = new PostgresSourceTable( + { + id: this.id, + connectionTag: this.connectionTag, + objectId: this.objectId, + schema: this.schema, + name: this.name, + replicaIdColumns: this.replicaIdColumns, + snapshotComplete: this.snapshotComplete, + pattern: this.pattern, + bucketDataSourceIds: this.bucketDataSourceIds, + parameterLookupSourceIds: this.parameterLookupSourceIds + }, + { groupId: this.groupId } + ); + copy.syncData = this.syncData; + copy.syncParameters = this.syncParameters; + copy.snapshotStatus = this.snapshotStatus; + return copy; + } +} diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 86aef4c86..8e22026b4 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -40,6 +40,7 @@ import { PostgresWriteCheckpointAPI } from './checkpoints/PostgresWriteCheckpoin import { PostgresBucketStorageFactory } from './PostgresBucketStorageFactory.js'; import { PostgresCompactor } from './PostgresCompactor.js'; import { postgresTableId } from './batch/PostgresPersistedBatch.js'; +import { PostgresSourceTable } from './PostgresSourceTable.js'; export type PostgresSyncRulesStorageOptions = { factory: PostgresBucketStorageFactory; @@ -257,15 +258,18 @@ export class PostgresSyncRulesStorage sourceTableRow = row; } - const sourceTable = new storage.SourceTable({ - id: sourceTableRow!.id, - connectionTag: connection_tag, - objectId: objectId, - schema: schema, - name: table, - replicaIdColumns: replicaIdColumns, - snapshotComplete: sourceTableRow!.snapshot_done ?? 
true - }); + const sourceTable = new PostgresSourceTable( + { + id: sourceTableRow!.id, + connectionTag: connection_tag, + objectId: objectId, + schema: schema, + name: table, + replicaIdColumns: replicaIdColumns, + snapshotComplete: sourceTableRow!.snapshot_done ?? true + }, + { groupId: group_id } + ); if (!sourceTable.snapshotComplete) { sourceTable.snapshotStatus = { totalEstimatedCount: Number(sourceTableRow!.snapshot_total_estimated_count ?? -1n), @@ -323,20 +327,23 @@ export class PostgresSyncRulesStorage table: sourceTable, dropTables: truncatedTables.map( (doc) => - new storage.SourceTable({ - id: doc.id, - connectionTag: connection_tag, - objectId: doc.relation_id?.object_id ?? 0, - schema: doc.schema_name, - name: doc.table_name, - replicaIdColumns: - doc.replica_id_columns?.map((c) => ({ - name: c.name, - typeOid: c.typeId, - type: c.type - })) ?? [], - snapshotComplete: doc.snapshot_done ?? true - }) + new PostgresSourceTable( + { + id: doc.id, + connectionTag: connection_tag, + objectId: doc.relation_id?.object_id ?? 0, + schema: doc.schema_name, + name: doc.table_name, + replicaIdColumns: + doc.replica_id_columns?.map((c) => ({ + name: c.name, + typeOid: c.typeId, + type: c.type + })) ?? [], + snapshotComplete: doc.snapshot_done ?? true + }, + { groupId: group_id } + ) ) }; }); diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts index f3611070e..afd4a879a 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts @@ -178,6 +178,9 @@ export class PostgresBucketBatch } } + /** + * No-op for tables we do not own, although it does still have some overhead. + */ protected async truncateSingle(sourceTable: storage.SourceTable) { // To avoid too large transactions, we limit the amount of data we delete per transaction. // Since we don't use the record data here, we don't have explicit size limits per batch. 
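The comment above describes chunked deletion: many small transactions instead of one large one. A minimal sketch of that shape, assuming a hypothetical deleteChunk() helper that removes up to a fixed number of rows in its own transaction (the real logic lives in truncateSingle):

// Sketch only — `deleteChunk` is a hypothetical helper that deletes up to `limit` rows
// for one source table inside its own transaction and returns how many it removed.
async function truncateInChunks(
  deleteChunk: (tableId: string, limit: number) => Promise<number>,
  tableId: string,
  chunkSize = 5_000
) {
  // Each iteration is its own small transaction, so a very large table never produces a
  // single very large delete transaction.
  while (true) {
    const deleted = await deleteChunk(tableId, chunkSize);
    if (deleted < chunkSize) {
      break;
    }
  }
}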
@@ -255,10 +258,12 @@ export class PostgresBucketBatch await this.db.transaction(async (db) => { for (const table of sourceTables) { + // Only delete tables we own await db.sql` DELETE FROM source_tables WHERE id = ${{ type: 'varchar', value: table.id }} + AND group_id = ${{ type: 'int4', value: this.group_id }} `.execute(); } }); diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts index e5a551e08..a6f37cecd 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts @@ -10,6 +10,7 @@ import { RowProcessor } from '@powersync/service-sync-rules'; import { models } from '../../types/types.js'; import { PostgresBucketBatch } from './PostgresBucketBatch.js'; import { postgresTableId } from './PostgresPersistedBatch.js'; +import { PostgresSourceTable } from '../PostgresSourceTable.js'; export interface PostgresWriterOptions { db: lib_postgres.DatabaseClient; @@ -28,11 +29,6 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { public subWriters: PostgresBucketBatch[] = []; - /** - * Map table id => relevant PostgresBucketBatch - */ - private sourceTableMap = new Map(); - constructor(options: PostgresWriterOptions) { this.db = options.db; this.rowProcessor = options.rowProcessor; @@ -90,7 +86,6 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { idGenerator: options.idGenerator }); result.tables.push(subResult.table); - this.sourceTableMap.set(postgresTableId(subResult.table.id), subWriter); result.dropTables.push(...subResult.dropTables); } return result; @@ -98,11 +93,17 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { private subWriterForTable(table: storage.SourceTable): PostgresBucketBatch { // FIXME: store on the SourceTable instead? - const mapped = this.sourceTableMap.get(postgresTableId(table.id)); - if (mapped != null) { - return mapped; + if (!(table instanceof PostgresSourceTable)) { + throw new ReplicationAssertionError(`Source table is not a PostgresSourceTable`); } - throw new ReplicationAssertionError(`No sub-writer found for source table ${table.qualifiedName}`); + const subWriter = this.subWriters.find((sw) => sw.storage.group_id === table.groupId); + if (subWriter == null) { + throw new ReplicationAssertionError( + `No sub-writer found for source table ${table.qualifiedName} with group ID ${table.groupId}` + ); + } + + return subWriter; } async getTable(ref: storage.SourceTable): Promise { @@ -127,19 +128,22 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { ); } - const sourceTable = new storage.SourceTable({ - // Immutable values - id: sourceTableRow.id, - connectionTag: ref.connectionTag, - objectId: ref.objectId, - schema: ref.schema, - name: ref.name, - replicaIdColumns: ref.replicaIdColumns, - pattern: ref.pattern, - - // Table state - snapshotComplete: sourceTableRow!.snapshot_done ?? true - }); + const sourceTable = new PostgresSourceTable( + { + // Immutable values + id: sourceTableRow.id, + connectionTag: ref.connectionTag, + objectId: ref.objectId, + schema: ref.schema, + name: ref.name, + replicaIdColumns: ref.replicaIdColumns, + pattern: ref.pattern, + + // Table state + snapshotComplete: sourceTableRow!.snapshot_done ?? 
true + }, + { groupId: sourceTableRow.group_id } + ); if (!sourceTable.snapshotComplete) { sourceTable.snapshotStatus = { totalEstimatedCount: Number(sourceTableRow!.snapshot_total_estimated_count ?? -1n), @@ -151,7 +155,6 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { sourceTable.syncEvent = ref.syncEvent; sourceTable.syncData = ref.syncData; sourceTable.syncParameters = ref.syncParameters; - this.sourceTableMap.set(postgresTableId(sourceTable.id), subWriter); return sourceTable; } From 21517d7c87061ab136076dda4e83c42470a2bd83 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 14:04:37 +0200 Subject: [PATCH 074/101] Fix dropping tables. --- .../implementation/MongoBucketDataWriter.ts | 69 ++++++++++--------- .../src/replication/BinLogStream.ts | 4 +- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts index 81458898a..4dfd6521b 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts @@ -313,6 +313,8 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { type_oid: column.typeId })); let result: storage.ResolveTablesResult | null = null; + + let currentTableIds: bson.ObjectId[] = []; await this.db.client.withSession(async (session) => { const col = this.db.source_tables; let filter: mongo.Filter = { @@ -330,6 +332,8 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { let coveredBucketDataSourceIds = new Set(); let coveredParameterLookupSourceIds = new Set(); + // Use _all_ docs that match the basic table definition, not only ones that match data sources. + currentTableIds = docs.map((doc) => doc._id); for (let doc of docs) { const matchingBucketDataSourceIds = doc.bucket_data_source_ids.filter((id) => bucketDataSourceIds.includes(id)); const matchingParameterLookupSourceIds = doc.parameter_lookup_source_ids.filter((id) => @@ -367,6 +371,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { bucket_data_source_ids: pendingBucketDataSourceIds, parameter_lookup_source_ids: pendingParameterLookupSourceIds }; + currentTableIds.push(doc._id); await col.insertOne(doc, { session }); matchingDocs.push(doc); @@ -400,42 +405,40 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { return sourceTable; }); - // FIXME: dropTables - // let dropTables: storage.SourceTable[] = []; - // // Detect tables that are either renamed, or have different replica_id_columns - // let truncateFilter = [{ schema_name: schema, table_name: name }] as any[]; - // if (objectId != null) { - // // Only detect renames if the source uses relation ids. - // truncateFilter.push({ relation_id: objectId }); - // } - // const truncate = await col - // .find( - // { - // group_id: group_id, - // connection_id: connection_id, - // _id: { $ne: doc._id }, - // $or: truncateFilter - // }, - // { session } - // ) - // .toArray(); - // dropTables = truncate.map( - // (doc) => - // new storage.SourceTable({ - // id: doc._id, - // connectionTag: connection_tag, - // objectId: doc.relation_id, - // schema: doc.schema_name, - // name: doc.table_name, - // replicaIdColumns: - // doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? 
[], - // snapshotComplete: doc.snapshot_done ?? true - // }) - // ); + // Detect tables that are either renamed, or have different replica_id_columns + let truncateFilter: mongo.Filter[] = [{ schema_name: schema, table_name: name }]; + if (objectId != null) { + // Only detect renames if the source uses relation ids. + truncateFilter.push({ relation_id: objectId }); + } + const truncate = await col + .find( + { + connection_id: connection_id, + _id: { $nin: currentTableIds }, + $or: truncateFilter + }, + { session } + ) + .toArray(); + const dropTables = truncate.map( + (doc) => + new storage.SourceTable({ + id: doc._id, + connectionTag: connection_tag, + objectId: doc.relation_id, + schema: doc.schema_name, + name: doc.table_name, + replicaIdColumns: + doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], + snapshotComplete: doc.snapshot_done ?? true, + pattern: options.pattern + }) + ); result = { tables: sourceTables, - dropTables: [] + dropTables: dropTables }; }); return result!; diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index 78b5302ea..af87921b9 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -139,7 +139,7 @@ export class BinLogStream { }); // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, - // so this should be a no-op. + // but changes in replication identity columns can, so this is needed. await writer.drop(result.dropTables); this.tableCache.set(entity.objectId!, result.tables); @@ -168,7 +168,7 @@ export class BinLogStream { }); // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, - // so this should be a no-op. + // but changes in replication identity columns can, so this is needed. await writer.drop(result.dropTables); for (let table of result.tables) { From 5b30f548bd20e5daa5b8195ddaac6ddbaa202553 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 14:22:13 +0200 Subject: [PATCH 075/101] Fix skipIf. 
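
vitest evaluates test.skipIf() while it collects the suite, before any beforeAll() hook runs, so a flag assigned in beforeAll() is still at its initial value when skipIf() reads it. Computing the flag with an awaited block inside an async describe() makes it available in time. A minimal sketch of the pattern; detectLegacyServer() is a hypothetical stand-in for the getMySQLVersion()/satisfiesVersion() check used in these tests:

  import { describe, expect, test } from 'vitest';

  // Hypothetical helper standing in for the MySQL version check.
  async function detectLegacyServer(): Promise<boolean> {
    return false;
  }

  describe('version-dependent tests', async () => {
    // Evaluated during collection, so the value is known by the time
    // test.skipIf() below is read. A beforeAll() hook would run too late.
    const isLegacy = await detectLegacyServer();

    test.skipIf(isLegacy)('uses syntax only available on newer servers', async () => {
      expect(1 + 1).toBe(2);
    });
  });
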
--- .../test/src/BinLogListener.test.ts | 33 +++++++++---------- .../test/src/schema-changes.test.ts | 9 ++--- modules/module-mysql/test/src/util.ts | 4 +-- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/modules/module-mysql/test/src/BinLogListener.test.ts b/modules/module-mysql/test/src/BinLogListener.test.ts index 9fb75fc5a..ddb9386ba 100644 --- a/modules/module-mysql/test/src/BinLogListener.test.ts +++ b/modules/module-mysql/test/src/BinLogListener.test.ts @@ -13,7 +13,7 @@ import { getMySQLVersion, qualifiedMySQLTable, satisfiesVersion } from '@module/ import crypto from 'crypto'; import { TablePattern } from '@powersync/service-sync-rules'; -describe('BinlogListener tests', () => { +describe('BinlogListener tests', async () => { const MAX_QUEUE_CAPACITY_MB = 1; const BINLOG_LISTENER_CONNECTION_OPTIONS = { ...TEST_CONNECTION_OPTIONS, @@ -23,15 +23,15 @@ describe('BinlogListener tests', () => { let connectionManager: MySQLConnectionManager; let eventHandler: TestBinLogEventHandler; let binLogListener: BinLogListener; - let isMySQL57: boolean = false; + let isMySQL57: boolean; - beforeAll(async () => { + { connectionManager = new MySQLConnectionManager(BINLOG_LISTENER_CONNECTION_OPTIONS, {}); const connection = await connectionManager.getConnection(); const version = await getMySQLVersion(connection); isMySQL57 = satisfiesVersion(version, '5.7.x'); connection.release(); - }); + } beforeEach(async () => { const connection = await connectionManager.getConnection(); @@ -232,20 +232,19 @@ describe('BinlogListener tests', () => { ); }); - test('Schema change event: Rename column via rename statement', async () => { + test.skipIf(isMySQL57)('Schema change event: Rename column via rename statement', async () => { // Syntax ALTER TABLE RENAME COLUMN was only introduced in MySQL 8.0.0 - if (!isMySQL57) { - await binLogListener.start(); - await connectionManager.query(`ALTER TABLE test_DATA RENAME COLUMN description TO description_new`); - await vi.waitFor(() => expect(eventHandler.schemaChanges.length).toBe(1), { timeout: 5000 }); - await binLogListener.stop(); - assertSchemaChange( - eventHandler.schemaChanges[0], - SchemaChangeType.ALTER_TABLE_COLUMN, - connectionManager.databaseName, - 'test_DATA' - ); - } + + await binLogListener.start(); + await connectionManager.query(`ALTER TABLE test_DATA RENAME COLUMN description TO description_new`); + await vi.waitFor(() => expect(eventHandler.schemaChanges.length).toBe(1), { timeout: 5000 }); + await binLogListener.stop(); + assertSchemaChange( + eventHandler.schemaChanges[0], + SchemaChangeType.ALTER_TABLE_COLUMN, + connectionManager.databaseName, + 'test_DATA' + ); }); test('Schema change event: Multiple column changes', async () => { diff --git a/modules/module-mysql/test/src/schema-changes.test.ts b/modules/module-mysql/test/src/schema-changes.test.ts index 95efeb52c..ebfb26f6c 100644 --- a/modules/module-mysql/test/src/schema-changes.test.ts +++ b/modules/module-mysql/test/src/schema-changes.test.ts @@ -26,18 +26,19 @@ const PUT_T3 = test_utils.putOp('test_data', { id: 't3', description: 'test3' }) const REMOVE_T1 = test_utils.removeOp('test_data', 't1'); const REMOVE_T2 = test_utils.removeOp('test_data', 't2'); -function defineTests(config: storage.TestStorageConfig) { +async function defineTests(config: storage.TestStorageConfig) { const factory = config.factory; - let isMySQL57: boolean = false; - beforeAll(async () => { + let isMySQL57: boolean; + { + // This is similar to a beforeAll() block, but doing it this way ensures 
the flag is available for skipIf(). const connectionManager = new MySQLConnectionManager(TEST_CONNECTION_OPTIONS, {}); const connection = await connectionManager.getConnection(); const version = await getMySQLVersion(connection); isMySQL57 = satisfiesVersion(version, '5.7.x'); connection.release(); await connectionManager.end(); - }); + } test('Re-create table', async () => { await using context = await BinlogStreamTestContext.open(factory); diff --git a/modules/module-mysql/test/src/util.ts b/modules/module-mysql/test/src/util.ts index 23eb076bc..58a031e41 100644 --- a/modules/module-mysql/test/src/util.ts +++ b/modules/module-mysql/test/src/util.ts @@ -30,11 +30,11 @@ export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.test_utils. export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageConfig) => void) { describe.skipIf(!env.TEST_MONGO_STORAGE)(`mongodb storage`, options, function () { - fn(INITIALIZED_MONGO_STORAGE_FACTORY); + return fn(INITIALIZED_MONGO_STORAGE_FACTORY); }); describe.skipIf(!env.TEST_POSTGRES_STORAGE)(`postgres storage`, options, function () { - fn(INITIALIZED_POSTGRES_STORAGE_FACTORY); + return fn(INITIALIZED_POSTGRES_STORAGE_FACTORY); }); } From 2489278b7459a9d2527016ea910aee320bd73a26 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 14:26:47 +0200 Subject: [PATCH 076/101] Fix sorting in test. --- modules/module-mongodb/test/src/change_stream.test.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/module-mongodb/test/src/change_stream.test.ts b/modules/module-mongodb/test/src/change_stream.test.ts index c6b9dd8ee..593715541 100644 --- a/modules/module-mongodb/test/src/change_stream.test.ts +++ b/modules/module-mongodb/test/src/change_stream.test.ts @@ -443,7 +443,9 @@ bucket_definitions: const data = await context.getBucketData('global[]'); // Either case is valid here if (data.length == 3) { - expect(data.sort((a, b) => a.object_id?.localeCompare(b.object_id!) ?? 0)).toMatchObject([ + expect( + data.sort((a, b) => JSON.parse(a.data!).description.localeCompare(JSON.parse(b.data!).description) ?? 0) + ).toMatchObject([ // An extra op here, since this triggers a snapshot in addition to getting the event. // Can be either test1, test2, test2 or test2, test1, test2 test_utils.putOp('test_data', { id: test_id!.toHexString(), description: 'test1' }), From b4e03b7262adf8df661a687cfb5a436a5cb04235 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 15:05:50 +0200 Subject: [PATCH 077/101] Restructure logic to drop tables. 
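
This splits table resolution into two writer calls: resolveTables() returns the tables matching a pattern, and the new resolveTablesToDrop() reports conflicting tables (renames, changed replica id columns) that should be dropped. A sketch of the intended call sequence, mirroring the WalStream change in this patch; the wrapper function and its parameter names are illustrative rather than code from this repository:

  import { storage } from '@powersync/service-core';
  import { TablePattern } from '@powersync/service-sync-rules';

  async function resolveAndCleanUp(
    writer: storage.BucketDataWriter,
    connectionId: number,
    connectionTag: string,
    descriptor: storage.SourceEntityDescriptor,
    patterns: TablePattern[]
  ): Promise<storage.SourceTable[]> {
    const allTables: storage.SourceTable[] = [];
    for (const pattern of patterns) {
      // Resolve the tables matching this pattern; no drop information here anymore.
      const resolved = await writer.resolveTables({
        connection_id: connectionId,
        connection_tag: connectionTag,
        entity_descriptor: descriptor,
        pattern
      });
      allTables.push(...resolved);
    }

    // Conflicting tables are now reported separately and dropped explicitly.
    const dropTables = await writer.resolveTablesToDrop({
      connection_id: connectionId,
      connection_tag: connectionTag,
      entity_descriptor: descriptor
    });
    if (dropTables.length > 0) {
      await writer.drop(dropTables);
    }
    return allTables;
  }

Keeping the drop step out of resolveTables() lets each connector decide when, and whether, to drop, which the MySQL and Postgres changes below rely on.
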
--- .../implementation/MongoBucketDataWriter.ts | 110 ++++++++++++------ .../src/replication/ChangeStream.ts | 15 +-- .../src/replication/MongoSnapshotter.ts | 4 +- .../module-mssql/src/replication/CDCStream.ts | 21 +++- .../src/replication/BinLogStream.ts | 30 +++-- .../storage/batch/PostgresBucketDataWriter.ts | 25 ++-- .../src/replication/PostgresSnapshotter.ts | 12 +- .../src/replication/WalStream.ts | 24 ++-- .../test/src/schema_changes.test.ts | 2 +- .../src/test-utils/general-utils.ts | 4 +- .../src/storage/BucketDataWriter.ts | 16 ++- .../src/storage/SyncRulesBucketStorage.ts | 6 + 12 files changed, 179 insertions(+), 90 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts index 4dfd6521b..6457de1f2 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts @@ -19,6 +19,7 @@ import { InternalOpId, isCompleteRow, maxLsn, + ResolveTableToDropsOptions, SaveOperationTag, SourceTable, storage, @@ -296,7 +297,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { return sourceTable; } - async resolveTables(options: storage.ResolveTablesOptions): Promise { + async resolveTables(options: storage.ResolveTablesOptions): Promise { const sources = this.rowProcessor.getMatchingSources(options.pattern); const bucketDataSourceIds = sources.bucketDataSources.map((source) => this.mapping.bucketSourceId(source)); const parameterLookupSourceIds = sources.parameterIndexLookupCreators.map((source) => @@ -312,7 +313,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { type: column.type, type_oid: column.typeId })); - let result: storage.ResolveTablesResult | null = null; + let result: SourceTable[] = []; let currentTableIds: bson.ObjectId[] = []; await this.db.client.withSession(async (session) => { @@ -406,44 +407,81 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { }); // Detect tables that are either renamed, or have different replica_id_columns - let truncateFilter: mongo.Filter[] = [{ schema_name: schema, table_name: name }]; - if (objectId != null) { - // Only detect renames if the source uses relation ids. - truncateFilter.push({ relation_id: objectId }); - } - const truncate = await col - .find( - { - connection_id: connection_id, - _id: { $nin: currentTableIds }, - $or: truncateFilter - }, - { session } - ) - .toArray(); - const dropTables = truncate.map( - (doc) => - new storage.SourceTable({ - id: doc._id, - connectionTag: connection_tag, - objectId: doc.relation_id, - schema: doc.schema_name, - name: doc.table_name, - replicaIdColumns: - doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], - snapshotComplete: doc.snapshot_done ?? 
true, - pattern: options.pattern - }) - ); - result = { - tables: sourceTables, - dropTables: dropTables - }; + result = sourceTables; }); - return result!; + return result; } + async resolveTablesToDrop(options: ResolveTableToDropsOptions): Promise { + const { connection_id, connection_tag, entity_descriptor } = options; + const { schema, name, objectId, replicaIdColumns } = entity_descriptor; + const normalizedReplicaIdColumns = replicaIdColumns.map((column) => ({ + name: column.name, + type: column.type, + type_oid: column.typeId + })); + const col = this.db.source_tables; + let filter: mongo.Filter = { + connection_id: connection_id, + schema_name: schema, + table_name: name, + replica_id_columns2: normalizedReplicaIdColumns + }; + if (objectId != null) { + filter.relation_id = objectId; + } + + let filters: mongo.Filter[] = []; + // Case 1: name matches, but replica_id_columns2 differs + filters.push({ + connection_id: connection_id, + schema_name: schema, + table_name: name, + replica_id_columns2: { $ne: normalizedReplicaIdColumns } + }); + if (objectId != null) { + // Case 2: relation_id differs + filters.push({ + connection_id: connection_id, + schema_name: schema, + table_name: name, + relation_id: { $ne: objectId } + }); + // Case 3: relation_id matches, but name differs + filters.push({ + $nor: [ + { + connection_id: connection_id, + schema_name: schema, + table_name: name + } + ], + relation_id: objectId + }); + } + + const truncate = await col + .find({ + $or: filters, + connection_id: connection_id + }) + .toArray(); + const dropTables = truncate.map( + (doc) => + new storage.SourceTable({ + id: doc._id, + connectionTag: connection_tag, + objectId: doc.relation_id, + schema: doc.schema_name, + name: doc.table_name, + replicaIdColumns: + doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], + snapshotComplete: doc.snapshot_done ?? true + }) + ); + return dropTables; + } /** * Queues the creation of a custom Write Checkpoint. This will be persisted after operations are flushed. */ diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 3a01e7f4e..40b7db919 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -486,7 +486,7 @@ export class ChangeStream { let allTables: SourceTable[] = []; for (let pattern of patterns) { - const result = await writer.resolveTables({ + const resolvedTables = await writer.resolveTables({ connection_id: this.connection_id, connection_tag: this.connections.connectionTag, entity_descriptor: descriptor, @@ -495,27 +495,18 @@ export class ChangeStream { const snapshot = options.snapshot; - // Drop conflicting collections. - // This is generally not expected for MongoDB source dbs, so we log an error. - if (result.dropTables.length > 0) { - this.logger.error( - `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}` - ); - await writer.drop(result.dropTables); - } - // Snapshot if: // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) // 2. Snapshot is not already done, AND: // 3. The table is used in sync rules. 
- for (let table of result.tables) { + for (let table of resolvedTables) { const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; if (shouldSnapshot) { this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); await this.snapshotter.queueSnapshot(writer, table); } } - allTables.push(...result.tables); + allTables.push(...resolvedTables); } this.relationCache.set(getCacheIdentifier(descriptor), allTables); diff --git a/modules/module-mongodb/src/replication/MongoSnapshotter.ts b/modules/module-mongodb/src/replication/MongoSnapshotter.ts index 8ae7fb89b..7469c454f 100644 --- a/modules/module-mongodb/src/replication/MongoSnapshotter.ts +++ b/modules/module-mongodb/src/replication/MongoSnapshotter.ts @@ -286,8 +286,8 @@ export class MongoSnapshotter { entity_descriptor: getMongoRelation({ db: schema, coll: collection.name }), pattern: tablePattern }); - // TODO: dropTables? - result.push(...sourceTables.tables); + + result.push(...sourceTables); } return result; diff --git a/modules/module-mssql/src/replication/CDCStream.ts b/modules/module-mssql/src/replication/CDCStream.ts index 27a44f8fd..e09fb82d7 100644 --- a/modules/module-mssql/src/replication/CDCStream.ts +++ b/modules/module-mssql/src/replication/CDCStream.ts @@ -225,8 +225,22 @@ export class CDCStream { tablePattern ); + // Drop conflicting tables. This includes for example renamed tables. + const dropTables = await writer.resolveTablesToDrop({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: { + name: matchedTable.name, + schema: matchedTable.schema, + objectId: matchedTable.objectId, + replicaIdColumns: replicaIdColumns.columns + } + }); + await writer.drop(dropTables); + tables.push(...tables); } + return tables; } @@ -240,19 +254,16 @@ export class CDCStream { throw new ReplicationAssertionError(`objectId expected, got ${typeof table.objectId}`); } - const resolved = await writer.resolveTables({ + const resolvedTables = await writer.resolveTables({ connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: table, pattern }); - // Drop conflicting tables. This includes for example renamed tables. - await writer.drop(resolved.dropTables); - let resultingTables: MSSQLSourceTable[] = []; - for (let table of resolved.tables) { + for (let table of resolvedTables) { const captureInstance = await getCaptureInstance({ connectionManager: this.connections, tableName: table.name, diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index af87921b9..e82a343d2 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -131,7 +131,7 @@ export class BinLogStream { entity: storage.SourceEntityDescriptor, pattern: sync_rules.TablePattern ) { - const result = await writer.resolveTables({ + const resolvedTables = await writer.resolveTables({ connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: entity, @@ -140,11 +140,16 @@ export class BinLogStream { // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, // but changes in replication identity columns can, so this is needed. 
- await writer.drop(result.dropTables); + const dropTables = await writer.resolveTablesToDrop({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: entity + }); + await writer.drop(dropTables); - this.tableCache.set(entity.objectId!, result.tables); + this.tableCache.set(entity.objectId!, resolvedTables); - return result.tables; + return resolvedTables; } async handleChangeRelation(writer: storage.BucketDataWriter, entity: storage.SourceEntityDescriptor) { @@ -160,18 +165,14 @@ export class BinLogStream { let allTables: SourceTable[] = []; for (let pattern of patterns) { - const result = await writer.resolveTables({ + const resolvedTables = await writer.resolveTables({ connection_id: this.connectionId, connection_tag: this.connectionTag, entity_descriptor: entity, pattern }); - // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, - // but changes in replication identity columns can, so this is needed. - await writer.drop(result.dropTables); - - for (let table of result.tables) { + for (let table of resolvedTables) { // Snapshot if: // 1. Snapshot is not done yet, AND: // 2. The table is used in sync rules. @@ -209,6 +210,15 @@ export class BinLogStream { } } + // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, + // but changes in replication identity columns can, so this is needed. + const dropTables = await writer.resolveTablesToDrop({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: entity + }); + await writer.drop(dropTables); + // Since we create the objectId ourselves, this is always defined this.tableCache.set(entity.objectId!, allTables); return allTables; diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts index a6f37cecd..47879161e 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketDataWriter.ts @@ -72,11 +72,8 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { } } - async resolveTables(options: storage.ResolveTablesOptions): Promise { - let result: storage.ResolveTablesResult = { - tables: [], - dropTables: [] - }; + async resolveTables(options: storage.ResolveTablesOptions): Promise { + let result: storage.SourceTable[] = []; for (let subWriter of this.subWriters) { const subResult = await subWriter.storage.resolveTable({ connection_id: options.connection_id, @@ -85,8 +82,22 @@ export class PostgresBucketDataWriter implements storage.BucketDataWriter { sync_rules: subWriter.sync_rules, idGenerator: options.idGenerator }); - result.tables.push(subResult.table); - result.dropTables.push(...subResult.dropTables); + result.push(subResult.table); + } + return result; + } + + async resolveTablesToDrop(options: storage.ResolveTableToDropsOptions): Promise { + // FIXME: remove the duplicate work between this and resolveTables() + let result: storage.SourceTable[] = []; + for (let subWriter of this.subWriters) { + const subResult = await subWriter.storage.resolveTable({ + connection_id: options.connection_id, + connection_tag: options.connection_tag, + entity_descriptor: options.entity_descriptor, + sync_rules: subWriter.sync_rules + }); + result.push(...subResult.dropTables); } return 
result; } diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts index 509f979a6..367e1155b 100644 --- a/modules/module-postgres/src/replication/PostgresSnapshotter.ts +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -177,7 +177,7 @@ export class PostgresSnapshotter { const columnTypes = columnTypesResult.rows.map((row) => Number(row.decodeWithoutCustomTypes(0))); - const resolvedResult = await writer.resolveTables({ + const resolveOptions = { connection_id: this.connection_id, connection_tag: this.connections.connectionTag, entity_descriptor: { @@ -187,13 +187,17 @@ export class PostgresSnapshotter { replicaIdColumns: cresult.replicationColumns }, pattern: tablePattern - }); + }; + const resolvedResult = await writer.resolveTables(resolveOptions); // Ensure we have a description for custom types referenced in the table. await this.connections.types.fetchTypes(columnTypes); - // TODO: dropTables? - result.push(...resolvedResult.tables); + result.push(...resolvedResult); + + const dropTables = await writer.resolveTablesToDrop(resolveOptions); + // TODO: Do this in the replication loop, not when listing the tables + await writer.drop(dropTables); } return result; } diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index 61565cefb..05ee68580 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -188,19 +188,13 @@ export class WalStream { let allTables: SourceTable[] = []; for (let pattern of patterns) { - const result = await writer.resolveTables({ + const resolvedTables = await writer.resolveTables({ connection_id: this.connection_id, connection_tag: this.connections.connectionTag, entity_descriptor: descriptor, pattern }); - // Drop conflicting tables. This includes for example renamed tables. - if (result.dropTables.length > 0) { - this.logger.info(`Dropping conflicting tables: ${result.dropTables.map((t) => t.qualifiedName).join(', ')}`); - await writer.drop(result.dropTables); - } - // Ensure we have a description for custom types referenced in the table. await this.connections.types.fetchTypes(referencedTypeIds); @@ -208,7 +202,7 @@ export class WalStream { // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere) // 2. Snapshot is not already done, AND: // 3. The table is used in sync rules. - for (let table of result.tables) { + for (let table of resolvedTables) { const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny; if (shouldSnapshot) { this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`); @@ -216,8 +210,20 @@ export class WalStream { } } - allTables.push(...result.tables); + allTables.push(...resolvedTables); + } + + const dropTables = await writer.resolveTablesToDrop({ + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor + }); + // Drop conflicting tables. This includes for example renamed tables. 
+ this.logger.info(`Dropping conflicting tables: ${dropTables.map((t) => t.qualifiedName).join(', ')}`); + if (dropTables.length > 0) { + await writer.drop(dropTables); } + this.relationCache.set(descriptor.objectId, allTables); return allTables; diff --git a/modules/module-postgres/test/src/schema_changes.test.ts b/modules/module-postgres/test/src/schema_changes.test.ts index 7c6671488..451b310f2 100644 --- a/modules/module-postgres/test/src/schema_changes.test.ts +++ b/modules/module-postgres/test/src/schema_changes.test.ts @@ -629,7 +629,7 @@ config: { statement: `UPDATE test_data SET other = ROW(TRUE, 2)::composite;` } ); - const data = await context.getBucketData('1#stream|0[]'); + const data = await context.getBucketData('stream|0[]'); expect(data).toMatchObject([ putOp('test_data', { id: 't1' }), putOp('test_data', { id: 't1', other: '{"foo":1,"bar":2}' }) diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 26ee99da9..60d4640e1 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -66,11 +66,11 @@ export async function resolveTestTable( return id; } }); - const table = result.tables[0]; + const table = result[0]; if (table == null) { throw new Error(`Failed to resolve test table ${name}`); } - return result.tables[0]; + return result[0]; } export function getBatchData( diff --git a/packages/service-core/src/storage/BucketDataWriter.ts b/packages/service-core/src/storage/BucketDataWriter.ts index a970b047a..0c7479200 100644 --- a/packages/service-core/src/storage/BucketDataWriter.ts +++ b/packages/service-core/src/storage/BucketDataWriter.ts @@ -10,7 +10,12 @@ import { BSON } from 'bson'; import { InternalOpId } from '../util/utils.js'; import { ReplicationEventPayload } from './ReplicationEventPayload.js'; import { SourceTable, TableSnapshotStatus } from './SourceTable.js'; -import { BatchedCustomWriteCheckpointOptions, ResolveTablesOptions, ResolveTablesResult } from './storage-index.js'; +import { + BatchedCustomWriteCheckpointOptions, + ResolveTablesOptions, + ResolveTablesResult, + ResolveTableToDropsOptions +} from './storage-index.js'; export const DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS: ResolvedBucketBatchCommitOptions = { createEmptyCheckpoints: true, @@ -38,8 +43,15 @@ export interface BucketDataWriter extends BucketDataWriterBase, AsyncDisposable /** * Resolve a table, keeping track of it internally. */ - resolveTables(options: ResolveTablesOptions): Promise; + resolveTables(options: ResolveTablesOptions): Promise; getTable(ref: SourceTable): Promise; + + /** + * Given a replicated table, return a list of tables that should be dropped due to conflicts. + * + * This can be due to renames, or replica id changes. 
+ */ + resolveTablesToDrop(options: ResolveTableToDropsOptions): Promise; } export interface BucketDataWriterBase { diff --git a/packages/service-core/src/storage/SyncRulesBucketStorage.ts b/packages/service-core/src/storage/SyncRulesBucketStorage.ts index 616f47827..29e4ee8b5 100644 --- a/packages/service-core/src/storage/SyncRulesBucketStorage.ts +++ b/packages/service-core/src/storage/SyncRulesBucketStorage.ts @@ -163,6 +163,12 @@ export interface ResolveTablesOptions { idGenerator?: () => string | bson.ObjectId; } +export interface ResolveTableToDropsOptions { + connection_id: number; + connection_tag: string; + entity_descriptor: SourceEntityDescriptor; +} + export interface ResolveTableOptions { connection_id: number; connection_tag: string; From e32357dfa189632a56656a1f956015b7fffdabaa Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Thu, 22 Jan 2026 15:45:59 +0200 Subject: [PATCH 078/101] Change drop/snapshot order for test compatibility. --- .../src/replication/BinLogStream.ts | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index e82a343d2..84df36748 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -163,6 +163,16 @@ export class BinLogStream { name: entity.name }); + // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, + // but changes in replication identity columns can, so this is needed. + // While order of drop / snapshots shouldn't matter, tests expect drops to happen first. + const dropTables = await writer.resolveTablesToDrop({ + connection_id: this.connectionId, + connection_tag: this.connectionTag, + entity_descriptor: entity + }); + await writer.drop(dropTables); + let allTables: SourceTable[] = []; for (let pattern of patterns) { const resolvedTables = await writer.resolveTables({ @@ -210,15 +220,6 @@ export class BinLogStream { } } - // Drop conflicting tables. In the MySQL case with ObjectIds created from the table name, renames cannot be detected by the storage, - // but changes in replication identity columns can, so this is needed. - const dropTables = await writer.resolveTablesToDrop({ - connection_id: this.connectionId, - connection_tag: this.connectionTag, - entity_descriptor: entity - }); - await writer.drop(dropTables); - // Since we create the objectId ourselves, this is always defined this.tableCache.set(entity.objectId!, allTables); return allTables; From c2cfe8da7cef8978909c43936550a0cc0fc2d4f9 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 2 Feb 2026 12:39:36 +0200 Subject: [PATCH 079/101] Fix more tests. 
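
With streaming while snapshotting, the active sync rules content is presumably only available once replicateSnapshot() has run, so the bucket request has to be derived afterwards. A condensed sketch of the corrected ordering in these tests, with the table setup elided:

  await context.updateSyncRules(BASIC_SYNC_RULES);
  // ... create tables and insert the test data ...
  await context.replicateSnapshot();

  // Only now are the active sync rules available to build the bucket request.
  const syncRules = await context.factory.getActiveSyncRulesContent();
  if (!syncRules) {
    throw new Error('Active sync rules not available');
  }
  const request = bucketRequest(syncRules);

  const checkpoint = await context.getCheckpoint({ timeout: 50_000 });
  const checksum = await context.storage!.getChecksums(checkpoint, [request]);
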
--- .../test/src/large_batch.test.ts | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/modules/module-postgres/test/src/large_batch.test.ts b/modules/module-postgres/test/src/large_batch.test.ts index e22d5d890..306d90152 100644 --- a/modules/module-postgres/test/src/large_batch.test.ts +++ b/modules/module-postgres/test/src/large_batch.test.ts @@ -60,11 +60,6 @@ function defineBatchTests(config: storage.TestStorageConfig) { await using context = await WalStreamTestContext.open(factory); // Manual test to check initial replication performance and memory usage await context.updateSyncRules(BASIC_SYNC_RULES); - const syncRules = await context.factory.getActiveSyncRulesContent(); - if (!syncRules) { - throw new Error('Active sync rules not available'); - } - const request = bucketRequest(syncRules); const { pool } = context; await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`); @@ -99,6 +94,12 @@ function defineBatchTests(config: storage.TestStorageConfig) { await context.replicateSnapshot(); + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); + const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); const duration = Date.now() - start; const checksum = await context.storage!.getChecksums(checkpoint, [request]); @@ -115,11 +116,6 @@ function defineBatchTests(config: storage.TestStorageConfig) { await using context = await WalStreamTestContext.open(factory); // This just tests performance of a large number of operations inside a transaction. await context.updateSyncRules(BASIC_SYNC_RULES); - const syncRules = await context.factory.getActiveSyncRulesContent(); - if (!syncRules) { - throw new Error('Active sync rules not available'); - } - const request = bucketRequest(syncRules); const { pool } = context; await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`); @@ -152,6 +148,12 @@ function defineBatchTests(config: storage.TestStorageConfig) { operationCount += perTransaction * 2; } + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); + const start = Date.now(); const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); @@ -206,11 +208,6 @@ function defineBatchTests(config: storage.TestStorageConfig) { - SELECT * FROM test_data - SELECT * FROM test_data `); - const syncRules = await context.factory.getActiveSyncRulesContent(); - if (!syncRules) { - throw new Error('Active sync rules not available'); - } - const request = bucketRequest(syncRules); const { pool } = context; await pool.query(`CREATE TABLE test_data(id serial primary key, description text)`); @@ -244,6 +241,11 @@ function defineBatchTests(config: storage.TestStorageConfig) { }); await context.replicateSnapshot(); + const syncRules = await context.factory.getActiveSyncRulesContent(); + if (!syncRules) { + throw new Error('Active sync rules not available'); + } + const request = bucketRequest(syncRules); const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); const checksum = await context.storage!.getChecksums(checkpoint, [request]); expect(checksum.get(request.bucket)!.count).toEqual((numDocs + 2) * 4); From a1aebc2366c310d64e3319dee8882aa715fa72b9 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 2 
Feb 2026 13:01:38 +0200 Subject: [PATCH 080/101] Remove duplicate processing issue. --- .../src/storage/implementation/MergedSyncRules.ts | 7 +++++++ modules/module-postgres/test/src/large_batch.test.ts | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index 340c2557d..0413b57b2 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -123,6 +123,13 @@ export class MergedSyncRules implements RowProcessor { } } } + + for (let value of this.tableDataSources.values()) { + // Make the arrays unique / remove duplicates: + value.bucketDataSources = Array.from(new Set(value.bucketDataSources)); + value.parameterIndexLookupCreators = Array.from(new Set(value.parameterIndexLookupCreators)); + } + this.resolvedDataSources = resolvedDataSources; this.resolvedParameterLookupSources = resolvedParameterLookupSources; this.sourcePatterns = Array.from(sourcePatternMap.values()); diff --git a/modules/module-postgres/test/src/large_batch.test.ts b/modules/module-postgres/test/src/large_batch.test.ts index 306d90152..476ac4db1 100644 --- a/modules/module-postgres/test/src/large_batch.test.ts +++ b/modules/module-postgres/test/src/large_batch.test.ts @@ -241,12 +241,12 @@ function defineBatchTests(config: storage.TestStorageConfig) { }); await context.replicateSnapshot(); + const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); const syncRules = await context.factory.getActiveSyncRulesContent(); if (!syncRules) { throw new Error('Active sync rules not available'); } const request = bucketRequest(syncRules); - const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); const checksum = await context.storage!.getChecksums(checkpoint, [request]); expect(checksum.get(request.bucket)!.count).toEqual((numDocs + 2) * 4); }); From a759c74d1638d1077950a862b710e6bd8fd60ba0 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 2 Feb 2026 13:10:02 +0200 Subject: [PATCH 081/101] Another test fix. --- modules/module-postgres/test/src/large_batch.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/module-postgres/test/src/large_batch.test.ts b/modules/module-postgres/test/src/large_batch.test.ts index 476ac4db1..d67f88784 100644 --- a/modules/module-postgres/test/src/large_batch.test.ts +++ b/modules/module-postgres/test/src/large_batch.test.ts @@ -94,13 +94,13 @@ function defineBatchTests(config: storage.TestStorageConfig) { await context.replicateSnapshot(); + const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); + const syncRules = await context.factory.getActiveSyncRulesContent(); if (!syncRules) { throw new Error('Active sync rules not available'); } const request = bucketRequest(syncRules); - - const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); const duration = Date.now() - start; const checksum = await context.storage!.getChecksums(checkpoint, [request]); expect(checksum.get(request.bucket)!.count).toEqual(operation_count); From 52ae8618d98786e025cd26a367ab0fb489faa389 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 2 Feb 2026 14:56:50 +0200 Subject: [PATCH 082/101] Rewrite parameter tests to be independent of storage format. 
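
The old tests hard-coded storage-internal lookup names (for example '20002') through ScopedParameterLookup.direct(), which couples them to one storage format's id scheme. The rewrite derives the lookups from the hydrated sync rules instead. A condensed sketch of the pattern applied throughout this patch; the bucketStorage instance and its storage.SyncRulesBucketStorage type are assumed from the surrounding test setup (factory.getInstance(syncRules)):

  import { expect } from 'vitest';
  import { storage } from '@powersync/service-core';
  import { RequestParameters } from '@powersync/service-sync-rules';
  import * as test_utils from '../test-utils/test-utils-index.js';

  async function expectGroupForUser(bucketStorage: storage.SyncRulesBucketStorage) {
    const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS);
    const checkpoint = await bucketStorage.getCheckpoint();

    const requestParameters = new RequestParameters({ sub: 'user1' }, {});
    const { querier } = hydrated.getBucketParameterQuerier(test_utils.querierOptions(requestParameters));

    // The querier knows which lookups apply to this request; the storage only
    // resolves them against the checkpoint, so no lookup ids appear in the test.
    const parameterSets = await checkpoint.getParameterSets(querier.parameterQueryLookups);
    expect(parameterSets).toEqual([{ group_id: 'group1a' }]);
  }
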
--- .../register-data-storage-parameter-tests.ts | 141 +++++++++--------- 1 file changed, 74 insertions(+), 67 deletions(-) diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index 50528a676..82c025042 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -1,6 +1,5 @@ import { storage } from '@powersync/service-core'; -import { RequestParameters, ScopedParameterLookup } from '@powersync/service-sync-rules'; -import { ParameterLookupScope } from '@powersync/service-sync-rules/src/HydrationState.js'; +import { RequestParameters } from '@powersync/service-sync-rules'; import { expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; @@ -16,7 +15,6 @@ import * as test_utils from '../test-utils/test-utils-index.js'; */ export function registerDataStorageParameterTests(config: storage.TestStorageConfig) { const generateStorageFactory = config.factory; - const MYBUCKET_1: ParameterLookupScope = { lookupName: '20002', queryId: '', source: null as any }; test('save and load parameters', async () => { await using factory = await generateStorageFactory(); @@ -32,13 +30,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); - - const MYBUCKET_1: ParameterLookupScope = { lookupName: '20002', queryId: '', source: null as any }; - // We could get the scope automatically like this: - // const parsed = syncRules.parsed(test_utils.PARSE_OPTIONS); - // const hydrated = parsed.hydratedSyncRules(); - // const parameterSource = hydrated.definition.bucketParameterLookupSources[0]; - // const parameterLookupScope = parsed.hydrationState.getParameterIndexLookupScope(parameterSource); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); await writer.markAllSnapshotDone('1/1'); @@ -69,12 +61,11 @@ bucket_definitions: await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); - expect(parameters).toEqual([ - { - group_id: 'group1a' - } - ]); + + const parameters = new RequestParameters({ sub: 'user1' }, {}); + const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; + const parameter_sets = await checkpoint.getParameterSets(querier.parameterQueryLookups); + expect(parameter_sets).toEqual([{ group_id: 'group1a' }]); }); test('it should use the latest version', async () => { @@ -89,6 +80,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); @@ -116,7 +108,11 @@ bucket_definitions: await writer.commit('1/2'); const checkpoint2 = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint2.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); + const querier = hydrated.getBucketParameterQuerier( + test_utils.querierOptions(new RequestParameters({ 
sub: 'user1' }, {})) + ).querier; + + const parameters = await checkpoint2.getParameterSets(querier.parameterQueryLookups); expect(parameters).toEqual([ { group_id: 'group2' @@ -124,7 +120,7 @@ bucket_definitions: ]); // Use the checkpoint to get older data if relevant - const parameters2 = await checkpoint1.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); + const parameters2 = await checkpoint1.getParameterSets(querier.parameterQueryLookups); expect(parameters2).toEqual([ { group_id: 'group1' @@ -148,6 +144,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); const table = await test_utils.resolveTestTable(writer, 'todos', ['id', 'list_id'], config); await writer.markAllSnapshotDone('1/1'); @@ -189,11 +186,13 @@ bucket_definitions: // We specifically request the todo_ids for both lists. // There removal operation for the association of `list2`::`todo2` should not interfere with the new // association of `list1`::`todo2` + const querier = hydrated.getBucketParameterQuerier( + test_utils.querierOptions( + new RequestParameters({ sub: 'user1', parameters: { list_id: ['list1', 'list2'] } }, {}) + ) + ).querier; const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint.getParameterSets([ - ScopedParameterLookup.direct(MYBUCKET_1, ['list1']), - ScopedParameterLookup.direct(MYBUCKET_1, ['list2']) - ]); + const parameters = await checkpoint.getParameterSets(querier.parameterQueryLookups); expect(parameters.sort((a, b) => (a.todo_id as string).localeCompare(b.todo_id as string))).toEqual([ { @@ -218,6 +217,7 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); await writer.markAllSnapshotDone('1/1'); @@ -240,15 +240,26 @@ bucket_definitions: const checkpoint = await bucketStorage.getCheckpoint(); - const parameters1 = await checkpoint.getParameterSets([ - ScopedParameterLookup.direct(MYBUCKET_1, [314n, 314, 3.14]) - ]); + const querier1 = hydrated.getBucketParameterQuerier( + test_utils.querierOptions( + new RequestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3.14 } }, {}) + ) + ).querier; + const parameters1 = await checkpoint.getParameterSets(querier1.parameterQueryLookups); expect(parameters1).toEqual([TEST_PARAMS]); - const parameters2 = await checkpoint.getParameterSets([ - ScopedParameterLookup.direct(MYBUCKET_1, [314, 314n, 3.14]) - ]); + + const querier2 = hydrated.getBucketParameterQuerier( + test_utils.querierOptions( + new RequestParameters({ sub: 'user1', parameters: { n1: 314, f2: 314n, f3: 3.14 } }, {}) + ) + ).querier; + const parameters2 = await checkpoint.getParameterSets(querier2.parameterQueryLookups); expect(parameters2).toEqual([TEST_PARAMS]); - const parameters3 = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, [314n, 314, 3])]); + + const querier3 = hydrated.getBucketParameterQuerier( + test_utils.querierOptions(new RequestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3 } }, {})) + ).querier; + const parameters3 = await checkpoint.getParameterSets(querier3.parameterQueryLookups); 
expect(parameters3).toEqual([]); }); @@ -268,6 +279,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); @@ -301,10 +313,10 @@ bucket_definitions: const TEST_PARAMS = { group_id: 'group1' }; const checkpoint = await bucketStorage.getCheckpoint(); - - const parameters1 = await checkpoint.getParameterSets([ - ScopedParameterLookup.direct(MYBUCKET_1, [1152921504606846976n]) - ]); + const querier = hydrated.getBucketParameterQuerier( + test_utils.querierOptions(new RequestParameters({ sub: 'user1', parameters: { n1: 1152921504606846976n } }, {})) + ).querier; + const parameters1 = await checkpoint.getParameterSets(querier.parameterQueryLookups); expect(parameters1).toEqual([TEST_PARAMS]); }); @@ -320,8 +332,8 @@ bucket_definitions: data: [] ` }); - const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', ['id'], config); @@ -340,12 +352,9 @@ bucket_definitions: const parameters = new RequestParameters({ sub: 'u1' }, {}); - const querier = sync_rules.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; + const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const lookups = querier.parameterQueryLookups; - expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: '20002', queryId: '', source: null as any }, ['u1']) - ]); const parameter_sets = await checkpoint.getParameterSets(lookups); expect(parameter_sets).toEqual([{ workspace_id: 'workspace1' }]); @@ -355,9 +364,14 @@ bucket_definitions: return checkpoint.getParameterSets(lookups); } }); - expect(buckets.map(test_utils.removeSourceSymbol)).toEqual([ - { bucket: '10002["workspace1"]', priority: 3, definition: 'by_workspace', inclusion_reasons: ['default'] } - ]); + const cleanedBuckets = buckets.map(test_utils.removeSourceSymbol); + expect(cleanedBuckets).toHaveLength(1); + expect(cleanedBuckets[0]).toMatchObject({ + priority: 3, + definition: 'by_workspace', + inclusion_reasons: ['default'] + }); + expect(cleanedBuckets[0].bucket.endsWith('["workspace1"]')).toBe(true); }); test('save and load parameters with dynamic global buckets', async () => { @@ -372,8 +386,8 @@ bucket_definitions: data: [] ` }); - const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', undefined, config); @@ -414,12 +428,9 @@ bucket_definitions: const parameters = new RequestParameters({ sub: 'unknown' }, {}); - const querier = sync_rules.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; + const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const lookups = querier.parameterQueryLookups; - expect(lookups).toEqual([ - 
ScopedParameterLookup.direct({ lookupName: '20002', queryId: '', source: null as any }, []) - ]); const parameter_sets = await checkpoint.getParameterSets(lookups); parameter_sets.sort((a, b) => JSON.stringify(a).localeCompare(JSON.stringify(b))); @@ -430,21 +441,17 @@ bucket_definitions: return checkpoint.getParameterSets(lookups); } }); - buckets.sort((a, b) => a.bucket.localeCompare(b.bucket)); - expect(buckets.map(test_utils.removeSourceSymbol)).toEqual([ - { - bucket: '10002["workspace1"]', - priority: 3, - definition: 'by_public_workspace', - inclusion_reasons: ['default'] - }, - { - bucket: '10002["workspace3"]', + const cleanedBuckets = buckets.map(test_utils.removeSourceSymbol); + expect(cleanedBuckets).toHaveLength(2); + for (const bucket of cleanedBuckets) { + expect(bucket).toMatchObject({ priority: 3, definition: 'by_public_workspace', inclusion_reasons: ['default'] - } - ]); + }); + } + const bucketSuffixes = cleanedBuckets.map((bucket) => bucket.bucket.slice(bucket.bucket.indexOf('['))).sort(); + expect(bucketSuffixes).toEqual(['["workspace1"]', '["workspace3"]']); }); test('multiple parameter queries', async () => { @@ -461,8 +468,8 @@ bucket_definitions: data: [] ` }); - const sync_rules = syncRules.parsed(test_utils.PARSE_OPTIONS).hydratedSyncRules(); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const workspaceTable = await test_utils.resolveTestTable(writer, 'workspace', undefined, config); @@ -516,13 +523,9 @@ bucket_definitions: const parameters = new RequestParameters({ sub: 'u1' }, {}); // Test intermediate values - could be moved to sync_rules.test.ts - const querier = sync_rules.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; + const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const lookups = querier.parameterQueryLookups; - expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: '20003', queryId: '', source: null as any }, []), - ScopedParameterLookup.direct({ lookupName: '20004', queryId: '', source: null as any }, ['u1']) - ]); const parameter_sets = await checkpoint.getParameterSets(lookups); parameter_sets.sort((a, b) => JSON.stringify(a).localeCompare(JSON.stringify(b))); @@ -536,8 +539,8 @@ bucket_definitions: } }) ).map((e) => e.bucket); - buckets.sort(); - expect(buckets).toEqual(['10003["workspace1"]', '10003["workspace3"]']); + const bucketSuffixes = buckets.map((bucket) => bucket.slice(bucket.indexOf('['))).sort(); + expect(bucketSuffixes).toEqual(['["workspace1"]', '["workspace3"]']); }); test('truncate parameters', async () => { @@ -552,6 +555,7 @@ bucket_definitions: ` }); const bucketStorage = factory.getInstance(syncRules); + const hydrated = bucketStorage.getHydratedSyncRules(test_utils.PARSE_OPTIONS); await using writer = await bucketStorage.createWriter(test_utils.BATCH_OPTIONS); const testTable = await test_utils.resolveTestTable(writer, 'test', ['id'], config); @@ -573,7 +577,10 @@ bucket_definitions: const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint.getParameterSets([ScopedParameterLookup.direct(MYBUCKET_1, ['user1'])]); + const querier = hydrated.getBucketParameterQuerier( + test_utils.querierOptions(new RequestParameters({ sub: 'user1' }, {})) + ).querier; + const parameters = await checkpoint.getParameterSets(querier.parameterQueryLookups); 
expect(parameters).toEqual([]); }); From d169562413a33e4d927b6ceb6aea19431cb1402e Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 2 Feb 2026 18:13:05 +0200 Subject: [PATCH 083/101] Fix initial compile issues post merge. --- .../register-data-storage-parameter-tests.ts | 22 +++++++++---------- packages/sync-rules/src/TablePattern.ts | 4 ---- .../src/compiler/ir_to_sync_plan.ts | 11 +++++++--- .../sync_plan/evaluator/bucket_data_source.ts | 6 ++--- .../src/sync_plan/evaluator/bucket_source.ts | 4 ++-- .../parameter_index_lookup_creator.ts | 6 ++--- packages/sync-rules/src/sync_plan/plan.ts | 1 + .../src/sync_plan/evaluator/evaluator.test.ts | 8 ++++--- 8 files changed, 31 insertions(+), 31 deletions(-) diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index 5f73695d8..89bfdf23e 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -1,7 +1,5 @@ import { storage } from '@powersync/service-core'; -import { RequestParameters } from '@powersync/service-sync-rules'; import { RequestParameters, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; -import { ParameterLookupScope } from '@powersync/service-sync-rules/src/HydrationState.js'; import { expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; @@ -66,7 +64,7 @@ bucket_definitions: const parameters = new RequestParameters({ sub: 'user1' }, {}); const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; - const parameter_sets = await checkpoint.getParameterSets(querier.parameterQueryLookups); + const parameter_sets = await querier.queryDynamicBucketDescriptions(checkpoint); expect(parameter_sets).toEqual([{ group_id: 'group1a' }]); }); @@ -114,7 +112,7 @@ bucket_definitions: test_utils.querierOptions(new RequestParameters({ sub: 'user1' }, {})) ).querier; - const parameters = await checkpoint2.getParameterSets(querier.parameterQueryLookups); + const parameters = await querier.queryDynamicBucketDescriptions(checkpoint2); expect(parameters).toEqual([ { group_id: 'group2' @@ -122,7 +120,7 @@ bucket_definitions: ]); // Use the checkpoint to get older data if relevant - const parameters2 = await checkpoint1.getParameterSets(querier.parameterQueryLookups); + const parameters2 = await querier.queryDynamicBucketDescriptions(checkpoint1); expect(parameters2).toEqual([ { group_id: 'group1' @@ -194,9 +192,9 @@ bucket_definitions: ) ).querier; const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = await checkpoint.getParameterSets(querier.parameterQueryLookups); + const buckets = await querier.queryDynamicBucketDescriptions(checkpoint); - expect(parameters.sort((a, b) => (a.todo_id as string).localeCompare(b.todo_id as string))).toEqual([ + expect(buckets.sort((a, b) => a.bucket.localeCompare(b.bucket))).toEqual([ { todo_id: 'todo1' }, @@ -247,7 +245,7 @@ bucket_definitions: new RequestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3.14 } }, {}) ) ).querier; - const parameters1 = await checkpoint.getParameterSets(querier1.parameterQueryLookups); + const parameters1 = await querier1.queryDynamicBucketDescriptions(checkpoint); expect(parameters1).toEqual([TEST_PARAMS]); const querier2 = hydrated.getBucketParameterQuerier( @@ -255,13 +253,13 @@ bucket_definitions: 
new RequestParameters({ sub: 'user1', parameters: { n1: 314, f2: 314n, f3: 3.14 } }, {}) ) ).querier; - const parameters2 = await checkpoint.getParameterSets(querier2.parameterQueryLookups); + const parameters2 = await querier2.queryDynamicBucketDescriptions(checkpoint); expect(parameters2).toEqual([TEST_PARAMS]); const querier3 = hydrated.getBucketParameterQuerier( test_utils.querierOptions(new RequestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3 } }, {})) ).querier; - const parameters3 = await checkpoint.getParameterSets(querier3.parameterQueryLookups); + const parameters3 = await querier3.queryDynamicBucketDescriptions(checkpoint); expect(parameters3).toEqual([]); }); @@ -318,7 +316,7 @@ bucket_definitions: const querier = hydrated.getBucketParameterQuerier( test_utils.querierOptions(new RequestParameters({ sub: 'user1', parameters: { n1: 1152921504606846976n } }, {})) ).querier; - const parameters1 = await checkpoint.getParameterSets(querier.parameterQueryLookups); + const parameters1 = await querier.queryDynamicBucketDescriptions(checkpoint); expect(parameters1).toEqual([TEST_PARAMS]); }); @@ -591,7 +589,7 @@ bucket_definitions: const querier = hydrated.getBucketParameterQuerier( test_utils.querierOptions(new RequestParameters({ sub: 'user1' }, {})) ).querier; - const parameters = await checkpoint.getParameterSets(querier.parameterQueryLookups); + const parameters = await querier.queryDynamicBucketDescriptions(checkpoint); expect(parameters).toEqual([]); }); diff --git a/packages/sync-rules/src/TablePattern.ts b/packages/sync-rules/src/TablePattern.ts index 69348f5b3..2d9d15a8c 100644 --- a/packages/sync-rules/src/TablePattern.ts +++ b/packages/sync-rules/src/TablePattern.ts @@ -34,10 +34,6 @@ export class TablePattern implements Equatable { return JSON.stringify([this.connectionTag, this.schema, this.tablePattern]); } - equals(other: TablePattern): boolean { - return this.key == other.key; - } - get isWildcard() { return this.tablePattern.endsWith('%'); } diff --git a/packages/sync-rules/src/compiler/ir_to_sync_plan.ts b/packages/sync-rules/src/compiler/ir_to_sync_plan.ts index 407188853..1b1de965e 100644 --- a/packages/sync-rules/src/compiler/ir_to_sync_plan.ts +++ b/packages/sync-rules/src/compiler/ir_to_sync_plan.ts @@ -107,19 +107,24 @@ export class CompilerModelToSyncPlan { return this.translateStatefulObject(value, () => { const hasher = new StableHasher(); value.buildBehaviorHashCode(hasher); - return { + const indexLookupCreator: plan.StreamParameterIndexLookupCreator = { sourceTable: value.tablePattern, defaultLookupScope: { // This just needs to be unique, and isn't visible to users (unlike bucket names). We might want to use a // more stable naming scheme in the future. 
lookupName: 'lookup', - queryId: index.toString() + queryId: index.toString(), + get source() { + // FIXME: the types don't match at the moment + return indexLookupCreator as any; + } }, hashCode: hasher.buildHashCode(), outputs: value.result.map((e) => this.translateExpression(e.expression)), filters: value.filters.map((e) => this.translateExpression(e.expression)), parameters: value.partitionBy.map((e) => this.translatePartitionKey(e)) - } satisfies plan.StreamParameterIndexLookupCreator; + }; + return indexLookupCreator; }); } diff --git a/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts b/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts index e2fdff51b..47529ed2e 100644 --- a/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts @@ -17,7 +17,7 @@ import { StreamEvaluationContext } from './index.js'; import { mapExternalDataToInstantiation, ScalarExpressionEvaluator } from '../engine/scalar_expression_engine.js'; export class PreparedStreamBucketDataSource implements BucketDataSource { - private readonly sourceTables = new Set(); + private readonly sourceTables: TablePattern[] = []; private readonly sources: PreparedStreamDataSource[] = []; constructor( @@ -28,7 +28,7 @@ export class PreparedStreamBucketDataSource implements BucketDataSource { const prepared = new PreparedStreamDataSource(data, context); this.sources.push(prepared); - this.sourceTables.add(prepared.tablePattern); + this.sourceTables.push(prepared.tablePattern); } } @@ -44,7 +44,7 @@ export class PreparedStreamBucketDataSource implements BucketDataSource { return evaluator.parameters.map((p) => ExpressionToSqlite.toSqlite(p.expr)); } - getSourceTables(): Set { + getSourceTables(): TablePattern[] { return this.sourceTables; } diff --git a/packages/sync-rules/src/sync_plan/evaluator/bucket_source.ts b/packages/sync-rules/src/sync_plan/evaluator/bucket_source.ts index c801249c5..1754eca79 100644 --- a/packages/sync-rules/src/sync_plan/evaluator/bucket_source.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/bucket_source.ts @@ -15,7 +15,7 @@ import { parametersForRequest, RequestParameterEvaluators } from './parameter_ev import { PendingQueriers } from '../../BucketParameterQuerier.js'; import { RequestedStream } from '../../SqlSyncRules.js'; import { BucketInclusionReason, ResolvedBucket } from '../../BucketDescription.js'; -import { buildBucketName, JSONBucketNameSerialize } from '../../utils.js'; +import { buildBucketInfo, JSONBucketNameSerialize, SOURCE } from '../../utils.js'; export interface StreamInput extends StreamEvaluationContext { preparedBuckets: Map; @@ -131,9 +131,9 @@ class PreparedQuerier { const parametersToBucket = (instantiation: SqliteParameterValue[]): ResolvedBucket => { return { + ...buildBucketInfo(bucketScope, JSONBucketNameSerialize.stringify(instantiation)), definition: this.stream.name, inclusion_reasons: [reason], - bucket: buildBucketName(bucketScope, JSONBucketNameSerialize.stringify(instantiation)), priority: this.stream.priority }; }; diff --git a/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts b/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts index a0a829ef5..57d27a4d9 100644 --- a/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts @@ -37,10 +37,8 @@ export class PreparedParameterIndexLookupCreator 
implements ParameterIndexLookup this.evaluatorInputs = mapExpressions.instantiation; } - getSourceTables(): Set { - const set = new Set(); - set.add(this.source.sourceTable); - return set; + getSourceTables(): TablePattern[] { + return [this.source.sourceTable]; } evaluateParameterRow(sourceTable: SourceTableInterface, row: SqliteRow): UnscopedEvaluatedParametersResult[] { diff --git a/packages/sync-rules/src/sync_plan/plan.ts b/packages/sync-rules/src/sync_plan/plan.ts index cb75e4eed..7117566a3 100644 --- a/packages/sync-rules/src/sync_plan/plan.ts +++ b/packages/sync-rules/src/sync_plan/plan.ts @@ -1,4 +1,5 @@ import { BucketPriority } from '../BucketDescription.js'; +import { ParameterIndexLookupCreator } from '../BucketSource.js'; import { ParameterLookupScope } from '../HydrationState.js'; import { TablePattern } from '../TablePattern.js'; import { UnscopedEvaluatedParameters } from '../types.js'; diff --git a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts index 678764575..4da03508a 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts @@ -144,7 +144,7 @@ describe('evaluating parameters', () => { expect(desc.evaluateParameterRow(ISSUES, { id: 'issue_id', owner_id: 'user1', name: 'name' })).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0' }, ['user1']), + lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: null as any }, ['user1']), bucketParameters: [ { '0': 'issue_id' @@ -240,7 +240,8 @@ describe('querier', () => { ScopedParameterLookup.direct( { lookupName: 'lookup', - queryId: '0' + queryId: '0', + source: null as any }, ['user'] ) @@ -253,7 +254,8 @@ describe('querier', () => { ScopedParameterLookup.direct( { lookupName: 'lookup', - queryId: '1' + queryId: '1', + source: null as any }, ['name'] ) From ec975c87c258d71bcbf2d44b98c0d08e31aecb73 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 2 Feb 2026 18:20:43 +0200 Subject: [PATCH 084/101] Fix circular import. 
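This commit breaks the module cycle in two places: packages/sync-rules/src/sync_plan/plan.ts drops the ParameterIndexLookupCreator import from BucketSource.js that the previous patch introduced, and utils.ts stops importing castAsText from sql_functions.js by moving idFromData into a new leaf module, utils2.ts (a later patch in this series folds it into cast.ts). A minimal sketch of the extracted helper, assuming the cycle ran through the utils.ts -> sql_functions.js edge; the body matches the function added in utils2.ts, only condensed:

    // utils2.ts: depends on sql_functions.js, but nothing on that import
    // chain depends on utils2.ts, so the back-edge through utils.ts is gone.
    import { castAsText } from './sql_functions.js';
    import { SqliteJsonRow } from './types.js';

    export function idFromData(data: SqliteJsonRow): string {
      const id = data.id;
      // Non-string ids (e.g. auto-incrementing integers) are cast to text,
      // falling back to '' when there is no id column at all.
      return typeof id === 'string' ? id : (castAsText(id) ?? '');
    }
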
--- packages/sync-rules/src/BaseSqlDataQuery.ts | 3 ++- packages/sync-rules/src/index.ts | 1 + .../sync_plan/evaluator/bucket_data_source.ts | 3 ++- packages/sync-rules/src/sync_plan/plan.ts | 1 - packages/sync-rules/src/utils.ts | 18 ------------------ 5 files changed, 5 insertions(+), 21 deletions(-) diff --git a/packages/sync-rules/src/BaseSqlDataQuery.ts b/packages/sync-rules/src/BaseSqlDataQuery.ts index 9d86a157d..97d92a8a7 100644 --- a/packages/sync-rules/src/BaseSqlDataQuery.ts +++ b/packages/sync-rules/src/BaseSqlDataQuery.ts @@ -15,7 +15,8 @@ import { SqliteJsonRow, SqliteRow } from './types.js'; -import { filterJsonRow, idFromData } from './utils.js'; +import { filterJsonRow } from './utils.js'; +import { idFromData } from './utils2.js'; export interface RowValueExtractor { extract(tables: QueryParameters, into: SqliteRow): void; diff --git a/packages/sync-rules/src/index.ts b/packages/sync-rules/src/index.ts index ce2694484..2b3fe614d 100644 --- a/packages/sync-rules/src/index.ts +++ b/packages/sync-rules/src/index.ts @@ -27,6 +27,7 @@ export * from './types.js'; export * from './types/custom_sqlite_value.js'; export * from './types/time.js'; export * from './utils.js'; +export * from './utils2.js'; export { versionedHydrationState } from './HydrationState.js'; export * from './HydratedSyncRules.js'; export * from './HydrationState.js'; diff --git a/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts b/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts index 47529ed2e..0ff152eca 100644 --- a/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts @@ -9,12 +9,13 @@ import { UnscopedEvaluatedRow, UnscopedEvaluationResult } from '../../types.js'; -import { filterJsonRow, idFromData, isJsonValue, isValidParameterValue, JSONBucketNameSerialize } from '../../utils.js'; +import { filterJsonRow, isJsonValue, isValidParameterValue, JSONBucketNameSerialize } from '../../utils.js'; import { SqlExpression } from '../expression.js'; import { ExpressionToSqlite } from '../expression_to_sql.js'; import * as plan from '../plan.js'; import { StreamEvaluationContext } from './index.js'; import { mapExternalDataToInstantiation, ScalarExpressionEvaluator } from '../engine/scalar_expression_engine.js'; +import { idFromData } from '../../utils2.js'; export class PreparedStreamBucketDataSource implements BucketDataSource { private readonly sourceTables: TablePattern[] = []; diff --git a/packages/sync-rules/src/sync_plan/plan.ts b/packages/sync-rules/src/sync_plan/plan.ts index 7117566a3..cb75e4eed 100644 --- a/packages/sync-rules/src/sync_plan/plan.ts +++ b/packages/sync-rules/src/sync_plan/plan.ts @@ -1,5 +1,4 @@ import { BucketPriority } from '../BucketDescription.js'; -import { ParameterIndexLookupCreator } from '../BucketSource.js'; import { ParameterLookupScope } from '../HydrationState.js'; import { TablePattern } from '../TablePattern.js'; import { UnscopedEvaluatedParameters } from '../types.js'; diff --git a/packages/sync-rules/src/utils.ts b/packages/sync-rules/src/utils.ts index 114b2f332..e728645af 100644 --- a/packages/sync-rules/src/utils.ts +++ b/packages/sync-rules/src/utils.ts @@ -4,7 +4,6 @@ import { BucketDataSource } from './BucketSource.js'; import { CompatibilityContext } from './compatibility.js'; import { SyncRuleProcessingError as SyncRulesProcessingError } from './errors.js'; import { BucketDataScope } from './HydrationState.js'; -import { castAsText } from 
'./sql_functions.js'; import { SQLITE_FALSE, SQLITE_TRUE } from './sql_support.js'; import { DatabaseInputRow, @@ -255,20 +254,3 @@ export function normalizeParameterValue(value: SqliteJsonValue): SqliteJsonValue } return value; } - -/** - * Extracts and normalizes the ID column from a row. - */ -export function idFromData(data: SqliteJsonRow): string { - let id = data.id; - if (typeof id != 'string') { - // While an explicit cast would be better, this covers against very common - // issues when initially testing out sync, for example when the id column is an - // auto-incrementing integer. - // If there is no id column, we use a blank id. This will result in the user syncing - // a single arbitrary row for this table - better than just not being able to sync - // anything. - id = castAsText(id) ?? ''; - } - return id; -} From b236426fa5bcb914831f1578cee7da5e4917a60d Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Mon, 2 Feb 2026 18:22:35 +0200 Subject: [PATCH 085/101] Fix some tests. --- .../src/sync_plan/evaluator/evaluator.test.ts | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts index 4da03508a..84a4c5715 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts @@ -27,9 +27,9 @@ describe('evaluating rows', () => { _blob: new Uint8Array(10) // non-JSON columns should be removed } }) - ).toStrictEqual([ + ).toMatchObject([ { - bucket: 'stream|0[]', + bucket: '1#stream|0[]', id: 'foo', data: { id: 'foo', _double: 1, _int: 1n, _null: null, _text: 'text' }, table: 'users' @@ -51,10 +51,10 @@ describe('evaluating rows', () => { return rows[0].bucket; } - expect(evaluate(1)).toStrictEqual('stream|0[1]'); - expect(evaluate(1n)).toStrictEqual('stream|0[1]'); - expect(evaluate(1.1)).toStrictEqual('stream|0[1.1]'); - expect(evaluate('1')).toStrictEqual('stream|0["1"]'); + expect(evaluate(1)).toStrictEqual('1#stream|0[1]'); + expect(evaluate(1n)).toStrictEqual('1#stream|0[1]'); + expect(evaluate(1.1)).toStrictEqual('1#stream|0[1.1]'); + expect(evaluate('1')).toStrictEqual('1#stream|0["1"]'); // null is not equal to itself, so WHERE null = subscription.paraeter('p') should not match any rows. 
expect(evaluate(null)).toStrictEqual(undefined); @@ -72,9 +72,9 @@ describe('evaluating rows', () => { id: 'foo' } }) - ).toStrictEqual([ + ).toMatchObject([ { - bucket: 'stream|0[]', + bucket: '1#stream|0[]', id: 'foo', data: { id: 'foo' }, table: 'u' @@ -91,9 +91,9 @@ describe('evaluating rows', () => { id: 'foo' } }) - ).toStrictEqual([ + ).toMatchObject([ { - bucket: 'stream|0[]', + bucket: '1#stream|0[]', id: 'foo', data: { id: 'foo' }, table: 'output' @@ -110,9 +110,9 @@ describe('evaluating rows', () => { id: 'foo' } }) - ).toStrictEqual([ + ).toMatchObject([ { - bucket: 'stream|0[]', + bucket: '1#stream|0[]', id: 'foo', data: { id: 'foo' }, table: 'users' @@ -124,8 +124,8 @@ describe('evaluating rows', () => { const desc = sync.prepareSyncStreams([ { name: 'stream', queries: ['SELECT * FROM users', 'SELECT * FROM comments'] } ]); - expect(evaluateBucketIds(desc, USERS, { id: 'foo' })).toStrictEqual(['stream|0[]']); - expect(evaluateBucketIds(desc, COMMENTS, { id: 'foo2' })).toStrictEqual(['stream|0[]']); + expect(evaluateBucketIds(desc, USERS, { id: 'foo' })).toStrictEqual(['1#stream|0[]']); + expect(evaluateBucketIds(desc, COMMENTS, { id: 'foo2' })).toStrictEqual(['1#stream|0[]']); }); }); @@ -187,7 +187,7 @@ describe('querier', () => { streams: {} }); - expect(querier.staticBuckets.map((e) => e.bucket)).toStrictEqual(['stream|0[]']); + expect(querier.staticBuckets.map((e) => e.bucket)).toStrictEqual(['1#stream|0[]']); }); syncTest('request data', ({ sync }) => { @@ -205,7 +205,7 @@ describe('querier', () => { }); expect(errors).toStrictEqual([]); - expect(querier.staticBuckets.map((e) => e.bucket)).toStrictEqual(['stream|0["user"]']); + expect(querier.staticBuckets.map((e) => e.bucket)).toStrictEqual(['1#stream|0["user"]']); }); syncTest('parameter lookups', async ({ sync }) => { @@ -266,7 +266,7 @@ describe('querier', () => { throw new Error('Function not implemented.'); } }); - expect(buckets.map((b) => b.bucket)).toStrictEqual(['stream|0["issue"]']); + expect(buckets.map((b) => b.bucket)).toStrictEqual(['1#stream|0["issue"]']); }); }); From 127d4161d633c51aa0961abb85c9a21eaf0347a6 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 11:20:56 +0200 Subject: [PATCH 086/101] Fix test build issue. --- packages/service-core/test/tsconfig.json | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/service-core/test/tsconfig.json b/packages/service-core/test/tsconfig.json index 124a1cbe5..9acd25c61 100644 --- a/packages/service-core/test/tsconfig.json +++ b/packages/service-core/test/tsconfig.json @@ -4,6 +4,7 @@ "rootDir": "src", "baseUrl": "./", "outDir": "dist", + "noEmit": true, "esModuleInterop": true, "skipLibCheck": true, "sourceMap": true, From d885186331800bbc6ce02a3afa6e15f0c801cc69 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 11:23:41 +0200 Subject: [PATCH 087/101] Add missing file. --- packages/sync-rules/src/utils2.ts | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 packages/sync-rules/src/utils2.ts diff --git a/packages/sync-rules/src/utils2.ts b/packages/sync-rules/src/utils2.ts new file mode 100644 index 000000000..2f5d3f1ac --- /dev/null +++ b/packages/sync-rules/src/utils2.ts @@ -0,0 +1,19 @@ +import { castAsText } from './sql_functions.js'; +import { SqliteJsonRow } from './types.js'; + +/** + * Extracts and normalizes the ID column from a row. 
+ */ +export function idFromData(data: SqliteJsonRow): string { + let id = data.id; + if (typeof id != 'string') { + // While an explicit cast would be better, this covers against very common + // issues when initially testing out sync, for example when the id column is an + // auto-incrementing integer. + // If there is no id column, we use a blank id. This will result in the user syncing + // a single arbitrary row for this table - better than just not being able to sync + // anything. + id = castAsText(id) ?? ''; + } + return id; +} From d45305f2f69f8d8b3ec27cba3b408dafd098322e Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 11:31:30 +0200 Subject: [PATCH 088/101] Restructure cast functions. --- packages/sync-rules/src/BaseSqlDataQuery.ts | 9 +- packages/sync-rules/src/cast.ts | 123 ++++++++++++++++++ packages/sync-rules/src/compiler/sqlite.ts | 10 +- packages/sync-rules/src/index.ts | 2 +- packages/sync-rules/src/sql_functions.ts | 106 +-------------- .../src/sync_plan/engine/javascript.ts | 3 +- .../sync_plan/evaluator/bucket_data_source.ts | 2 +- packages/sync-rules/src/utils2.ts | 19 --- 8 files changed, 137 insertions(+), 137 deletions(-) create mode 100644 packages/sync-rules/src/cast.ts delete mode 100644 packages/sync-rules/src/utils2.ts diff --git a/packages/sync-rules/src/BaseSqlDataQuery.ts b/packages/sync-rules/src/BaseSqlDataQuery.ts index 97d92a8a7..4e1f59497 100644 --- a/packages/sync-rules/src/BaseSqlDataQuery.ts +++ b/packages/sync-rules/src/BaseSqlDataQuery.ts @@ -1,22 +1,21 @@ import { SelectedColumn } from 'pgsql-ast-parser'; +import { idFromData } from './cast.js'; import { SqlRuleError } from './errors.js'; import { ColumnDefinition } from './ExpressionType.js'; import { SourceTableInterface } from './SourceTableInterface.js'; import { AvailableTable, SqlTools } from './sql_filters.js'; -import { castAsText } from './sql_functions.js'; import { TablePattern } from './TablePattern.js'; import { QueryParameters, QuerySchema, - UnscopedEvaluatedRow, - UnscopedEvaluationResult, SourceSchema, SourceSchemaTable, SqliteJsonRow, - SqliteRow + SqliteRow, + UnscopedEvaluatedRow, + UnscopedEvaluationResult } from './types.js'; import { filterJsonRow } from './utils.js'; -import { idFromData } from './utils2.js'; export interface RowValueExtractor { extract(tables: QueryParameters, into: SqliteRow): void; diff --git a/packages/sync-rules/src/cast.ts b/packages/sync-rules/src/cast.ts new file mode 100644 index 000000000..58ebfa715 --- /dev/null +++ b/packages/sync-rules/src/cast.ts @@ -0,0 +1,123 @@ +import type { SqliteJsonRow, SqliteValue } from './types.js'; + +/** + * Extracts and normalizes the ID column from a row. + */ +export function idFromData(data: SqliteJsonRow): string { + let id = data.id; + if (typeof id != 'string') { + // While an explicit cast would be better, this covers against very common + // issues when initially testing out sync, for example when the id column is an + // auto-incrementing integer. + // If there is no id column, we use a blank id. This will result in the user syncing + // a single arbitrary row for this table - better than just not being able to sync + // anything. + id = castAsText(id) ?? 
''; + } + return id; +} + +export const CAST_TYPES = new Set(['text', 'numeric', 'integer', 'real', 'blob']); + +const textEncoder = new TextEncoder(); +const textDecoder = new TextDecoder(); + +export function castAsText(value: SqliteValue): string | null { + if (value == null) { + return null; + } else if (value instanceof Uint8Array) { + return textDecoder.decode(value); + } else { + return value.toString(); + } +} + +export function castAsBlob(value: SqliteValue): Uint8Array | null { + if (value == null) { + return null; + } else if (value instanceof Uint8Array) { + return value!; + } + + if (typeof value != 'string') { + value = value.toString(); + } + return textEncoder.encode(value); +} + +export function cast(value: SqliteValue, to: string) { + if (value == null) { + return null; + } + if (to == 'text') { + return castAsText(value); + } else if (to == 'numeric') { + if (value instanceof Uint8Array) { + value = textDecoder.decode(value); + } + if (typeof value == 'string') { + return parseNumeric(value); + } else if (typeof value == 'number' || typeof value == 'bigint') { + return value; + } else { + return 0n; + } + } else if (to == 'real') { + if (value instanceof Uint8Array) { + value = textDecoder.decode(value); + } + if (typeof value == 'string') { + const nr = parseFloat(value); + if (isNaN(nr)) { + return 0.0; + } else { + return nr; + } + } else if (typeof value == 'number') { + return value; + } else if (typeof value == 'bigint') { + return Number(value); + } else { + return 0.0; + } + } else if (to == 'integer') { + if (value instanceof Uint8Array) { + value = textDecoder.decode(value); + } + if (typeof value == 'string') { + return parseBigInt(value); + } else if (typeof value == 'number') { + return Number.isInteger(value) ? BigInt(value) : BigInt(Math.floor(value)); + } else if (typeof value == 'bigint') { + return value; + } else { + return 0n; + } + } else if (to == 'blob') { + return castAsBlob(value); + } else { + throw new Error(`Type not supported for cast: '${to}'`); + } +} + +function parseNumeric(text: string): bigint | number { + const match = /^\s*(\d+)(\.\d*)?(e[+\-]?\d+)?/i.exec(text); + if (!match) { + return 0n; + } + + if (match[2] != null || match[3] != null) { + const v = parseFloat(match[0]); + return isNaN(v) ? 
0n : v; + } else { + return BigInt(match[1]); + } +} + +function parseBigInt(text: string): bigint { + const match = /^\s*(\d+)/.exec(text); + if (!match) { + return 0n; + } + return BigInt(match[1]); +} diff --git a/packages/sync-rules/src/compiler/sqlite.ts b/packages/sync-rules/src/compiler/sqlite.ts index 7f09c113b..1fd7958f8 100644 --- a/packages/sync-rules/src/compiler/sqlite.ts +++ b/packages/sync-rules/src/compiler/sqlite.ts @@ -8,19 +8,19 @@ import { PGNode, SelectFromStatement } from 'pgsql-ast-parser'; -import { CAST_TYPES } from '../sql_functions.js'; -import { ColumnInRow, ConnectionParameter, ExpressionInput, NodeLocations, SyncExpression } from './expression.js'; +import { CAST_TYPES } from '../cast.js'; import { BetweenExpression, LiteralExpression, SqlExpression, - supportedFunctions, - BinaryOperator as SupportedBinaryOperator + BinaryOperator as SupportedBinaryOperator, + supportedFunctions } from '../sync_plan/expression.js'; import { ConnectionParameterSource } from '../sync_plan/plan.js'; import { ParsingErrorListener } from './compiler.js'; -import { BaseSourceResultSet, SourceResultSet, SyntacticResultSetSource } from './table.js'; +import { ColumnInRow, ConnectionParameter, ExpressionInput, NodeLocations, SyncExpression } from './expression.js'; import { SqlScope } from './scope.js'; +import { BaseSourceResultSet, SourceResultSet, SyntacticResultSetSource } from './table.js'; export interface ResolvedSubqueryExpression { filters: SqlExpression[]; diff --git a/packages/sync-rules/src/index.ts b/packages/sync-rules/src/index.ts index 2b3fe614d..5402fcd85 100644 --- a/packages/sync-rules/src/index.ts +++ b/packages/sync-rules/src/index.ts @@ -27,7 +27,7 @@ export * from './types.js'; export * from './types/custom_sqlite_value.js'; export * from './types/time.js'; export * from './utils.js'; -export * from './utils2.js'; +export * from './cast.js'; export { versionedHydrationState } from './HydrationState.js'; export * from './HydratedSyncRules.js'; export * from './HydrationState.js'; diff --git a/packages/sync-rules/src/sql_functions.ts b/packages/sync-rules/src/sql_functions.ts index bbf89b662..f4f356691 100644 --- a/packages/sync-rules/src/sql_functions.ts +++ b/packages/sync-rules/src/sql_functions.ts @@ -10,6 +10,7 @@ import { ExpressionType, SqliteType, SqliteValueType, TYPE_INTEGER } from './Exp import * as uuid from 'uuid'; import { CustomSqliteValue } from './types/custom_sqlite_value.js'; import { CompatibilityContext, CompatibilityOption } from './compatibility.js'; +import { cast, CAST_TYPES, castAsBlob, castAsText } from './cast.js'; export const BASIC_OPERATORS = new Set([ '=', @@ -526,89 +527,6 @@ export function generateSqlFunctions(compatibility: CompatibilityContext) { }; } -export const CAST_TYPES = new Set(['text', 'numeric', 'integer', 'real', 'blob']); - -const textEncoder = new TextEncoder(); -const textDecoder = new TextDecoder(); - -export function castAsText(value: SqliteValue): string | null { - if (value == null) { - return null; - } else if (value instanceof Uint8Array) { - return textDecoder.decode(value); - } else { - return value.toString(); - } -} - -export function castAsBlob(value: SqliteValue): Uint8Array | null { - if (value == null) { - return null; - } else if (value instanceof Uint8Array) { - return value!; - } - - if (typeof value != 'string') { - value = value.toString(); - } - return textEncoder.encode(value); -} - -export function cast(value: SqliteValue, to: string) { - if (value == null) { - return null; - } - if (to == 
'text') { - return castAsText(value); - } else if (to == 'numeric') { - if (value instanceof Uint8Array) { - value = textDecoder.decode(value); - } - if (typeof value == 'string') { - return parseNumeric(value); - } else if (typeof value == 'number' || typeof value == 'bigint') { - return value; - } else { - return 0n; - } - } else if (to == 'real') { - if (value instanceof Uint8Array) { - value = textDecoder.decode(value); - } - if (typeof value == 'string') { - const nr = parseFloat(value); - if (isNaN(nr)) { - return 0.0; - } else { - return nr; - } - } else if (typeof value == 'number') { - return value; - } else if (typeof value == 'bigint') { - return Number(value); - } else { - return 0.0; - } - } else if (to == 'integer') { - if (value instanceof Uint8Array) { - value = textDecoder.decode(value); - } - if (typeof value == 'string') { - return parseBigInt(value); - } else if (typeof value == 'number') { - return Number.isInteger(value) ? BigInt(value) : BigInt(Math.floor(value)); - } else if (typeof value == 'bigint') { - return value; - } else { - return 0n; - } - } else if (to == 'blob') { - return castAsBlob(value); - } else { - throw new Error(`Type not supported for cast: '${to}'`); - } -} - export function sqliteTypeOf(arg: SqliteInputValue): SqliteValueType { if (arg == null) { return 'null'; @@ -644,28 +562,6 @@ export function parseGeometry(value?: SqliteValue) { return geo; } -function parseNumeric(text: string): bigint | number { - const match = /^\s*(\d+)(\.\d*)?(e[+\-]?\d+)?/i.exec(text); - if (!match) { - return 0n; - } - - if (match[2] != null || match[3] != null) { - const v = parseFloat(match[0]); - return isNaN(v) ? 0n : v; - } else { - return BigInt(match[1]); - } -} - -function parseBigInt(text: string): bigint { - const match = /^\s*(\d+)/.exec(text); - if (!match) { - return 0n; - } - return BigInt(match[1]); -} - function isNumeric(a: SqliteValue): a is number | bigint { return typeof a == 'number' || typeof a == 'bigint'; } diff --git a/packages/sync-rules/src/sync_plan/engine/javascript.ts b/packages/sync-rules/src/sync_plan/engine/javascript.ts index 7b5d3adde..65c8d0e8f 100644 --- a/packages/sync-rules/src/sync_plan/engine/javascript.ts +++ b/packages/sync-rules/src/sync_plan/engine/javascript.ts @@ -1,4 +1,5 @@ import { + cast, compare, CompatibilityContext, generateSqlFunctions, @@ -7,7 +8,7 @@ import { sqliteBool, sqliteNot } from '../../index.js'; -import { cast, evaluateOperator, SqlFunction } from '../../sql_functions.js'; +import { evaluateOperator, SqlFunction } from '../../sql_functions.js'; import { cartesianProduct } from '../../streams/utils.js'; import { generateTableValuedFunctions } from '../../TableValuedFunctions.js'; import { SqliteRow, SqliteValue } from '../../types.js'; diff --git a/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts b/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts index 0ff152eca..9e77c15c4 100644 --- a/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/bucket_data_source.ts @@ -15,7 +15,7 @@ import { ExpressionToSqlite } from '../expression_to_sql.js'; import * as plan from '../plan.js'; import { StreamEvaluationContext } from './index.js'; import { mapExternalDataToInstantiation, ScalarExpressionEvaluator } from '../engine/scalar_expression_engine.js'; -import { idFromData } from '../../utils2.js'; +import { idFromData } from '../../cast.js'; export class PreparedStreamBucketDataSource implements BucketDataSource { 
private readonly sourceTables: TablePattern[] = []; diff --git a/packages/sync-rules/src/utils2.ts b/packages/sync-rules/src/utils2.ts deleted file mode 100644 index 2f5d3f1ac..000000000 --- a/packages/sync-rules/src/utils2.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { castAsText } from './sql_functions.js'; -import { SqliteJsonRow } from './types.js'; - -/** - * Extracts and normalizes the ID column from a row. - */ -export function idFromData(data: SqliteJsonRow): string { - let id = data.id; - if (typeof id != 'string') { - // While an explicit cast would be better, this covers against very common - // issues when initially testing out sync, for example when the id column is an - // auto-incrementing integer. - // If there is no id column, we use a blank id. This will result in the user syncing - // a single arbitrary row for this table - better than just not being able to sync - // anything. - id = castAsText(id) ?? ''; - } - return id; -} From 997145b205c57a46a44690319bb4759f35e0174b Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 11:46:16 +0200 Subject: [PATCH 089/101] Fix storage parameter tests. --- .../register-data-storage-parameter-tests.ts | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index 89bfdf23e..1b6cd7b35 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -65,7 +65,7 @@ bucket_definitions: const parameters = new RequestParameters({ sub: 'user1' }, {}); const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const parameter_sets = await querier.queryDynamicBucketDescriptions(checkpoint); - expect(parameter_sets).toEqual([{ group_id: 'group1a' }]); + expect(parameter_sets).toMatchObject([{ bucket: expect.stringMatching(/"group1a"/) }]); }); test('it should use the latest version', async () => { @@ -112,18 +112,18 @@ bucket_definitions: test_utils.querierOptions(new RequestParameters({ sub: 'user1' }, {})) ).querier; - const parameters = await querier.queryDynamicBucketDescriptions(checkpoint2); - expect(parameters).toEqual([ + const buckets1 = await querier.queryDynamicBucketDescriptions(checkpoint2); + expect(buckets1).toMatchObject([ { - group_id: 'group2' + bucket: expect.stringMatching(/"group2"/) } ]); // Use the checkpoint to get older data if relevant - const parameters2 = await querier.queryDynamicBucketDescriptions(checkpoint1); - expect(parameters2).toEqual([ + const buckets2 = await querier.queryDynamicBucketDescriptions(checkpoint1); + expect(buckets2).toMatchObject([ { - group_id: 'group1' + bucket: expect.stringMatching(/"group1"/) } ]); }); @@ -194,12 +194,12 @@ bucket_definitions: const checkpoint = await bucketStorage.getCheckpoint(); const buckets = await querier.queryDynamicBucketDescriptions(checkpoint); - expect(buckets.sort((a, b) => a.bucket.localeCompare(b.bucket))).toEqual([ + expect(buckets.sort((a, b) => a.bucket.localeCompare(b.bucket))).toMatchObject([ { - todo_id: 'todo1' + bucket: expect.stringMatching(/"todo1"/) }, { - todo_id: 'todo2' + bucket: expect.stringMatching(/"todo2"/) } ]); }); @@ -236,8 +236,6 @@ bucket_definitions: await writer.commit('1/1'); - const TEST_PARAMS = { group_id: 'group1' }; - const checkpoint = await bucketStorage.getCheckpoint(); 
const querier1 = hydrated.getBucketParameterQuerier( @@ -245,22 +243,22 @@ bucket_definitions: new RequestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3.14 } }, {}) ) ).querier; - const parameters1 = await querier1.queryDynamicBucketDescriptions(checkpoint); - expect(parameters1).toEqual([TEST_PARAMS]); + const buckets1 = await querier1.queryDynamicBucketDescriptions(checkpoint); + expect(buckets1).toMatchObject([{ bucket: expect.stringMatching(/"group1"/), definition: 'mybucket' }]); const querier2 = hydrated.getBucketParameterQuerier( test_utils.querierOptions( new RequestParameters({ sub: 'user1', parameters: { n1: 314, f2: 314n, f3: 3.14 } }, {}) ) ).querier; - const parameters2 = await querier2.queryDynamicBucketDescriptions(checkpoint); - expect(parameters2).toEqual([TEST_PARAMS]); + const buckets2 = await querier2.queryDynamicBucketDescriptions(checkpoint); + expect(buckets2).toMatchObject([{ bucket: expect.stringMatching(/"group1"/), definition: 'mybucket' }]); const querier3 = hydrated.getBucketParameterQuerier( test_utils.querierOptions(new RequestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3 } }, {})) ).querier; - const parameters3 = await querier3.queryDynamicBucketDescriptions(checkpoint); - expect(parameters3).toEqual([]); + const buckets3 = await querier3.queryDynamicBucketDescriptions(checkpoint); + expect(buckets3).toEqual([]); }); test('save and load parameters with large numbers', async () => { @@ -310,14 +308,17 @@ bucket_definitions: await writer.commit('1/1'); - const TEST_PARAMS = { group_id: 'group1' }; - const checkpoint = await bucketStorage.getCheckpoint(); const querier = hydrated.getBucketParameterQuerier( test_utils.querierOptions(new RequestParameters({ sub: 'user1', parameters: { n1: 1152921504606846976n } }, {})) ).querier; - const parameters1 = await querier.queryDynamicBucketDescriptions(checkpoint); - expect(parameters1).toEqual([TEST_PARAMS]); + const buckets = await querier.queryDynamicBucketDescriptions(checkpoint); + expect(buckets.map(test_utils.removeSourceSymbol)).toMatchObject([ + { + bucket: expect.stringMatching(/"group1"/), + definition: 'mybucket' + } + ]); }); test('save and load parameters with workspaceId', async () => { From cdbece1b11ef2c397484565bcbd8b09832ff420c Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 11:48:27 +0200 Subject: [PATCH 090/101] Fix type issues. --- packages/sync-rules/src/compiler/ir_to_sync_plan.ts | 11 +++-------- .../evaluator/parameter_index_lookup_creator.ts | 5 ++++- packages/sync-rules/src/sync_plan/plan.ts | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/packages/sync-rules/src/compiler/ir_to_sync_plan.ts b/packages/sync-rules/src/compiler/ir_to_sync_plan.ts index 1b1de965e..407188853 100644 --- a/packages/sync-rules/src/compiler/ir_to_sync_plan.ts +++ b/packages/sync-rules/src/compiler/ir_to_sync_plan.ts @@ -107,24 +107,19 @@ export class CompilerModelToSyncPlan { return this.translateStatefulObject(value, () => { const hasher = new StableHasher(); value.buildBehaviorHashCode(hasher); - const indexLookupCreator: plan.StreamParameterIndexLookupCreator = { + return { sourceTable: value.tablePattern, defaultLookupScope: { // This just needs to be unique, and isn't visible to users (unlike bucket names). We might want to use a // more stable naming scheme in the future. 
lookupName: 'lookup', - queryId: index.toString(), - get source() { - // FIXME: the types don't match at the moment - return indexLookupCreator as any; - } + queryId: index.toString() }, hashCode: hasher.buildHashCode(), outputs: value.result.map((e) => this.translateExpression(e.expression)), filters: value.filters.map((e) => this.translateExpression(e.expression)), parameters: value.partitionBy.map((e) => this.translatePartitionKey(e)) - }; - return indexLookupCreator; + } satisfies plan.StreamParameterIndexLookupCreator; }); } diff --git a/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts b/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts index 57d27a4d9..a0936bcdb 100644 --- a/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts +++ b/packages/sync-rules/src/sync_plan/evaluator/parameter_index_lookup_creator.ts @@ -20,7 +20,10 @@ export class PreparedParameterIndexLookupCreator implements ParameterIndexLookup private readonly source: plan.StreamParameterIndexLookupCreator, { engine }: StreamEvaluationContext ) { - this.defaultLookupScope = source.defaultLookupScope; + this.defaultLookupScope = { + ...source.defaultLookupScope, + source: this + }; const mapExpressions = mapExternalDataToInstantiation(); const expressions = source.outputs.map((o) => mapExpressions.transform(o)); diff --git a/packages/sync-rules/src/sync_plan/plan.ts b/packages/sync-rules/src/sync_plan/plan.ts index cb75e4eed..4ba44c44f 100644 --- a/packages/sync-rules/src/sync_plan/plan.ts +++ b/packages/sync-rules/src/sync_plan/plan.ts @@ -116,7 +116,7 @@ export interface StreamBucketDataSource { */ export interface StreamParameterIndexLookupCreator extends TableProcessor { hashCode: number; - defaultLookupScope: ParameterLookupScope; + defaultLookupScope: Omit; /** * Outputs to persist in the lookup. From 745d85f48ee42c3ef37f8a3b699d87cdaabd7680 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 11:59:56 +0200 Subject: [PATCH 091/101] Disable more unstable tests. 
--- .../test/src/schema_changes.test.ts | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/modules/module-postgres/test/src/schema_changes.test.ts b/modules/module-postgres/test/src/schema_changes.test.ts index b334f098b..a2418cf2c 100644 --- a/modules/module-postgres/test/src/schema_changes.test.ts +++ b/modules/module-postgres/test/src/schema_changes.test.ts @@ -55,23 +55,24 @@ function defineTests(config: storage.TestStorageConfig) { const reduced = reduceBucket(data).slice(1); expect(reduced.sort(compareIds)).toMatchObject([PUT_T3]); - // Initial inserts - expect(data.slice(0, 2)).toMatchObject([PUT_T1, PUT_T2]); - - // Truncate - order doesn't matter - expect(data.slice(2, 4).sort(compareIds)).toMatchObject([REMOVE_T1, REMOVE_T2]); - - expect(data.slice(4, 5)).toMatchObject([ - // Snapshot and/or replication insert - PUT_T3 - ]); - - if (data.length > 5) { - expect(data.slice(5)).toMatchObject([ - // Replicated insert (optional duplication) - PUT_T3 - ]); - } + // Actual operations may look like this, but is not stable: + // // Initial inserts + // expect(data.slice(0, 2)).toMatchObject([PUT_T1, PUT_T2]); + + // // Truncate - order doesn't matter + // expect(data.slice(2, 4).sort(compareIds)).toMatchObject([REMOVE_T1, REMOVE_T2]); + + // expect(data.slice(4, 5)).toMatchObject([ + // // Snapshot and/or replication insert + // PUT_T3 + // ]); + + // if (data.length > 5) { + // expect(data.slice(5)).toMatchObject([ + // // Replicated insert (optional duplication) + // PUT_T3 + // ]); + // } }); test('add table', async () => { From dfa7e3227cee7ba6c4e7ec09a00d9e537bb6ec72 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 12:19:09 +0200 Subject: [PATCH 092/101] Fix MS SQL replication. --- modules/module-mssql/src/replication/CDCStream.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/module-mssql/src/replication/CDCStream.ts b/modules/module-mssql/src/replication/CDCStream.ts index e09fb82d7..67200f54d 100644 --- a/modules/module-mssql/src/replication/CDCStream.ts +++ b/modules/module-mssql/src/replication/CDCStream.ts @@ -213,7 +213,7 @@ export class CDCStream { schema: matchedTable.schema }); - const tables = await this.processTable( + const processedTables = await this.processTable( writer, { name: matchedTable.name, @@ -238,7 +238,7 @@ export class CDCStream { }); await writer.drop(dropTables); - tables.push(...tables); + tables.push(...processedTables); } return tables; From 44edd304274f5961a25882034ee046f36dd0ceab Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 13:42:29 +0200 Subject: [PATCH 093/101] Restructure parameter lookups. 
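Parameter lookup data is now tagged with the id of the parameter-lookup source definition that produced it: current_data keeps RecordedLookup pairs ({ d, l }) instead of bare binaries, bucket_parameters documents gain a def field, and ScopedParameterLookup carries its originating ParameterIndexLookupCreator. That lets cleanup of a removed definition run as deleteMany({ def: id }), and lets PersistedBatch skip REMOVE operations for definitions a table no longer feeds. A small sketch of the keying used when diffing existing lookups against newly evaluated ones (it mirrors PersistedBatch.saveParameterData; RecordedLookup is copied from models.ts):

    import * as bson from 'bson';

    interface RecordedLookup {
      d: number;      // parameter lookup source (definition) id
      l: bson.Binary; // serialized lookup values
    }

    // Definition id plus serialized values identify a stored lookup, so a
    // lookup only counts as still present if the same definition produced it.
    function recordedLookupKey(lookup: RecordedLookup): string {
      return lookup.d + '.' + lookup.l.toString('base64');
    }
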
--- .../implementation/MongoBucketDataWriter.ts | 17 ++++++++--- .../implementation/MongoSyncBucketStorage.ts | 19 ++++++------ .../storage/implementation/PersistedBatch.ts | 29 +++++++++++++++---- .../src/storage/implementation/models.ts | 8 ++++- .../sync-rules/src/BucketParameterQuerier.ts | 13 +++++++-- 5 files changed, 63 insertions(+), 23 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts index 6457de1f2..22bfaf3ef 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts @@ -30,7 +30,14 @@ import * as timers from 'node:timers/promises'; import { idPrefixFilter, mongoTableId } from '../../utils/util.js'; import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; import { PowerSyncMongo } from './db.js'; -import { CurrentBucket, CurrentDataDocument, SourceKey, SourceTableDocument, SyncRuleDocument } from './models.js'; +import { + CurrentBucket, + CurrentDataDocument, + RecordedLookup, + SourceKey, + SourceTableDocument, + SyncRuleDocument +} from './models.js'; import { MongoIdSequence } from './MongoIdSequence.js'; import { MongoPersistedSyncRules } from './MongoPersistedSyncRules.js'; import { batchCreateCustomWriteCheckpoints } from './MongoWriteCheckpointAPI.js'; @@ -683,8 +690,8 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { let existing_buckets: CurrentBucket[] = []; let new_buckets: CurrentBucket[] = []; - let existing_lookups: bson.Binary[] = []; - let new_lookups: bson.Binary[] = []; + let existing_lookups: RecordedLookup[] = []; + let new_lookups: RecordedLookup[] = []; const before_key: SourceKey = { g: 0, t: mongoTableId(record.sourceTable.id), k: beforeId }; @@ -889,7 +896,9 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { existing_lookups }); new_lookups = paramEvaluated.map((p) => { - return storage.serializeLookup(p.lookup); + const l = storage.serializeLookup(p.lookup); + const d = this.mapping.parameterLookupId(p.lookup.source); + return { l, d }; }); } } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index 4501e0e2d..8823a752a 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -513,16 +513,15 @@ export class MongoSyncBucketStorage if (keepParameterLookupIds.has(id)) { continue; } - // FIXME: how do we do these deletes? - // For now, we delete only when the source table is removed. 
- // await this.retriedDelete(`deleting parameter lookup data for ${name}`, signal, () => - // this.db.bucket_parameters.deleteMany( - // { - // 'key.g': id - // }, - // { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } - // ) - // ); + // FIXME: Index this + await this.retriedDelete(`deleting parameter lookup data for ${name}`, signal, () => + this.db.bucket_parameters.deleteMany( + { + def: id + }, + { maxTimeMS: lib_mongo.db.MONGO_CLEAR_OPERATION_TIMEOUT_MS } + ) + ); } } diff --git a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts index 08f64ca28..2e373f736 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts @@ -15,6 +15,7 @@ import { BucketStateDocument, CurrentBucket, CurrentDataDocument, + RecordedLookup, SourceKey } from './models.js'; import { BucketDefinitionMapping } from './BucketDefinitionMapping.js'; @@ -153,6 +154,11 @@ export class PersistedBatch { for (let bd of remaining_buckets.values()) { // REMOVE + if (options.table.bucketDataSourceIds?.indexOf(bd.def) === -1) { + // This bucket definition is no longer used for this table. + // Don't generate REMOVE operations for it. + continue; + } const op_id = options.op_seq.next(); this.debugLastOpId = op_id; @@ -185,7 +191,7 @@ export class PersistedBatch { sourceKey: storage.ReplicaId; sourceTable: storage.SourceTable; evaluated: EvaluatedParameters[]; - existing_lookups: bson.Binary[]; + existing_lookups: RecordedLookup[]; }) { // This is similar to saving bucket data. // A key difference is that we don't need to keep the history intact. @@ -196,16 +202,19 @@ export class PersistedBatch { // We also don't need to keep history intact. const { sourceTable, sourceKey, evaluated } = data; - const remaining_lookups = new Map(); + const remaining_lookups = new Map(); for (let l of data.existing_lookups) { - remaining_lookups.set(l.toString('base64'), l); + const key = l.d + '.' + l.l.toString('base64'); + remaining_lookups.set(key, l); } // 1. Insert new entries for (let result of evaluated) { + const sourceDefinitionId = this.mapping.parameterLookupId(result.lookup.source); const binLookup = storage.serializeLookup(result.lookup); const hex = binLookup.toString('base64'); - remaining_lookups.delete(hex); + const key = sourceDefinitionId + '.' + hex; + remaining_lookups.delete(key); const op_id = data.op_seq.next(); this.debugLastOpId = op_id; @@ -213,6 +222,7 @@ export class PersistedBatch { insertOne: { document: { _id: op_id, + def: sourceDefinitionId, key: { g: 0, t: mongoTableId(sourceTable.id), @@ -229,6 +239,14 @@ export class PersistedBatch { // 2. "REMOVE" entries for any lookup not touched. for (let lookup of remaining_lookups.values()) { + const sourceDefinitionId = lookup.d; + + if (sourceTable.parameterLookupSourceIds?.indexOf(sourceDefinitionId) === -1) { + // This bucket definition is no longer used for this table. + // Don't generate REMOVE operations for it. 
+ continue; + } + const op_id = data.op_seq.next(); this.debugLastOpId = op_id; this.bucketParameters.push({ @@ -240,7 +258,8 @@ export class PersistedBatch { t: mongoTableId(sourceTable.id), k: sourceKey }, - lookup: lookup, + def: sourceDefinitionId, + lookup: lookup.l, bucket_parameters: [] } } diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index 85f36ba8a..dfc21cb31 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -30,11 +30,16 @@ export interface BucketDataKey { o: bigint; } +export interface RecordedLookup { + d: number; + l: bson.Binary; +} + export interface CurrentDataDocument { _id: SourceKey; data: bson.Binary; buckets: CurrentBucket[]; - lookups: bson.Binary[]; + lookups: RecordedLookup[]; /** * If set, this can be deleted, once there is a consistent checkpoint >= pending_delete. * @@ -52,6 +57,7 @@ export interface CurrentBucket { export interface BucketParameterDocument { _id: bigint; + def: number; key: SourceKey; lookup: bson.Binary; bucket_parameters: Record[]; diff --git a/packages/sync-rules/src/BucketParameterQuerier.ts b/packages/sync-rules/src/BucketParameterQuerier.ts index 4d603b0a8..88960babc 100644 --- a/packages/sync-rules/src/BucketParameterQuerier.ts +++ b/packages/sync-rules/src/BucketParameterQuerier.ts @@ -4,6 +4,7 @@ import { ParameterLookupScope } from './HydrationState.js'; import { RequestedStream } from './SqlSyncRules.js'; import { RequestParameters, SqliteJsonRow, SqliteJsonValue } from './types.js'; import { normalizeParameterValue } from './utils.js'; +import { ParameterIndexLookupCreator } from './index.js'; /** * Represents a set of parameter queries for a specific request. @@ -106,6 +107,7 @@ export function mergeBucketParameterQueriers(queriers: BucketParameterQuerier[]) export class ScopedParameterLookup { // bucket definition name, parameter query index, ...lookup values readonly values: readonly SqliteJsonValue[]; + readonly source: ParameterIndexLookupCreator; #cachedSerializedForm?: string; @@ -119,22 +121,27 @@ export class ScopedParameterLookup { } static normalized(scope: ParameterLookupScope, lookup: UnscopedParameterLookup): ScopedParameterLookup { - return new ScopedParameterLookup([scope.lookupName, scope.queryId, ...lookup.lookupValues]); + return new ScopedParameterLookup(scope.source, [scope.lookupName, scope.queryId, ...lookup.lookupValues]); } /** * Primarily for test fixtures. */ static direct(scope: ParameterLookupScope, values: SqliteJsonValue[]): ScopedParameterLookup { - return new ScopedParameterLookup([scope.lookupName, scope.queryId, ...values.map(normalizeParameterValue)]); + return new ScopedParameterLookup(scope.source, [ + scope.lookupName, + scope.queryId, + ...values.map(normalizeParameterValue) + ]); } /** * * @param values must be pre-normalized (any integer converted into bigint) */ - private constructor(values: SqliteJsonValue[]) { + private constructor(source: ParameterIndexLookupCreator, values: SqliteJsonValue[]) { this.values = Object.freeze(values); + this.source = source; } } From 6285c9fdd892a2f3959e2dfbcf2b0275e4ca0668 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 13:53:33 +0200 Subject: [PATCH 094/101] Fix test. 
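ScopedParameterLookup now carries its originating ParameterIndexLookupCreator (see the previous patch), and this test asserts lookups with toEqual, which presumably compares that field as well, so a `source: null as any` placeholder no longer matches what the real querier produces. The fix resolves the actual creator for the table pattern behind the lookup, condensed here from the diff:

    const source = SYNC_RULES_DYNAMIC.getMatchingSources(new TablePattern('public', 'projects'))
      .parameterIndexLookupCreators[0];
    const expected = ScopedParameterLookup.direct(
      { lookupName: 'by_project', queryId: '1', source },
      ['u1']
    );
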
--- .../test/src/sync/BucketChecksumState.test.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/service-core/test/src/sync/BucketChecksumState.test.ts b/packages/service-core/test/src/sync/BucketChecksumState.test.ts index 24b1b2a2f..84664ec3f 100644 --- a/packages/service-core/test/src/sync/BucketChecksumState.test.ts +++ b/packages/service-core/test/src/sync/BucketChecksumState.test.ts @@ -19,6 +19,7 @@ import { ScopedParameterLookup, SqliteJsonRow, SqlSyncRules, + TablePattern, versionedHydrationState } from '@powersync/service-sync-rules'; import { beforeEach, describe, expect, test } from 'vitest'; @@ -519,10 +520,13 @@ bucket_definitions: bucketStorage: storage }); + const source = SYNC_RULES_DYNAMIC.getMatchingSources(new TablePattern('public', 'projects')) + .parameterIndexLookupCreators[0]; + const line = (await state.buildNextCheckpointLine({ base: storage.makeCheckpoint(1n, (lookups) => { expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1', source: null as any }, ['u1']) + ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1', source }, ['u1']) ]); return [{ id: 1 }, { id: 2 }]; }), @@ -589,7 +593,7 @@ bucket_definitions: const line2 = (await state.buildNextCheckpointLine({ base: storage.makeCheckpoint(2n, (lookups) => { expect(lookups).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1', source: null as any }, ['u1']) + ScopedParameterLookup.direct({ lookupName: 'by_project', queryId: '1', source }, ['u1']) ]); return [{ id: 1 }, { id: 2 }, { id: 3 }]; }), From 6c06fd0206b8c67a036168dafa2fffbb406b0e88 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 14:26:03 +0200 Subject: [PATCH 095/101] Fix sync-rule tests to use the correct source. 
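Same theme as the previous patch, applied to the sync-rules tests: a ParameterLookupScope now needs the creator belonging to the specific parsed stream under test, so the shared STREAM_0 / STREAM_1 constants at the top of the describe block are removed and each test builds its scope from its own desc. The recurring pattern, as it appears in streams.test.ts:

    const desc = parseStream(
      "SELECT * FROM comments WHERE issue_id IN (SELECT id FROM issues WHERE owner_id = auth.user_id()) OR auth.parameter('is_admin')"
    );
    const STREAM_0: ParameterLookupScope = {
      lookupName: 'stream',
      queryId: '0',
      source: desc.parameterIndexLookupCreators[0]
    };
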
--- packages/sync-rules/test/src/streams.test.ts | 76 ++++++++++++++++--- .../src/sync_plan/evaluator/evaluator.test.ts | 16 +++- .../sync-rules/test/src/sync_rules.test.ts | 9 ++- 3 files changed, 82 insertions(+), 19 deletions(-) diff --git a/packages/sync-rules/test/src/streams.test.ts b/packages/sync-rules/test/src/streams.test.ts index 9b75bde28..068226d72 100644 --- a/packages/sync-rules/test/src/streams.test.ts +++ b/packages/sync-rules/test/src/streams.test.ts @@ -27,17 +27,6 @@ import { import { normalizeQuerierOptions, PARSE_OPTIONS, removeSourceSymbol, TestSourceTable } from './util.js'; describe('streams', () => { - const STREAM_0: ParameterLookupScope = { - lookupName: 'stream', - queryId: '0', - source: null as any - }; - const STREAM_1: ParameterLookupScope = { - lookupName: 'stream', - queryId: '1', - source: null as any - }; - test('refuses edition: 1', () => { expect(() => syncStreamFromSql('stream', 'SELECT * FROM comments', { @@ -226,6 +215,12 @@ describe('streams', () => { "SELECT * FROM comments WHERE issue_id IN (SELECT id FROM issues WHERE owner_id = auth.user_id()) OR auth.parameter('is_admin')" ); + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: desc.parameterIndexLookupCreators[0] + }; + expect(evaluateBucketIds(desc, COMMENTS, { id: 'c', issue_id: 'i1' })).toStrictEqual([ '1#stream|0["i1"]', '1#stream|1[]' @@ -269,6 +264,12 @@ describe('streams', () => { ); const lookup = desc.parameterIndexLookupCreators[0]; + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: lookup + }; + expect(lookup.tableSyncsParameters(ISSUES)).toBe(true); expect(lookup.evaluateParameterRow(ISSUES, { id: 'issue_id', owner_id: 'user1', name: 'name' })).toStrictEqual([ { @@ -300,6 +301,12 @@ describe('streams', () => { const desc = parseStream('SELECT * FROM issues WHERE auth.user_id() IN (SELECT id FROM users WHERE is_admin)'); const lookup = desc.parameterIndexLookupCreators[0]; + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: lookup + }; + expect(lookup.tableSyncsParameters(ISSUES)).toBe(false); expect(lookup.tableSyncsParameters(USERS)).toBe(true); @@ -344,6 +351,17 @@ describe('streams', () => { id IN (SELECT user_b FROM friends WHERE user_a = auth.user_id()) `); + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: desc.parameterIndexLookupCreators[0] + }; + const STREAM_1: ParameterLookupScope = { + lookupName: 'stream', + queryId: '1', + source: desc.parameterIndexLookupCreators[1] + }; + expect(evaluateBucketIds(desc, USERS, { id: 'a', name: 'a' })).toStrictEqual([ '1#stream|0["a"]', '1#stream|1["a"]' @@ -407,6 +425,12 @@ describe('streams', () => { "SELECT * FROM comments WHERE issue_id IN (SELECT id FROM issues WHERE owner_id = auth.user_id()) AND label IN (subscription.parameters() -> 'labels')" ); + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: desc.parameterIndexLookupCreators[0] + }; + expect(evaluateBucketIds(desc, COMMENTS, { id: 'a', issue_id: 'i', label: 'l' })).toStrictEqual([ '1#stream|0["i","l"]' ]); @@ -457,6 +481,12 @@ describe('streams', () => { ); const lookup = desc.parameterIndexLookupCreators[0]; + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: lookup + }; + expect(lookup.tableSyncsParameters(FRIENDS)).toBe(true); expect(lookup.evaluateParameterRow(FRIENDS, { user_a: 'a', user_b: 'b' })).toStrictEqual([ { @@ 
-613,6 +643,12 @@ describe('streams', () => { 'select * from comments where NOT (issue_id not in (select id from issues where owner_id = auth.user_id()))' ); + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: desc.parameterIndexLookupCreators[0] + }; + expect( desc.parameterIndexLookupCreators[0].evaluateParameterRow(ISSUES, { id: 'issue_id', @@ -682,6 +718,12 @@ describe('streams', () => { ); expect(desc.variants).toHaveLength(2); + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: desc.parameterIndexLookupCreators[0] + }; + expect(evaluateBucketIds(desc, COMMENTS, { id: 'c', issue_id: 'issue_id', content: 'a' })).toStrictEqual([]); expect(evaluateBucketIds(desc, COMMENTS, { id: 'c', issue_id: 'issue_id', content: 'aaa' })).toStrictEqual([ '1#stream|0["issue_id"]', @@ -826,6 +868,12 @@ WHERE { ...options, schema } ); + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: desc.parameterIndexLookupCreators[0] + }; + expect(evaluateBucketIds(desc, scene, { _id: 'scene', project: 'foo' })).toStrictEqual(['1#stream|0["foo"]']); expect( @@ -914,6 +962,12 @@ WHERE { ...options, schema } ); + const STREAM_0: ParameterLookupScope = { + lookupName: 'stream', + queryId: '0', + source: desc.parameterIndexLookupCreators[0] + }; + expect(evaluateBucketIds(desc, users, { id: 'user', first_name: 'Test', last_name: 'User' })).toStrictEqual([ '1#stream|0["user"]' ]); diff --git a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts index 84a4c5715..40a21ffb8 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts @@ -7,7 +7,8 @@ import { SourceTableInterface, SqliteJsonRow, SqliteRow, - SqliteValue + SqliteValue, + TablePattern } from '../../../../src/index.js'; import { TestSourceTable } from '../../util.js'; @@ -138,13 +139,16 @@ describe('evaluating parameters', () => { } ]); + const issueSource = desc.getMatchingSources(new TablePattern('test_schema', 'issues')) + .parameterIndexLookupCreators[0]; + expect(desc.tableSyncsData(COMMENTS)).toBeTruthy(); expect(desc.tableSyncsData(ISSUES)).toBeFalsy(); expect(desc.tableSyncsParameters(ISSUES)).toBeTruthy(); expect(desc.evaluateParameterRow(ISSUES, { id: 'issue_id', owner_id: 'user1', name: 'name' })).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: null as any }, ['user1']), + lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: issueSource }, ['user1']), bucketParameters: [ { '0': 'issue_id' @@ -228,6 +232,10 @@ describe('querier', () => { streams: {} }); expect(errors).toStrictEqual([]); + const userSource = desc.getMatchingSources(new TablePattern('test_schema', 'users')) + .parameterIndexLookupCreators[0]; + const issueSource = desc.getMatchingSources(new TablePattern('test_schema', 'issues')) + .parameterIndexLookupCreators[0]; expect(querier.staticBuckets.map((e) => e.bucket)).toStrictEqual([]); let call = 0; @@ -241,7 +249,7 @@ describe('querier', () => { { lookupName: 'lookup', queryId: '0', - source: null as any + source: userSource }, ['user'] ) @@ -255,7 +263,7 @@ describe('querier', () => { { lookupName: 'lookup', queryId: '1', - source: null as any + source: issueSource }, ['name'] ) diff --git a/packages/sync-rules/test/src/sync_rules.test.ts 
b/packages/sync-rules/test/src/sync_rules.test.ts index 37dacea92..f010aa944 100644 --- a/packages/sync-rules/test/src/sync_rules.test.ts +++ b/packages/sync-rules/test/src/sync_rules.test.ts @@ -111,10 +111,11 @@ bucket_definitions: PARSE_OPTIONS ); const hydrated = rules.hydrate(hydrationParams); + const source = rules.bucketParameterLookupSources[0]; expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([ { bucketParameters: [{}], - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1', source: null as any }, ['user1']) + lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1', source }, ['user1']) } ]); expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 0 })).toEqual([]); @@ -214,7 +215,7 @@ bucket_definitions: ]); expect(await findQuerierLookups(querier)).toEqual([ ScopedParameterLookup.direct( - { lookupName: 'mybucket.test', queryId: '2.test', source: rules.bucketParameterLookupSources[1] }, + { lookupName: 'mybucket.test', queryId: '2.test', source: rules.bucketParameterLookupSources[0] }, ['user1'] ) ]); @@ -223,7 +224,7 @@ bucket_definitions: { bucketParameters: [{ user_id: 'user1' }], lookup: ScopedParameterLookup.direct( - { lookupName: 'mybucket.test', queryId: '2.test', source: rules.bucketParameterLookupSources[1] }, + { lookupName: 'mybucket.test', queryId: '2.test', source: rules.bucketParameterLookupSources[0] }, ['user1'] ) } @@ -1086,7 +1087,7 @@ bucket_definitions: expect(await findQuerierLookups(hydratedQuerier)).toEqual([ ScopedParameterLookup.direct( - { lookupName: 'admin_only', queryId: '1', source: rules.bucketParameterLookupSources[3] }, + { lookupName: 'admin_only', queryId: '1', source: rules.bucketParameterLookupSources[2] }, [1] ) ]); From 2c311231825465e000090037c3622a925cfa8817 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 3 Feb 2026 14:38:07 +0200 Subject: [PATCH 096/101] Fix truncating tables. --- .../src/storage/implementation/MongoBucketDataWriter.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts index 22bfaf3ef..9a8294df1 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketDataWriter.ts @@ -484,7 +484,9 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter { name: doc.table_name, replicaIdColumns: doc.replica_id_columns2?.map((c) => ({ name: c.name, typeOid: c.type_oid, type: c.type })) ?? [], - snapshotComplete: doc.snapshot_done ?? true + snapshotComplete: doc.snapshot_done ?? true, + bucketDataSourceIds: doc.bucket_data_source_ids ?? [], + parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [] }) ); return dropTables; From 8bc2aec99b6c2d26a8c7cb95362a98bbe95332bb Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 17 Feb 2026 13:32:34 +0200 Subject: [PATCH 097/101] Post-merge type fixes. 
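
After the merge, the test utilities construct request parameters through a shared
helper instead of calling the RequestParameters constructor directly, and bucket /
parameter-lookup scopes now carry their originating source. The sketch below only
illustrates the helper this patch adds to general-utils.ts: the Record<> type
arguments are not visible in this patch, so Record<string, unknown> is an
assumption, and the import paths are taken from the other hunks in this series
(JwtPayload from '@powersync/service-core', RequestParameters from
'@powersync/service-sync-rules').

    import { JwtPayload } from '@powersync/service-core';
    import { RequestParameters } from '@powersync/service-sync-rules';

    // Assumed type arguments; the real helper may use narrower payload types.
    export function requestParameters(
      jwtPayload: Record<string, unknown>,
      clientParameters?: Record<string, unknown>
    ): RequestParameters {
      // Wrap the raw claims so tests no longer build JwtPayload/RequestParameters by hand.
      return new RequestParameters(new JwtPayload(jwtPayload), clientParameters ?? {});
    }

    // Typical call site in the storage parameter tests:
    // const parameters = requestParameters({ sub: 'user1' });
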
--- .../src/storage/MongoBucketStorage.ts | 8 ++--- .../storage/implementation/MergedSyncRules.ts | 6 ++-- .../src/test-utils/general-utils.ts | 11 +++++-- .../register-data-storage-parameter-tests.ts | 30 +++++++++-------- .../test/src/sync/BucketChecksumState.test.ts | 3 +- .../test/src/parameter_queries.test.ts | 29 ++++++++++------ .../test/src/static_parameter_queries.test.ts | 11 ++++--- packages/sync-rules/test/src/streams.test.ts | 25 +++++++++----- .../src/sync_plan/evaluator/evaluator.test.ts | 8 +++-- .../sync_plan/evaluator/table_valued.test.ts | 7 ++-- .../sync-rules/test/src/sync_rules.test.ts | 15 +++++---- .../src/table_valued_function_queries.test.ts | 33 ++++++++++++------- 12 files changed, 118 insertions(+), 68 deletions(-) diff --git a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts index 187b2e7d1..1c14100dc 100644 --- a/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/MongoBucketStorage.ts @@ -1,4 +1,4 @@ -import { SqlSyncRules } from '@powersync/service-sync-rules'; +import { BucketDataSource, ParameterIndexLookupCreator, SqlSyncRules } from '@powersync/service-sync-rules'; import { GetIntanceOptions, maxLsn, CreateWriterOptions, storage } from '@powersync/service-core'; @@ -286,9 +286,9 @@ export class MongoBucketStorage existingMapping = new BucketDefinitionMapping({}, {}); } - syncRules.hydrate({ + syncRules.config.hydrate({ hydrationState: { - getBucketSourceScope(source) { + getBucketSourceScope(source: BucketDataSource) { const existingId = existingMapping.equivalentBucketSourceId(source); if (existingId != null) { bucketDefinitionMapping[source.uniqueName] = existingId; @@ -302,7 +302,7 @@ export class MongoBucketStorage source }; }, - getParameterIndexLookupScope(source) { + getParameterIndexLookupScope(source: ParameterIndexLookupCreator) { const key = `${source.defaultLookupScope.lookupName}#${source.defaultLookupScope.queryId}`; const existingId = existingMapping.equivalentParameterLookupId(source); if (existingId != null) { diff --git a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts index 0413b57b2..06607deb7 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts @@ -21,7 +21,7 @@ import { SqliteInputValue, SqliteRow, SqliteValue, - SqlSyncRules, + SyncConfig, TableDataSources, TablePattern } from '@powersync/service-sync-rules'; @@ -58,7 +58,7 @@ export class MergedSyncRules implements RowProcessor { // keyed by TablePattern.key private tableDataSources: Map = new Map(); - private allSyncRules: SqlSyncRules[]; + private allSyncRules: SyncConfig[]; // all table patterns private sourcePatterns: TablePattern[]; @@ -77,7 +77,7 @@ export class MergedSyncRules implements RowProcessor { this.allSyncRules = []; for (let source of sources) { - const syncRules = source.sync_rules; + const syncRules = source.sync_rules.config; const mapping = source.mapping; const hydrationState = source.hydrationState; const dataSources = syncRules.bucketDataSources; diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 60d4640e1..f6d64bf1d 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ 
b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -1,4 +1,4 @@ -import { BucketDataRequest, InternalOpId, storage, utils } from '@powersync/service-core'; +import { BucketDataRequest, InternalOpId, JwtPayload, storage, utils } from '@powersync/service-core'; import { GetQuerierOptions, RequestParameters, @@ -136,6 +136,13 @@ export function querierOptions(globalParameters: RequestParameters): GetQuerierO }; } +export function requestParameters( + jwtPayload: Record, + clientParameters?: Record +): RequestParameters { + return new RequestParameters(new JwtPayload(jwtPayload), clientParameters ?? {}); +} + function isParsedSyncRules( syncRules: storage.PersistedSyncRulesContent | storage.PersistedSyncRules ): syncRules is storage.PersistedSyncRules { @@ -156,7 +163,7 @@ export function bucketRequest( bucket ??= 'global[]'; const definitionName = bucket.substring(0, bucket.indexOf('[')); const parameters = bucket.substring(bucket.indexOf('[')); - const source = parsed.sync_rules.bucketDataSources.find((b) => b.uniqueName === definitionName); + const source = parsed.sync_rules.config.bucketDataSources.find((b) => b.uniqueName === definitionName); if (source == null) { throw new Error(`Failed to find global bucket ${bucket}`); diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index 42fbd4c2d..d2ced62a1 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -1,5 +1,5 @@ -import { JwtPayload, storage } from '@powersync/service-core'; -import { RequestParameters, ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; +import { storage } from '@powersync/service-core'; +import { ScopedParameterLookup, SqliteJsonRow } from '@powersync/service-sync-rules'; import { expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; @@ -62,7 +62,7 @@ bucket_definitions: const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = new RequestParameters({ sub: 'user1' }, {}); + const parameters = test_utils.requestParameters({ sub: 'user1' }); const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; const parameter_sets = await querier.queryDynamicBucketDescriptions(checkpoint); expect(parameter_sets).toMatchObject([{ bucket: expect.stringMatching(/"group1a"/) }]); @@ -109,7 +109,7 @@ bucket_definitions: const checkpoint2 = await bucketStorage.getCheckpoint(); const querier = hydrated.getBucketParameterQuerier( - test_utils.querierOptions(new RequestParameters({ sub: 'user1' }, {})) + test_utils.querierOptions(test_utils.requestParameters({ sub: 'user1' })) ).querier; const buckets1 = await querier.queryDynamicBucketDescriptions(checkpoint2); @@ -188,7 +188,7 @@ bucket_definitions: // association of `list1`::`todo2` const querier = hydrated.getBucketParameterQuerier( test_utils.querierOptions( - new RequestParameters({ sub: 'user1', parameters: { list_id: ['list1', 'list2'] } }, {}) + test_utils.requestParameters({ sub: 'user1', parameters: { list_id: ['list1', 'list2'] } }) ) ).querier; const checkpoint = await bucketStorage.getCheckpoint(); @@ -240,7 +240,7 @@ bucket_definitions: const querier1 = hydrated.getBucketParameterQuerier( test_utils.querierOptions( - new RequestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3.14 
} }, {}) + test_utils.requestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3.14 } }) ) ).querier; const buckets1 = await querier1.queryDynamicBucketDescriptions(checkpoint); @@ -248,14 +248,16 @@ bucket_definitions: const querier2 = hydrated.getBucketParameterQuerier( test_utils.querierOptions( - new RequestParameters({ sub: 'user1', parameters: { n1: 314, f2: 314n, f3: 3.14 } }, {}) + test_utils.requestParameters({ sub: 'user1', parameters: { n1: 314, f2: 314n, f3: 3.14 } }) ) ).querier; const buckets2 = await querier2.queryDynamicBucketDescriptions(checkpoint); expect(buckets2).toMatchObject([{ bucket: expect.stringMatching(/"group1"/), definition: 'mybucket' }]); const querier3 = hydrated.getBucketParameterQuerier( - test_utils.querierOptions(new RequestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3 } }, {})) + test_utils.querierOptions( + test_utils.requestParameters({ sub: 'user1', parameters: { n1: 314n, f2: 314, f3: 3 } }) + ) ).querier; const buckets3 = await querier3.queryDynamicBucketDescriptions(checkpoint); expect(buckets3).toEqual([]); @@ -310,7 +312,9 @@ bucket_definitions: const checkpoint = await bucketStorage.getCheckpoint(); const querier = hydrated.getBucketParameterQuerier( - test_utils.querierOptions(new RequestParameters({ sub: 'user1', parameters: { n1: 1152921504606846976n } }, {})) + test_utils.querierOptions( + test_utils.requestParameters({ sub: 'user1', parameters: { n1: 1152921504606846976n } }) + ) ).querier; const buckets = await querier.queryDynamicBucketDescriptions(checkpoint); expect(buckets.map(test_utils.removeSourceSymbol)).toMatchObject([ @@ -351,7 +355,7 @@ bucket_definitions: await writer.commit('1/1'); const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = new RequestParameters(new JwtPayload({ sub: 'u1' }), {}); + const parameters = test_utils.requestParameters({ sub: 'u1' }); const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; @@ -427,7 +431,7 @@ bucket_definitions: const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = new RequestParameters(new JwtPayload({ sub: 'unknown' }), {}); + const parameters = test_utils.requestParameters({ sub: 'unknown' }); const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; @@ -523,7 +527,7 @@ bucket_definitions: const checkpoint = await bucketStorage.getCheckpoint(); - const parameters = new RequestParameters(new JwtPayload({ sub: 'u1' }), {}); + const parameters = test_utils.requestParameters({ sub: 'u1' }); // Test intermediate values - could be moved to sync_rules.test.ts const querier = hydrated.getBucketParameterQuerier(test_utils.querierOptions(parameters)).querier; @@ -588,7 +592,7 @@ bucket_definitions: const checkpoint = await bucketStorage.getCheckpoint(); const querier = hydrated.getBucketParameterQuerier( - test_utils.querierOptions(new RequestParameters({ sub: 'user1' }, {})) + test_utils.querierOptions(test_utils.requestParameters({ sub: 'user1' })) ).querier; const parameters = await querier.queryDynamicBucketDescriptions(checkpoint); expect(parameters).toEqual([]); diff --git a/packages/service-core/test/src/sync/BucketChecksumState.test.ts b/packages/service-core/test/src/sync/BucketChecksumState.test.ts index fbf4436d5..b9286877c 100644 --- a/packages/service-core/test/src/sync/BucketChecksumState.test.ts +++ b/packages/service-core/test/src/sync/BucketChecksumState.test.ts @@ -20,7 +20,8 @@ import { ScopedParameterLookup, 
SqliteJsonRow, SqlSyncRules, - TablePattern + TablePattern, + versionedHydrationState } from '@powersync/service-sync-rules'; import { beforeEach, describe, expect, test } from 'vitest'; import { removeSource, removeSourceSymbol } from '../utils.js'; diff --git a/packages/sync-rules/test/src/parameter_queries.test.ts b/packages/sync-rules/test/src/parameter_queries.test.ts index e1e703bce..8ac82a7f9 100644 --- a/packages/sync-rules/test/src/parameter_queries.test.ts +++ b/packages/sync-rules/test/src/parameter_queries.test.ts @@ -124,13 +124,15 @@ describe('parameter queries', () => { // We _do_ need to care about the bucket string representation. expect( query.resolveBucketDescriptions([{ int1: 314, float1: 3.14, float2: 314 }], requestParameters({}), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); expect( query.resolveBucketDescriptions([{ int1: 314n, float1: 3.14, float2: 314 }], requestParameters({}), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); }); @@ -494,7 +496,7 @@ describe('parameter queries', () => { query.resolveBucketDescriptions( [{ user_id: 'user1' }], requestParameters({ sub: 'user1', parameters: { is_admin: true } }), - { bucketPrefix: 'mybucket' } + { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE } ) ).toEqual([{ bucket: 'mybucket["user1",1]', priority: 3 }]); }); @@ -873,12 +875,13 @@ describe('parameter queries', () => { describe('custom hydrationState', function () { const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -906,13 +909,17 @@ describe('parameter queries', () => { }); expect(result).toEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, ['test-user']), + lookup: ScopedParameterLookup.direct( + { lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, + ['test-user'] + ), bucketParameters: [{ group_id: 'group1' }] }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, [ - 'other-user' - ]), + lookup: ScopedParameterLookup.direct( + { lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, + ['other-user'] + ), bucketParameters: [{ group_id: 'group1' }] } ]); @@ -944,7 +951,9 @@ describe('parameter queries', () => { const querier = queriers[0]; expect(querier.hasDynamicBuckets).toBeTruthy(); expect(await findQuerierLookups(querier)).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test' }, ['test-user']) + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ + 'test-user' + ]) ]); }); }); diff --git a/packages/sync-rules/test/src/static_parameter_queries.test.ts b/packages/sync-rules/test/src/static_parameter_queries.test.ts index 6b6c66dcf..b025949b5 100644 --- a/packages/sync-rules/test/src/static_parameter_queries.test.ts +++ b/packages/sync-rules/test/src/static_parameter_queries.test.ts @@ -6,7 +6,8 @@ import { EMPTY_DATA_SOURCE, 
PARSE_OPTIONS, requestParameters } from './util.js'; describe('static parameter queries', () => { const MYBUCKET_SCOPE: BucketDataScope = { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }; test('basic query', function () { @@ -38,7 +39,8 @@ describe('static parameter queries', () => { expect(query.bucketParameters!).toEqual(['user_id']); expect( query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), { - bucketPrefix: '1#mybucket' + bucketPrefix: '1#mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([{ bucket: '1#mybucket["user1"]', priority: 3 }]); }); @@ -479,12 +481,13 @@ describe('static parameter queries', () => { const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; diff --git a/packages/sync-rules/test/src/streams.test.ts b/packages/sync-rules/test/src/streams.test.ts index 6f5709b12..20d979c1f 100644 --- a/packages/sync-rules/test/src/streams.test.ts +++ b/packages/sync-rules/test/src/streams.test.ts @@ -29,11 +29,13 @@ import { normalizeQuerierOptions, PARSE_OPTIONS, requestParameters, TestSourceTa describe('streams', () => { const STREAM_0: ParameterLookupScope = { lookupName: 'stream', - queryId: '0' + queryId: '0', + source: {} as any }; const STREAM_1: ParameterLookupScope = { lookupName: 'stream', - queryId: '1' + queryId: '1', + source: {} as any }; test('refuses edition: 1', () => { @@ -760,7 +762,7 @@ describe('streams', () => { parameters: {}, getParameterSets(lookups) { expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct({ lookupName: 'account_member', queryId: '0' }, ['id']) + ScopedParameterLookup.direct({ lookupName: 'account_member', queryId: '0', source: {} as any }, ['id']) ]); return [{ result: 'account_id' }]; } @@ -973,12 +975,13 @@ WHERE const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}.test` }; + return { bucketPrefix: `${source.uniqueName}.test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -997,7 +1000,9 @@ WHERE }) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test' }, ['u1']), + lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test', source: {} as any }, [ + 'u1' + ]), bucketParameters: [ { result: 'i1' @@ -1006,7 +1011,9 @@ WHERE }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '1.test' }, ['myname']), + lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '1.test', source: {} as any }, [ + 'myname' + ]), bucketParameters: [ { result: 'i1' @@ -1022,7 +1029,9 @@ WHERE }) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test' }, ['u1']), + lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test', source: {} as any }, [ + 'u1' + ]), bucketParameters: [ { result: 'i1' diff --git 
a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts index ccd0d6854..b730f3f8a 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts @@ -213,7 +213,7 @@ streams: expect(desc.evaluateParameterRow(ISSUES, { id: 'issue_id', owner_id: 'user1', name: 'name' })).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0' }, ['user1']), + lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['user1']), bucketParameters: [ { '0': 'issue_id' @@ -348,7 +348,8 @@ streams: ScopedParameterLookup.direct( { lookupName: 'lookup', - queryId: '0' + queryId: '0', + source: {} as any }, ['user'] ) @@ -361,7 +362,8 @@ streams: ScopedParameterLookup.direct( { lookupName: 'lookup', - queryId: '1' + queryId: '1', + source: {} as any }, ['name'] ) diff --git a/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts index e8bab6c03..76a8bfac6 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts @@ -46,7 +46,7 @@ streams: desc.evaluateParameterRow(conversations, { id: 'chat', members: JSON.stringify(['user', 'another']) }) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0' }, ['chat']), + lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['chat']), bucketParameters: [ { '0': 'user' @@ -54,7 +54,7 @@ streams: ] }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0' }, ['chat']), + lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['chat']), bucketParameters: [ { '0': 'another' @@ -82,7 +82,8 @@ streams: ScopedParameterLookup.direct( { lookupName: 'lookup', - queryId: '0' + queryId: '0', + source: {} as any }, ['chat'] ) diff --git a/packages/sync-rules/test/src/sync_rules.test.ts b/packages/sync-rules/test/src/sync_rules.test.ts index 2b11df144..de3531925 100644 --- a/packages/sync-rules/test/src/sync_rules.test.ts +++ b/packages/sync-rules/test/src/sync_rules.test.ts @@ -114,7 +114,7 @@ bucket_definitions: expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([ { bucketParameters: [{}], - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1' }, ['user1']) + lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1', source: {} as any }, ['user1']) } ]); expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 0 })).toEqual([]); @@ -184,12 +184,13 @@ bucket_definitions: ); const hydrationState: HydrationState = { getBucketSourceScope(source) { - return { bucketPrefix: `${source.uniqueName}-test` }; + return { bucketPrefix: `${source.uniqueName}-test`, source }; }, getParameterIndexLookupScope(source) { return { lookupName: `${source.defaultLookupScope.lookupName}.test`, - queryId: `${source.defaultLookupScope.queryId}.test` + queryId: `${source.defaultLookupScope.queryId}.test`, + source }; } }; @@ -207,13 +208,15 @@ bucket_definitions: } ]); expect(await findQuerierLookups(querier)).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test' }, ['user1']) + ScopedParameterLookup.direct({ lookupName: 
'mybucket.test', queryId: '2.test', source: {} as any }, ['user1']) ]); expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([ { bucketParameters: [{ user_id: 'user1' }], - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test' }, ['user1']) + lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test', source: {} as any }, [ + 'user1' + ]) } ]); @@ -1044,7 +1047,7 @@ bucket_definitions: }); expect(await findQuerierLookups(hydratedQuerier)).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'admin_only', queryId: '1' }, [1]) + ScopedParameterLookup.direct({ lookupName: 'admin_only', queryId: '1', source: {} as any }, [1]) ]); }); diff --git a/packages/sync-rules/test/src/table_valued_function_queries.test.ts b/packages/sync-rules/test/src/table_valued_function_queries.test.ts index 411e1d701..e81db1b84 100644 --- a/packages/sync-rules/test/src/table_valued_function_queries.test.ts +++ b/packages/sync-rules/test/src/table_valued_function_queries.test.ts @@ -30,7 +30,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3, null] }), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, @@ -61,7 +62,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3, null] }), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, @@ -85,7 +87,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, {}), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, @@ -108,7 +111,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, {}), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([]); }); @@ -130,7 +134,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, {}), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([]); }); @@ -152,7 +157,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, {}), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([]); }); @@ -171,7 +177,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, {}), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([ { bucket: 'mybucket["a"]', priority: 3 }, @@ -197,7 +204,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, @@ -223,7 +231,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([ { bucket: 'mybucket[1]', priority: 3 }, @@ 
-249,7 +258,8 @@ describe('table-valued function queries', () => { expect( query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE }) ).toEqual([ { bucket: 'mybucket[2]', priority: 3 }, @@ -285,7 +295,8 @@ describe('table-valued function queries', () => { {} ), { - bucketPrefix: 'mybucket' + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE } ) ).toEqual([{ bucket: 'mybucket[1]', priority: 3 }]); From dfe726ac2dfc28df158141780bf94ec333c2d9e2 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 17 Feb 2026 13:42:35 +0200 Subject: [PATCH 098/101] Fix sync-rule tests. --- .../sync-rules/test/src/compatibility.test.ts | 156 +++++++++++------- .../test/src/parameter_queries.test.ts | 68 +++++--- .../test/src/static_parameter_queries.test.ts | 92 ++++++----- packages/sync-rules/test/src/streams.test.ts | 125 +++++++++----- .../src/sync_plan/evaluator/evaluator.test.ts | 116 +++++++------ .../sync_plan/evaluator/table_valued.test.ts | 38 +++-- .../sync-rules/test/src/sync_rules.test.ts | 92 +++++++---- .../src/table_valued_function_queries.test.ts | 33 ++-- 8 files changed, 442 insertions(+), 278 deletions(-) diff --git a/packages/sync-rules/test/src/compatibility.test.ts b/packages/sync-rules/test/src/compatibility.test.ts index 16973cf33..92ecdcbdc 100644 --- a/packages/sync-rules/test/src/compatibility.test.ts +++ b/packages/sync-rules/test/src/compatibility.test.ts @@ -10,7 +10,7 @@ import { } from '../../src/index.js'; import { versionedHydrationState } from '../../src/HydrationState.js'; -import { ASSETS, normalizeQuerierOptions, PARSE_OPTIONS } from './util.js'; +import { ASSETS, normalizeQuerierOptions, PARSE_OPTIONS, removeSource, removeSourceSymbol } from './util.js'; describe('compatibility options', () => { describe('timestamps', () => { @@ -31,13 +31,15 @@ bucket_definitions: ).config.hydrate(); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: 'mybucket[]', data: { description: '2025-08-19 09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -58,13 +60,15 @@ config: ).config.hydrate(); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: 'mybucket[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -85,18 +89,24 @@ config: ).config.hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: '1#stream|0[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + 
).toStrictEqual([ { bucket: '1#stream|0[]', definition: 'stream', @@ -123,17 +133,23 @@ config: ).config.hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: value + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: value + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', data: { description: '2025-08-19 09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + ).toStrictEqual([ { bucket: 'stream|0[]', definition: 'stream', @@ -160,13 +176,15 @@ config: ).config.hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: 'desc' - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: 'desc' + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: '1#mybucket[]', data: { description: 'desc', id: 'id' }, id: 'id', table: 'assets' }]); }); @@ -184,16 +202,18 @@ config: ).config.hydrate({ hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: new DateTimeValue('2025-08-19T09:21:00Z', undefined, { - subSecondPrecision: TimeValuePrecision.seconds, - defaultSubSecondPrecision: TimeValuePrecision.seconds + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: new DateTimeValue('2025-08-19T09:21:00Z', undefined, { + subSecondPrecision: TimeValuePrecision.seconds, + defaultSubSecondPrecision: TimeValuePrecision.seconds + }) }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: '1#stream|0[]', data: { description: '2025-08-19T09:21:00Z', id: 'id' }, id: 'id', table: 'assets' } ]); @@ -214,13 +234,15 @@ bucket_definitions: ).config.hydrate(); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: description - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: description + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: 'a[]', data: { desc: 'baz', id: 'id' }, id: 'id', table: 'assets' }]); }); @@ -238,13 +260,15 @@ config: ).config.hydrate(); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: { - id: 'id', - description: description - } - }) + rules + .evaluateRow({ + sourceTable: ASSETS, + record: { + id: 'id', + description: description + } + }) + .map(removeSource) ).toStrictEqual([{ bucket: 'a[]', data: { desc: null, id: 'id' }, id: 'id', table: 'assets' }]); }); }); @@ -294,13 +318,15 @@ config: hydrationState: versionedHydrationState(1) }); expect( - rules.evaluateRow({ - sourceTable: ASSETS, - record: rules.applyRowContext({ - id: 'id', - description: data + rules + .evaluateRow({ + sourceTable: ASSETS, + record: rules.applyRowContext({ + id: 'id', + description: data + }) }) - }) + .map(removeSource) ).toStrictEqual([ { bucket: withFixedQuirk ? 
'1#mybucket[]' : 'mybucket[]', @@ -315,7 +341,11 @@ config: } ]); - expect(rules.getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})).querier.staticBuckets).toStrictEqual([ + expect( + rules + .getBucketParameterQuerier(normalizeQuerierOptions({}, {}, {})) + .querier.staticBuckets.map(removeSourceSymbol) + ).toStrictEqual([ { bucket: withFixedQuirk ? '1#mybucket[]' : 'mybucket[]', definition: 'mybucket', diff --git a/packages/sync-rules/test/src/parameter_queries.test.ts b/packages/sync-rules/test/src/parameter_queries.test.ts index 8ac82a7f9..c2b00b123 100644 --- a/packages/sync-rules/test/src/parameter_queries.test.ts +++ b/packages/sync-rules/test/src/parameter_queries.test.ts @@ -12,7 +12,15 @@ import { UnscopedParameterLookup } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; -import { BASIC_SCHEMA, EMPTY_DATA_SOURCE, findQuerierLookups, PARSE_OPTIONS, requestParameters } from './util.js'; +import { + BASIC_SCHEMA, + EMPTY_DATA_SOURCE, + findQuerierLookups, + PARSE_OPTIONS, + removeSource, + removeSourceSymbol, + requestParameters +} from './util.js'; describe('parameter queries', () => { const table = (name: string): SourceTableInterface => ({ @@ -123,17 +131,21 @@ describe('parameter queries', () => { // We _do_ need to care about the bucket string representation. expect( - query.resolveBucketDescriptions([{ int1: 314, float1: 3.14, float2: 314 }], requestParameters({}), { - bucketPrefix: 'mybucket', - source: EMPTY_DATA_SOURCE - }) + query + .resolveBucketDescriptions([{ int1: 314, float1: 3.14, float2: 314 }], requestParameters({}), { + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE + }) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); expect( - query.resolveBucketDescriptions([{ int1: 314n, float1: 3.14, float2: 314 }], requestParameters({}), { - bucketPrefix: 'mybucket', - source: EMPTY_DATA_SOURCE - }) + query + .resolveBucketDescriptions([{ int1: 314n, float1: 3.14, float2: 314 }], requestParameters({}), { + bucketPrefix: 'mybucket', + source: EMPTY_DATA_SOURCE + }) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[314,3.14,314]', priority: 3 }]); }); @@ -493,11 +505,13 @@ describe('parameter queries', () => { ]); expect( - query.resolveBucketDescriptions( - [{ user_id: 'user1' }], - requestParameters({ sub: 'user1', parameters: { is_admin: true } }), - { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE } - ) + query + .resolveBucketDescriptions( + [{ user_id: 'user1' }], + requestParameters({ sub: 'user1', parameters: { is_admin: true } }), + { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE } + ) + .map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket["user1",1]', priority: 3 }]); }); @@ -907,18 +921,20 @@ describe('parameter queries', () => { id: 'group1', user_ids: JSON.stringify(['test-user', 'other-user']) }); - expect(result).toEqual([ + expect(result.map((entry) => ({ ...entry, lookup: removeSource(entry.lookup) }))).toEqual([ { - lookup: ScopedParameterLookup.direct( - { lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, - ['test-user'] + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ + 'test-user' + ]) ), bucketParameters: [{ group_id: 'group1' }] }, { - lookup: ScopedParameterLookup.direct( - { lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, - ['other-user'] + lookup: removeSource( + 
ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ + 'other-user' + ]) ), bucketParameters: [{ group_id: 'group1' }] } @@ -950,10 +966,12 @@ describe('parameter queries', () => { const querier = queriers[0]; expect(querier.hasDynamicBuckets).toBeTruthy(); - expect(await findQuerierLookups(querier)).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ - 'test-user' - ]) + expect((await findQuerierLookups(querier)).map(removeSource)).toEqual([ + removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ + 'test-user' + ]) + ) ]); }); }); diff --git a/packages/sync-rules/test/src/static_parameter_queries.test.ts b/packages/sync-rules/test/src/static_parameter_queries.test.ts index b025949b5..7a648b2ff 100644 --- a/packages/sync-rules/test/src/static_parameter_queries.test.ts +++ b/packages/sync-rules/test/src/static_parameter_queries.test.ts @@ -2,7 +2,7 @@ import { describe, expect, test } from 'vitest'; import { BucketDataScope, HydrationState } from '../../src/HydrationState.js'; import { BucketParameterQuerier, GetQuerierOptions, QuerierError, SqlParameterQuery } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; -import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, requestParameters } from './util.js'; +import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, removeSourceSymbol, requestParameters } from './util.js'; describe('static parameter queries', () => { const MYBUCKET_SCOPE: BucketDataScope = { @@ -10,6 +10,14 @@ describe('static parameter queries', () => { source: EMPTY_DATA_SOURCE }; + function getStaticBucketDescriptions( + query: StaticSqlParameterQuery, + parameters: ReturnType, + scope: BucketDataScope + ) { + return query.getStaticBucketDescriptions(parameters, scope).map(removeSourceSymbol); + } + test('basic query', function () { const sql = 'SELECT token_parameters.user_id'; const query = SqlParameterQuery.fromSql( @@ -21,7 +29,7 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); }); @@ -38,7 +46,7 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual(['user_id']); expect( - query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), { + getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), { bucketPrefix: '1#mybucket', source: EMPTY_DATA_SOURCE }) @@ -56,7 +64,7 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect(query.bucketParameters!).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); }); @@ -72,13 +80,15 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, 
requestParameters({ sub: 'user1', parameters: { is_admin: true } }), MYBUCKET_SCOPE ) ).toEqual([{ bucket: 'mybucket["user1"]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: 'user1', parameters: { is_admin: false } }), MYBUCKET_SCOPE ) @@ -95,7 +105,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["USER1"]', priority: 3 } ]); expect(query.bucketParameters!).toEqual(['upper_id']); @@ -112,13 +122,15 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: 'admin', parameters: { role: 'admin' } }), MYBUCKET_SCOPE ) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: 'user', parameters: { role: 'user' } }), MYBUCKET_SCOPE ) @@ -136,10 +148,10 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions(requestParameters({ parameters: { id1: 't1', id2: 't1' } }), MYBUCKET_SCOPE) + getStaticBucketDescriptions(query, requestParameters({ parameters: { id1: 't1', id2: 't1' } }), MYBUCKET_SCOPE) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions(requestParameters({ parameters: { id1: 't1', id2: 't2' } }), MYBUCKET_SCOPE) + getStaticBucketDescriptions(query, requestParameters({ parameters: { id1: 't1', id2: 't2' } }), MYBUCKET_SCOPE) ).toEqual([]); }); @@ -157,7 +169,7 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}, { org_id: 'test' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({}, { org_id: 'test' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["test"]', priority: 3 } ]); }); @@ -174,16 +186,16 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[123]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[1]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ { bucket: 
'mybucket[\"{\\\"a\\\":123.0}\"]', priority: 3 } ]); }); @@ -201,16 +213,16 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ other: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ other: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ other: 123 }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ other: 123 }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[123]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ other: true }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ other: true }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[1]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ other: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ other: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[\"{\\\"a\\\":123.0}\"]', priority: 3 } ]); }); @@ -228,7 +240,7 @@ describe('static parameter queries', () => { expect(query.bucketParameters).toEqual(['email']); expect( - query.getStaticBucketDescriptions(requestParameters({ sub: { email: 'a@example.org' } }), MYBUCKET_SCOPE) + getStaticBucketDescriptions(query, requestParameters({ sub: { email: 'a@example.org' } }), MYBUCKET_SCOPE) ).toEqual([{ bucket: 'mybucket["a@example.org"]', priority: 3 }]); }); @@ -244,18 +256,18 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 'user1' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["user1"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[123]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[1]', priority: 3 } ]); // This is not expected to be used - we just document the current behavior - expect(query.getStaticBucketDescriptions(requestParameters({ sub: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: { a: 123 } }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[\"{\\\"a\\\":123.0}\"]', priority: 3 } ]); }); @@ -272,13 +284,13 @@ describe('static parameter queries', () => { expect(query.errors).toEqual([]); expect(query.bucketParameters).toEqual(['user_id']); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: '123' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: '123' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["text"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, 
requestParameters({ sub: 123 }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["real"]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ sub: true }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket["integer"]', priority: 3 } ]); }); @@ -293,7 +305,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({}), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); }); @@ -308,7 +320,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({}), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); }); @@ -323,7 +335,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}), MYBUCKET_SCOPE)).toEqual([]); + expect(getStaticBucketDescriptions(query, requestParameters({}), MYBUCKET_SCOPE)).toEqual([]); }); test('static IN expression', function () { @@ -336,7 +348,7 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({}), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({}), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); }); @@ -353,13 +365,15 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: '', permissions: ['write', 'read:users'] }), MYBUCKET_SCOPE ) ).toEqual([{ bucket: 'mybucket[1]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: '', permissions: ['write', 'write:users'] }), MYBUCKET_SCOPE ) @@ -378,13 +392,15 @@ describe('static parameter queries', () => { ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: '', permissions: ['write', 'read:users'] }), MYBUCKET_SCOPE ) ).toEqual([{ bucket: 'mybucket[]', priority: 3 }]); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters({ sub: '', permissions: ['write', 'write:users'] }), MYBUCKET_SCOPE ) @@ -401,10 +417,10 @@ describe('static parameter queries', () => { EMPTY_DATA_SOURCE ) as StaticSqlParameterQuery; expect(query.errors).toEqual([]); - expect(query.getStaticBucketDescriptions(requestParameters({ role: 'superuser' }), MYBUCKET_SCOPE)).toEqual([ + expect(getStaticBucketDescriptions(query, requestParameters({ role: 'superuser' }), MYBUCKET_SCOPE)).toEqual([ { bucket: 'mybucket[]', priority: 3 } ]); - expect(query.getStaticBucketDescriptions(requestParameters({ role: 'superadmin' }), MYBUCKET_SCOPE)).toEqual([]); + expect(getStaticBucketDescriptions(query, requestParameters({ 
role: 'superadmin' }), MYBUCKET_SCOPE)).toEqual([]); }); test('case-sensitive queries (1)', () => { diff --git a/packages/sync-rules/test/src/streams.test.ts b/packages/sync-rules/test/src/streams.test.ts index 20d979c1f..eeb4fe459 100644 --- a/packages/sync-rules/test/src/streams.test.ts +++ b/packages/sync-rules/test/src/streams.test.ts @@ -24,7 +24,14 @@ import { syncStreamFromSql, ScopedParameterLookup } from '../../src/index.js'; -import { normalizeQuerierOptions, PARSE_OPTIONS, requestParameters, TestSourceTable } from './util.js'; +import { + normalizeQuerierOptions, + PARSE_OPTIONS, + removeSource, + removeSourceSymbol, + requestParameters, + TestSourceTable +} from './util.js'; describe('streams', () => { const STREAM_0: ParameterLookupScope = { @@ -38,6 +45,12 @@ describe('streams', () => { source: {} as any }; + function removeLookupSource( + entry: T + ): Omit & { lookup: any } { + return { ...entry, lookup: removeSource(entry.lookup) }; + } + test('refuses edition: 1', () => { expect(() => syncStreamFromSql('stream', 'SELECT * FROM comments', { @@ -92,7 +105,7 @@ describe('streams', () => { normalizeQuerierOptions({ parameters: { test: 'foo' } }, {}, { stream: [{ opaque_id: 0, parameters: null }] }) ); - expect(mergeBucketParameterQueriers(queriers).staticBuckets).toEqual([ + expect(mergeBucketParameterQueriers(queriers).staticBuckets.map(removeSourceSymbol)).toEqual([ { bucket: '1#stream|0["foo"]', definition: 'stream', @@ -232,13 +245,15 @@ describe('streams', () => { ]); expect( - debugHydratedMergedSource(desc, hydrationParams).evaluateParameterRow(ISSUES, { - id: 'i1', - owner_id: 'u1' - }) + debugHydratedMergedSource(desc, hydrationParams) + .evaluateParameterRow(ISSUES, { + id: 'i1', + owner_id: 'u1' + }) + .map(removeLookupSource) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct(STREAM_0, ['u1']), + lookup: removeSource(ScopedParameterLookup.direct(STREAM_0, ['u1'])), bucketParameters: [ { result: 'i1' @@ -248,7 +263,7 @@ describe('streams', () => { ]); function getParameterSets(lookups: ScopedParameterLookup[]) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['u1'])]); + expect(lookups.map(removeSource)).toStrictEqual([removeSource(ScopedParameterLookup.direct(STREAM_0, ['u1']))]); return [{ result: 'i1' }]; } @@ -287,7 +302,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -319,7 +336,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'u' }, getParameterSets: (lookups: ScopedParameterLookup[]) => { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['u'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['u'])) + ]); return [{ result: 'u' }]; } }) @@ -330,7 +349,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'u2' }, getParameterSets: (lookups: ScopedParameterLookup[]) => { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['u2'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['u2'])) + ]); return []; } }) @@ -350,9 +371,9 @@ describe('streams', () => { const source = 
debugHydratedMergedSource(desc, hydrationParams); - expect(source.evaluateParameterRow(FRIENDS, { user_a: 'a', user_b: 'b' })).toStrictEqual([ + expect(source.evaluateParameterRow(FRIENDS, { user_a: 'a', user_b: 'b' }).map(removeLookupSource)).toStrictEqual([ { - lookup: ScopedParameterLookup.direct(STREAM_0, ['b']), + lookup: removeSource(ScopedParameterLookup.direct(STREAM_0, ['b'])), bucketParameters: [ { result: 'a' @@ -360,7 +381,7 @@ describe('streams', () => { ] }, { - lookup: ScopedParameterLookup.direct(STREAM_1, ['a']), + lookup: removeSource(ScopedParameterLookup.direct(STREAM_1, ['a'])), bucketParameters: [ { result: 'b' @@ -373,10 +394,10 @@ describe('streams', () => { expect(lookups).toHaveLength(1); const [lookup] = lookups; if (lookup.values[1] == '0') { - expect(lookup).toStrictEqual(ScopedParameterLookup.direct(STREAM_0, ['a'])); + expect(removeSource(lookup)).toStrictEqual(removeSource(ScopedParameterLookup.direct(STREAM_0, ['a']))); return []; } else { - expect(lookup).toStrictEqual(ScopedParameterLookup.direct(STREAM_1, ['a'])); + expect(removeSource(lookup)).toStrictEqual(removeSource(ScopedParameterLookup.direct(STREAM_1, ['a']))); return [{ result: 'b' }]; } } @@ -416,7 +437,7 @@ describe('streams', () => { getParameterSets(lookups) { expect(lookups).toHaveLength(1); const [lookup] = lookups; - expect(lookup).toStrictEqual(ScopedParameterLookup.direct(STREAM_0, ['a'])); + expect(removeSource(lookup)).toStrictEqual(removeSource(ScopedParameterLookup.direct(STREAM_0, ['a']))); return [{ result: 'i1' }, { result: 'i2' }]; } }) @@ -476,7 +497,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -636,7 +659,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -691,7 +716,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -701,7 +728,9 @@ describe('streams', () => { await queryBucketIds(desc, { tokenPayload: { sub: 'user1', is_admin: true }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['user1'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['user1'])) + ]); return [{ result: 'issue_id' }]; } @@ -761,8 +790,10 @@ describe('streams', () => { tokenPayload: { sub: 'id' }, parameters: {}, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct({ lookupName: 'account_member', queryId: '0', source: {} as any }, ['id']) + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource( + ScopedParameterLookup.direct({ lookupName: 'account_member', queryId: '0', source: {} as any }, ['id']) 
+ ) ]); return [{ result: 'account_id' }]; } @@ -857,7 +888,9 @@ WHERE tokenPayload: { sub: 'user1', haystack_id: 1 }, parameters: { project: 'foo' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, [1n, 'foo'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, [1n, 'foo'])) + ]); return [{ result: 'foo' }]; } }) @@ -954,7 +987,9 @@ WHERE parameters: { project: 'foo' }, globalParameters: { team_id: 'team' }, getParameterSets(lookups) { - expect(lookups).toStrictEqual([ScopedParameterLookup.direct(STREAM_0, ['team'])]); + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource(ScopedParameterLookup.direct(STREAM_0, ['team'])) + ]); return [{ result: 'user' }]; } }) @@ -993,16 +1028,18 @@ WHERE ).toStrictEqual(['stream|0.test["i1"]', 'stream|1.test["i1"]', 'stream|2.test["l1"]', 'stream|3.test[]']); expect( - hydrated.evaluateParameterRow(ISSUES, { - id: 'i1', - owner_id: 'u1', - name: 'myname' - }) + hydrated + .evaluateParameterRow(ISSUES, { + id: 'i1', + owner_id: 'u1', + name: 'myname' + }) + .map(removeLookupSource) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test', source: {} as any }, [ - 'u1' - ]), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test', source: {} as any }, ['u1']) + ), bucketParameters: [ { result: 'i1' @@ -1011,9 +1048,9 @@ WHERE }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '1.test', source: {} as any }, [ - 'myname' - ]), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '1.test', source: {} as any }, ['myname']) + ), bucketParameters: [ { result: 'i1' @@ -1023,15 +1060,17 @@ WHERE ]); expect( - hydrated.evaluateParameterRow(ISSUES, { - id: 'i1', - owner_id: 'u1' - }) + hydrated + .evaluateParameterRow(ISSUES, { + id: 'i1', + owner_id: 'u1' + }) + .map(removeLookupSource) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test', source: {} as any }, [ - 'u1' - ]), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'stream.test', queryId: '0.test', source: {} as any }, ['u1']) + ), bucketParameters: [ { result: 'i1' diff --git a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts index b730f3f8a..8e5f091da 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/evaluator.test.ts @@ -8,7 +8,11 @@ import { SqliteRow, SqliteValue } from '../../../../src/index.js'; -import { requestParameters, TestSourceTable } from '../../util.js'; +import { removeSource, requestParameters, TestSourceTable } from '../../util.js'; + +function removeLookupSource(row: T): Omit & { lookup: any } { + return { ...row, lookup: removeSource(row.lookup) }; +} describe('evaluating rows', () => { syncTest('emits rows', ({ sync }) => { @@ -23,17 +27,19 @@ streams: `); expect( - desc.evaluateRow({ - sourceTable: USERS, - record: { - id: 'foo', - _double: 1, - _int: 1n, - _null: null, - _text: 'text', - _blob: new Uint8Array(10) // non-JSON columns should be removed - } - }) + desc + .evaluateRow({ + sourceTable: USERS, + record: { + id: 'foo', + _double: 1, + _int: 1n, + _null: null, + _text: 'text', + _blob: new Uint8Array(10) // non-JSON columns should be removed + } + }) 
+ .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', @@ -108,12 +114,14 @@ streams: query: SELECT * FROM users u `); expect( - desc.evaluateRow({ - sourceTable: USERS, - record: { - id: 'foo' - } - }) + desc + .evaluateRow({ + sourceTable: USERS, + record: { + id: 'foo' + } + }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', @@ -135,12 +143,14 @@ streams: query: SELECT * FROM "%" output `); expect( - desc.evaluateRow({ - sourceTable: USERS, - record: { - id: 'foo' - } - }) + desc + .evaluateRow({ + sourceTable: USERS, + record: { + id: 'foo' + } + }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', @@ -162,12 +172,14 @@ streams: query: SELECT * FROM "%" `); expect( - desc.evaluateRow({ - sourceTable: USERS, - record: { - id: 'foo' - } - }) + desc + .evaluateRow({ + sourceTable: USERS, + record: { + id: 'foo' + } + }) + .map(removeSource) ).toStrictEqual([ { bucket: 'stream|0[]', @@ -211,9 +223,13 @@ streams: expect(desc.tableSyncsData(ISSUES)).toBeFalsy(); expect(desc.tableSyncsParameters(ISSUES)).toBeTruthy(); - expect(desc.evaluateParameterRow(ISSUES, { id: 'issue_id', owner_id: 'user1', name: 'name' })).toStrictEqual([ + expect( + desc.evaluateParameterRow(ISSUES, { id: 'issue_id', owner_id: 'user1', name: 'name' }).map(removeLookupSource) + ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['user1']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['user1']) + ), bucketParameters: [ { '0': 'issue_id' @@ -344,28 +360,32 @@ streams: if (call == 0) { // First call. Lookup from users.id => users.name call++; - expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct( - { - lookupName: 'lookup', - queryId: '0', - source: {} as any - }, - ['user'] + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource( + ScopedParameterLookup.direct( + { + lookupName: 'lookup', + queryId: '0', + source: {} as any + }, + ['user'] + ) ) ]); return [{ '0': 'name' }]; } else if (call == 1) { // Second call. 
Lookup from issues.owned_by => issues.id call++; - expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct( - { - lookupName: 'lookup', - queryId: '1', - source: {} as any - }, - ['name'] + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource( + ScopedParameterLookup.direct( + { + lookupName: 'lookup', + queryId: '1', + source: {} as any + }, + ['name'] + ) ) ]); return [{ '0': 'issue' }]; diff --git a/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts b/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts index 76a8bfac6..c9a060afc 100644 --- a/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts +++ b/packages/sync-rules/test/src/sync_plan/evaluator/table_valued.test.ts @@ -1,8 +1,12 @@ import { describe, expect } from 'vitest'; import { syncTest } from './utils.js'; -import { requestParameters, TestSourceTable } from '../../util.js'; +import { removeSource, requestParameters, TestSourceTable } from '../../util.js'; import { ScopedParameterLookup, SqliteJsonRow } from '../../../../src/index.js'; +function removeLookupSource(row: T): Omit & { lookup: any } { + return { ...row, lookup: removeSource(row.lookup) }; +} + describe('table-valued functions', () => { syncTest('as partition key', ({ sync }) => { const desc = sync.prepareSyncStreams(` @@ -16,7 +20,7 @@ streams: `); const sourceTable = new TestSourceTable('stores'); - expect(desc.evaluateRow({ sourceTable, record: { id: 'id', tags: '[1,2,3]' } })).toStrictEqual( + expect(desc.evaluateRow({ sourceTable, record: { id: 'id', tags: '[1,2,3]' } }).map(removeSource)).toStrictEqual( [1, 2, 3].map((e) => ({ bucket: `stream|0[${e}]`, data: { id: 'id' }, table: 's', id: 'id' })) ); }); @@ -43,10 +47,14 @@ streams: 'stream|0["user"]' ]); expect( - desc.evaluateParameterRow(conversations, { id: 'chat', members: JSON.stringify(['user', 'another']) }) + desc + .evaluateParameterRow(conversations, { id: 'chat', members: JSON.stringify(['user', 'another']) }) + .map(removeLookupSource) ).toStrictEqual([ { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['chat']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['chat']) + ), bucketParameters: [ { '0': 'user' @@ -54,7 +62,9 @@ streams: ] }, { - lookup: ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['chat']), + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'lookup', queryId: '0', source: {} as any }, ['chat']) + ), bucketParameters: [ { '0': 'another' @@ -78,14 +88,16 @@ streams: const buckets = await querier.queryDynamicBucketDescriptions({ getParameterSets: async function (lookups: ScopedParameterLookup[]): Promise { - expect(lookups).toStrictEqual([ - ScopedParameterLookup.direct( - { - lookupName: 'lookup', - queryId: '0', - source: {} as any - }, - ['chat'] + expect(lookups.map(removeSource)).toStrictEqual([ + removeSource( + ScopedParameterLookup.direct( + { + lookupName: 'lookup', + queryId: '0', + source: {} as any + }, + ['chat'] + ) ) ]); diff --git a/packages/sync-rules/test/src/sync_rules.test.ts b/packages/sync-rules/test/src/sync_rules.test.ts index de3531925..0b682c481 100644 --- a/packages/sync-rules/test/src/sync_rules.test.ts +++ b/packages/sync-rules/test/src/sync_rules.test.ts @@ -12,9 +12,21 @@ import { USERS, findQuerierLookups, normalizeQuerierOptions, + removeSource, + removeSourceSymbol, requestParameters } from './util.js'; 
+function evaluateRows(hydrated: any, options: { sourceTable: any; record: any }) { + return hydrated.evaluateRow(options).map(removeSource); +} + +function removeLookupSource( + entry: T +): Omit & { lookup: any } { + return { ...entry, lookup: removeSource(entry.lookup) }; +} + describe('sync rules', () => { const hydrationParams: CreateSourceParams = { hydrationState: DEFAULT_HYDRATION_STATE }; @@ -42,7 +54,7 @@ bucket_definitions: expect(dataQuery.bucketParameters).toEqual([]); expect(dataQuery.columnOutputNames()).toEqual(['id', 'description']); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test' } }) @@ -111,10 +123,12 @@ bucket_definitions: PARSE_OPTIONS ); const hydrated = rules.hydrate(hydrationParams); - expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([ + expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 }).map(removeLookupSource)).toEqual([ { bucketParameters: [{}], - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1', source: {} as any }, ['user1']) + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket', queryId: '1', source: {} as any }, ['user1']) + ) } ]); expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 0 })).toEqual([]); @@ -135,14 +149,15 @@ bucket_definitions: const bucketData = rules.bucketDataSources[0]; expect(bucketData.bucketParameters).toEqual(['user_id', 'device_id']); expect( - hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ sub: 'user1' }, { device_id: 'device1' })).querier - .staticBuckets + hydrated + .getBucketParameterQuerier(normalizeQuerierOptions({ sub: 'user1' }, { device_id: 'device1' })) + .querier.staticBuckets.map(removeSourceSymbol) ).toEqual([ { bucket: 'mybucket["user1","device1"]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 } ]); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', user_id: 'user1', device_id: 'device1' } }) @@ -158,7 +173,7 @@ bucket_definitions: } ]); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', user_id: 'user1', archived: 1, device_id: 'device1' } }) @@ -199,7 +214,7 @@ bucket_definitions: normalizeQuerierOptions({ sub: 'user1' }, { device_id: 'device1' }) ); expect(errors).toEqual([]); - expect(querier.staticBuckets).toEqual([ + expect(querier.staticBuckets.map(removeSourceSymbol)).toEqual([ { bucket: 'mybucket-test["user1"]', definition: 'mybucket', @@ -207,21 +222,23 @@ bucket_definitions: priority: 3 } ]); - expect(await findQuerierLookups(querier)).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test', source: {} as any }, ['user1']) + expect((await findQuerierLookups(querier)).map(removeSource)).toEqual([ + removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test', source: {} as any }, ['user1']) + ) ]); - expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 })).toEqual([ + expect(hydrated.evaluateParameterRow(USERS, { id: 'user1', is_admin: 1 }).map(removeLookupSource)).toEqual([ { bucketParameters: [{ user_id: 'user1' }], - lookup: ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: '2.test', source: {} as any }, [ - 'user1' - ]) + lookup: removeSource( + ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 
'2.test', source: {} as any }, ['user1']) + ) } ]); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', user_id: 'user1', device_id: 'device1' } }) @@ -252,12 +269,14 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); const bucketData = rules.bucketDataSources[0]; expect(bucketData.bucketParameters).toEqual(['user_id']); - expect(hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ sub: 'user1' })).querier.staticBuckets).toEqual( - [{ bucket: 'mybucket["user1"]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 }] - ); + expect( + hydrated + .getBucketParameterQuerier(normalizeQuerierOptions({ sub: 'user1' })) + .querier.staticBuckets.map(removeSourceSymbol) + ).toEqual([{ bucket: 'mybucket["user1"]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 }]); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', user_id: 'user1' } }) @@ -273,7 +292,7 @@ bucket_definitions: } ]); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', owner_id: 'user1' } }) @@ -399,7 +418,7 @@ bucket_definitions: }); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', user_id: 'user1' } }) @@ -437,7 +456,7 @@ bucket_definitions: }); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', user_id: 'user1' } }) @@ -466,7 +485,7 @@ bucket_definitions: ); const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', data: JSON.stringify({ count: 5, bool: true }) } }) @@ -501,7 +520,7 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', @@ -546,7 +565,7 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', role: 'admin' } }) @@ -565,7 +584,7 @@ bucket_definitions: ]); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset2', description: 'test', role: 'normal' } }) @@ -606,8 +625,9 @@ bucket_definitions: ]); expect( - hydrated.getBucketParameterQuerier(normalizeQuerierOptions({ parameters: { is_admin: true } })).querier - .staticBuckets + hydrated + .getBucketParameterQuerier(normalizeQuerierOptions({ parameters: { is_admin: true } })) + .querier.staticBuckets.map(removeSourceSymbol) ).toEqual([{ bucket: 'mybucket[1]', definition: 'mybucket', inclusion_reasons: ['default'], priority: 3 }]); }); @@ -623,7 +643,7 @@ bucket_definitions: ); const hydrated = rules.hydrate(hydrationParams); - expect(hydrated.evaluateRow({ sourceTable: ASSETS, record: { id: 'asset1' } })).toEqual([ + expect(evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1' } })).toEqual([ { bucket: 'mybucket[]', id: 'asset1', @@ -657,7 +677,7 @@ bucket_definitions: ).toMatchObject({ staticBuckets: [{ bucket: 'mybucket[314,3.14,314]', priority: 3 }] }); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', int1: 314n, float1: 3.14, float2: 314 } }) @@ -705,7 +725,7 @@ 
bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: new TestSourceTable('assets_123'), record: { client_id: 'asset1', description: 'test', archived: 0n, other_id: 'other1' } }) @@ -746,7 +766,7 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: new TestSourceTable('assets_123'), record: { client_id: 'asset1', description: 'test', archived: 0n, other_id: 'other1' } }) @@ -780,7 +800,7 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1', description: 'test', archived: 0n } }) @@ -816,7 +836,7 @@ bucket_definitions: const hydrated = rules.hydrate(hydrationParams); expect( - hydrated.evaluateRow({ + evaluateRows(hydrated, { sourceTable: ASSETS, record: { id: 'asset1' } }) @@ -1046,8 +1066,8 @@ bucket_definitions: ] }); - expect(await findQuerierLookups(hydratedQuerier)).toEqual([ - ScopedParameterLookup.direct({ lookupName: 'admin_only', queryId: '1', source: {} as any }, [1]) + expect((await findQuerierLookups(hydratedQuerier)).map(removeSource)).toEqual([ + removeSource(ScopedParameterLookup.direct({ lookupName: 'admin_only', queryId: '1', source: {} as any }, [1])) ]); }); diff --git a/packages/sync-rules/test/src/table_valued_function_queries.test.ts b/packages/sync-rules/test/src/table_valued_function_queries.test.ts index e81db1b84..97493af0a 100644 --- a/packages/sync-rules/test/src/table_valued_function_queries.test.ts +++ b/packages/sync-rules/test/src/table_valued_function_queries.test.ts @@ -8,11 +8,19 @@ import { SqlParameterQuery } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; -import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, requestParameters } from './util.js'; +import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, removeSourceSymbol, requestParameters } from './util.js'; describe('table-valued function queries', () => { const emptyPayload: RequestJwtPayload = { userIdJson: '', parsedPayload: {} }; + function getStaticBucketDescriptions( + query: StaticSqlParameterQuery, + parameters: RequestParameters, + scope: { bucketPrefix: string; source: unknown } + ) { + return query.getStaticBucketDescriptions(parameters, scope).map(removeSourceSymbol); + } + test('json_each(array param)', function () { const sql = "SELECT json_each.value as v FROM json_each(request.parameters() -> 'array')"; const query = SqlParameterQuery.fromSql( @@ -29,7 +37,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3, null] }), { + getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3, null] }), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -61,7 +69,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3, null] }), { + getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3, null] }), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -86,7 +94,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(requestParameters({}, {}), { + 
getStaticBucketDescriptions(query, requestParameters({}, {}), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -110,7 +118,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(requestParameters({}, {}), { + getStaticBucketDescriptions(query, requestParameters({}, {}), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -133,7 +141,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(requestParameters({}, {}), { + getStaticBucketDescriptions(query, requestParameters({}, {}), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -156,7 +164,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(requestParameters({}, {}), { + getStaticBucketDescriptions(query, requestParameters({}, {}), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -176,7 +184,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['value']); expect( - query.getStaticBucketDescriptions(requestParameters({}, {}), { + getStaticBucketDescriptions(query, requestParameters({}, {}), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -203,7 +211,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['value']); expect( - query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), { + getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3] }), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -230,7 +238,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['value']); expect( - query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), { + getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3] }), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -257,7 +265,7 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['v']); expect( - query.getStaticBucketDescriptions(requestParameters({}, { array: [1, 2, 3] }), { + getStaticBucketDescriptions(query, requestParameters({}, { array: [1, 2, 3] }), { bucketPrefix: 'mybucket', source: EMPTY_DATA_SOURCE }) @@ -284,7 +292,8 @@ describe('table-valued function queries', () => { expect(query.bucketParameters).toEqual(['project_id']); expect( - query.getStaticBucketDescriptions( + getStaticBucketDescriptions( + query, requestParameters( { projects: [ From 6184ebf7db57b0afd62a965fcadf8748b6d6b692 Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Tue, 17 Feb 2026 13:45:40 +0200 Subject: [PATCH 099/101] Fix more type issues. 
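evaluateParameterRow() now yields EvaluatedParametersResult entries, a union in
which only some members carry a lookup, so the test helpers have to narrow
before stripping the non-deterministic source reference for comparison. A
minimal sketch of the pattern applied in streams.test.ts below (import paths
assume the test/src layout used by these files):

    import { EvaluatedParametersResult, isEvaluatedParameters } from '../../src/index.js';
    import { removeSource } from './util.js';

    // Narrow to entries that actually carry a lookup before stripping its
    // source; other members of the union are passed through unchanged.
    function removeLookupSource(entry: EvaluatedParametersResult) {
      if (!isEvaluatedParameters(entry)) {
        return entry;
      }
      return { ...entry, lookup: removeSource(entry.lookup) };
    }

parameter_queries.test.ts uses an inline ('lookup' in entry) check for the same
reason, and the scope argument of getStaticBucketDescriptions() is now typed as
BucketDataScope instead of an ad-hoc object literal.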
--- packages/sync-rules/test/src/parameter_queries.test.ts | 4 +++- packages/sync-rules/test/src/streams.test.ts | 9 ++++++--- .../test/src/table_valued_function_queries.test.ts | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/packages/sync-rules/test/src/parameter_queries.test.ts b/packages/sync-rules/test/src/parameter_queries.test.ts index c2b00b123..1714bb55b 100644 --- a/packages/sync-rules/test/src/parameter_queries.test.ts +++ b/packages/sync-rules/test/src/parameter_queries.test.ts @@ -921,7 +921,9 @@ describe('parameter queries', () => { id: 'group1', user_ids: JSON.stringify(['test-user', 'other-user']) }); - expect(result.map((entry) => ({ ...entry, lookup: removeSource(entry.lookup) }))).toEqual([ + expect( + result.map((entry) => ('lookup' in entry ? { ...entry, lookup: removeSource(entry.lookup) } : entry)) + ).toEqual([ { lookup: removeSource( ScopedParameterLookup.direct({ lookupName: 'mybucket.test', queryId: 'myquery.test', source: {} as any }, [ diff --git a/packages/sync-rules/test/src/streams.test.ts b/packages/sync-rules/test/src/streams.test.ts index eeb4fe459..c9d49386f 100644 --- a/packages/sync-rules/test/src/streams.test.ts +++ b/packages/sync-rules/test/src/streams.test.ts @@ -15,6 +15,8 @@ import { UnscopedParameterLookup, QuerierError, RequestParameters, + EvaluatedParametersResult, + isEvaluatedParameters, SourceTableInterface, SqliteJsonRow, SqliteRow, @@ -45,9 +47,10 @@ describe('streams', () => { source: {} as any }; - function removeLookupSource( - entry: T - ): Omit & { lookup: any } { + function removeLookupSource(entry: EvaluatedParametersResult) { + if (!isEvaluatedParameters(entry)) { + return entry; + } return { ...entry, lookup: removeSource(entry.lookup) }; } diff --git a/packages/sync-rules/test/src/table_valued_function_queries.test.ts b/packages/sync-rules/test/src/table_valued_function_queries.test.ts index 97493af0a..581b35aa7 100644 --- a/packages/sync-rules/test/src/table_valued_function_queries.test.ts +++ b/packages/sync-rules/test/src/table_valued_function_queries.test.ts @@ -8,6 +8,7 @@ import { SqlParameterQuery } from '../../src/index.js'; import { StaticSqlParameterQuery } from '../../src/StaticSqlParameterQuery.js'; +import { BucketDataScope } from '../../src/HydrationState.js'; import { EMPTY_DATA_SOURCE, PARSE_OPTIONS, removeSourceSymbol, requestParameters } from './util.js'; describe('table-valued function queries', () => { @@ -16,7 +17,7 @@ describe('table-valued function queries', () => { function getStaticBucketDescriptions( query: StaticSqlParameterQuery, parameters: RequestParameters, - scope: { bucketPrefix: string; source: unknown } + scope: BucketDataScope ) { return query.getStaticBucketDescriptions(parameters, scope).map(removeSourceSymbol); } From ada05fdab5aac2846de5803cce8714e8a420d8dd Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 18 Feb 2026 11:09:08 +0200 Subject: [PATCH 100/101] Fix tsconfig. 
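The service-core test config now extends the shared tsconfig.tests.json instead
of tsconfig.base.json; the options dropped here (noEmit, esModuleInterop,
skipLibCheck, sourceMap) are presumably supplied by that shared tests config,
leaving only rootDir/baseUrl/outDir and the "@/*" path alias as
package-specific settings.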
--- packages/service-core/test/tsconfig.json | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/packages/service-core/test/tsconfig.json b/packages/service-core/test/tsconfig.json index 9acd25c61..9055101d7 100644 --- a/packages/service-core/test/tsconfig.json +++ b/packages/service-core/test/tsconfig.json @@ -1,13 +1,9 @@ { - "extends": "../../../tsconfig.base.json", + "extends": "../../../tsconfig.tests.json", "compilerOptions": { "rootDir": "src", "baseUrl": "./", "outDir": "dist", - "noEmit": true, - "esModuleInterop": true, - "skipLibCheck": true, - "sourceMap": true, "paths": { "@/*": ["../src/*"] } From d6807a264d96284f70bb9e4bf723ed0408d7a49e Mon Sep 17 00:00:00 2001 From: Ralf Kistner Date: Wed, 18 Feb 2026 11:56:03 +0200 Subject: [PATCH 101/101] Fix hydration issue. --- packages/sync-rules/src/SyncConfig.ts | 11 +++-------- packages/sync-rules/test/src/compatibility.test.ts | 10 +++++----- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/packages/sync-rules/src/SyncConfig.ts b/packages/sync-rules/src/SyncConfig.ts index a8103131d..93f0a9663 100644 --- a/packages/sync-rules/src/SyncConfig.ts +++ b/packages/sync-rules/src/SyncConfig.ts @@ -33,17 +33,12 @@ export abstract class SyncConfig { /** * Hydrate the sync rule definitions with persisted state into runnable sync rules. * - * @param params.hydrationState Transforms bucket ids based on persisted state. May omit for tests. + * @param createParams.hydrationState Transforms bucket ids based on persisted state. */ - hydrate(params?: CreateSourceParams): HydratedSyncRules { - let hydrationState = params?.hydrationState; - if (hydrationState == null || !this.compatibility.isEnabled(CompatibilityOption.versionedBucketIds)) { - hydrationState = DEFAULT_HYDRATION_STATE; - } - const resolvedParams = { hydrationState }; + hydrate(createParams: CreateSourceParams): HydratedSyncRules { return new HydratedSyncRules({ definition: this, - createParams: resolvedParams, + createParams: createParams, bucketDataSources: this.bucketDataSources, bucketParameterIndexLookupCreators: this.bucketParameterLookupSources, eventDescriptors: this.eventDescriptors, diff --git a/packages/sync-rules/test/src/compatibility.test.ts b/packages/sync-rules/test/src/compatibility.test.ts index 92ecdcbdc..f0b59d2fc 100644 --- a/packages/sync-rules/test/src/compatibility.test.ts +++ b/packages/sync-rules/test/src/compatibility.test.ts @@ -9,7 +9,7 @@ import { toSyncRulesValue } from '../../src/index.js'; -import { versionedHydrationState } from '../../src/HydrationState.js'; +import { DEFAULT_HYDRATION_STATE, versionedHydrationState } from '../../src/HydrationState.js'; import { ASSETS, normalizeQuerierOptions, PARSE_OPTIONS, removeSource, removeSourceSymbol } from './util.js'; describe('compatibility options', () => { @@ -28,7 +28,7 @@ bucket_definitions: - SELECT id, description FROM assets `, PARSE_OPTIONS - ).config.hydrate(); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( rules @@ -57,7 +57,7 @@ config: timestamps_iso8601: true `, PARSE_OPTIONS - ).config.hydrate(); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( rules @@ -231,7 +231,7 @@ bucket_definitions: - SELECT id, description ->> 'foo.bar' AS "desc" FROM assets `, PARSE_OPTIONS - ).config.hydrate(); + ).config.hydrate({ hydrationState: DEFAULT_HYDRATION_STATE }); expect( rules @@ -257,7 +257,7 @@ config: fixed_json_extract: true `, PARSE_OPTIONS - ).config.hydrate(); + ).config.hydrate({ hydrationState: 
DEFAULT_HYDRATION_STATE }); expect( rules